#### Step 1: Importing libraries

In [49]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split


#### Step 2: Load the dataset into a pandas DataFrame

In [50]:
# Library needed for reading .xlsx files.
# Uncomment to install it (use %pip rather than !pip so it targets this kernel's environment):
# %pip install openpyxl

In [51]:
# Read the customer spreadsheet (path is relative to the notebook's working directory).
dataset_path = 'dataset/customer.xlsx'
df = pd.read_excel(dataset_path)

# Leave the frame as the last expression so Jupyter renders it as a table.
df

Unnamed: 0,gender,age,salary,purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


#### Step 3: Encoding gender column

In [52]:
# Learn the gender labels first, then replace the text column with integer codes.
# The fitted `encoder` is kept because the prediction step reuses it on raw labels.
# NOTE: this overwrites df['gender'] in place. Re-running this cell without
# reloading `df` would refit the encoder on the integer codes, and a later
# encoder.transform(['Male']) would then fail.
encoder = LabelEncoder()
encoder.fit(df['gender'])
df['gender'] = encoder.transform(df['gender'])

df

Unnamed: 0,gender,age,salary,purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0
...,...,...,...,...
395,0,46,41000,1
396,1,51,23000,1
397,0,50,20000,1
398,1,36,33000,0


#### Step 4: Define feature and target data

In [53]:
# Columns fed to the model, in the order the scaler and model will expect them.
feature_columns = ['gender', 'age', 'salary']

x = df[feature_columns].to_numpy()   # feature matrix
y = df['purchased'].to_numpy()       # target vector

print(x)
print(y)

[[    1    19 19000]
 [    1    35 20000]
 [    0    26 43000]
 ...
 [    0    50 20000]
 [    1    36 33000]
 [    0    49 36000]]
[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 1 0 0 0 1 0 0 0 1 0 1
 1 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 0 1 1 0 1 0 0 1
 1 0 1 1 0 1 1 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 0 0 0
 1 1 0 1 1 1 1 1 0 0 0 1 1 0 0 1 0 1 0 1 1 0 1 0 1 1 0 1 1 0 0 0 1 1 0 1 0
 0 1 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 1 0 1
 1 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1]


#### Step 5: Split the data into train and test sets

In [54]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Sanity-check the shape of each piece of the split.
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)

(320, 3)
(80, 3)
(320,)
(80,)


#### Step 6: Scale the data after split

In [55]:
# Fit the scaler on the training rows only, then apply the same statistics to
# both splits so no information from the test set leaks into the scaling.
# NOTE: x_train / x_test are overwritten here. Re-running this cell without
# re-running the split would refit the scaler on already-scaled data.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

print(x_train)
print(x_test)

[[ 1.02532046e+00  1.92295008e+00  2.14601566e+00]
 [-9.75304830e-01  2.02016082e+00  3.78719297e-01]
 [-9.75304830e-01 -1.38221530e+00 -4.32498705e-01]
 [-9.75304830e-01 -1.18779381e+00 -1.01194013e+00]
 [-9.75304830e-01  1.92295008e+00 -9.25023920e-01]
 [-9.75304830e-01  3.67578135e-01  2.91803083e-01]
 [-9.75304830e-01  1.73156642e-01  1.46942725e-01]
 [ 1.02532046e+00  2.02016082e+00  1.74040666e+00]
 [-9.75304830e-01  7.56421121e-01 -8.38107706e-01]
 [-9.75304830e-01  2.70367388e-01 -2.87638347e-01]
 [ 1.02532046e+00  3.67578135e-01 -1.71750061e-01]
 [-9.75304830e-01 -1.18475597e-01  2.20395980e+00]
 [-9.75304830e-01 -1.47942605e+00 -6.35303205e-01]
 [ 1.02532046e+00 -1.28500455e+00 -1.06988428e+00]
 [ 1.02532046e+00 -1.38221530e+00  4.07691369e-01]
 [-9.75304830e-01 -1.09058306e+00  7.55356227e-01]
 [ 1.02532046e+00 -1.47942605e+00 -2.00722133e-01]
 [ 1.02532046e+00  9.50842613e-01 -1.06988428e+00]
 [ 1.02532046e+00  9.50842613e-01  5.81523798e-01]
 [ 1.02532046e+00  3.67578135e-

#### Step 7: Create model and train

In [56]:
# Fit a logistic-regression classifier on the scaled training features.
model = LogisticRegression()
model.fit(x_train, y_train)

# The held-out split was created and scaled above but never used; report
# accuracy on both splits so the quality of the model is visible before it is
# used for predictions (a large train/test gap would indicate overfitting).
print(f'Train accuracy: {model.score(x_train, y_train):.3f}')
print(f'Test accuracy:  {model.score(x_test, y_test):.3f}')

#### Step 8: Prepare a new sample for prediction

In [60]:
# Customer profile to score.
# (Another sample to try: gender = 'Female', age = 49, salary = 36000.)
gender, age, salary = 'Male', 45, 45000

# The encoder works on array-likes, so wrap the label in a list and unpack the
# single resulting code.
(gender_enc,) = encoder.transform([gender])

# Build a one-row feature matrix in the training column order
# (gender, age, salary) and scale it with the scaler fitted on the training data.
x_predict = [[gender_enc, age, salary]]
x_predict_sc = scaler.transform(x_predict)

print('Predict: ', x_predict_sc)

Predict:  [[ 1.02532046  0.65921037 -0.72221942]]


#### Step 9: Prediction

In [63]:
# Classify the scaled sample; predict() returns one label per input row.
y_predict = model.predict(x_predict_sc)

# Translate the numeric class into a readable outcome (0 means no purchase).
print('The predict result')
if y_predict[0] == 0:
    p = 'Not Purchase'
else:
    p = 'Purchase'
print(f'gender: {gender}, age: {age}, salary: {salary} =>', p)

The predict result
gender: Male, age: 45, salary: 45000 => Not Purchase
