In [269]:
import numpy as np
from math import factorial
from matplotlib import pyplot
from pandas import DataFrame
from pandas import read_csv

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

from random import random as random
from warnings import filterwarnings as message
from time import sleep
message('ignore')

In [270]:
df=read_csv('log_reg.csv')
df=df.dropna()
df.index=range(len(df.index))
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [254]:
X = df.drop(["User ID", "Purchased"], axis=1)
y = df[["Purchased"]]
features_num = ["Age", "EstimatedSalary"]
features_cat = ["Gender"]
preprocessor = ColumnTransformer([("OneHotEncoder", OneHotEncoder(), features_cat),
                                 ("Normalization", StandardScaler(), features_num)], remainder="passthrough")
X = DataFrame(preprocessor.fit_transform(X))

In [255]:
X

Unnamed: 0,0,1,2,3
0,0.0,1.0,-1.781797,-1.490046
1,0.0,1.0,-0.253587,-1.460681
2,1.0,0.0,-1.113206,-0.785290
3,1.0,0.0,-1.017692,-0.374182
4,0.0,1.0,-1.781797,0.183751
...,...,...,...,...
395,1.0,0.0,0.797057,-0.844019
396,0.0,1.0,1.274623,-1.372587
397,1.0,0.0,1.179110,-1.460681
398,0.0,1.0,-0.158074,-1.078938


In [256]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [257]:
model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [258]:
y_predict = model.predict(X_test)
y_predict

array([1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1])

In [259]:
weights=model.coef_[0]
weights

array([-0.27360634,  0.27360879,  2.42557551,  1.20094462])

In [260]:
bias=model.intercept_[0]
bias

-1.085011310632688

In [261]:
y_predict

array([1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1])

In [262]:
y_test.index=range(len(y_test['Purchased']))
y_test=y_test['Purchased']
y_test=np.array(y_test)
y_test

array([1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0])

In [263]:
np.sum(y_test==y_predict)/len(y_test)

0.8375

# Example


In [264]:
example_data = [['Male', 50,43000],['Male', 60,36000],['Female', 36,319100]]
example_data= DataFrame(example_data)
example_data.columns = ['Gender', 'Age', 'EstimatedSalary']
example_data

Unnamed: 0,Gender,Age,EstimatedSalary
0,Male,50,43000
1,Male,60,36000
2,Female,36,319100


In [265]:
example_data = preprocessor.fit_transform(example_data)

In [266]:
example_data=DataFrame(example_data)

In [267]:
example_data

Unnamed: 0,0,1,2,3
0,0.0,1.0,0.135457,-0.680392
1,0.0,1.0,1.151385,-0.733489
2,1.0,0.0,-1.286842,1.413881


In [268]:
example_data['predict_example']= model.predict(example_data)
example_data

Unnamed: 0,0,1,2,3,predict_example
0,0.0,1.0,0.135457,-0.680392,0
1,0.0,1.0,1.151385,-0.733489,1
2,1.0,0.0,-1.286842,1.413881,0
