In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.linear_model import LogisticRegression

In [2]:
df_org = pd.read_csv('data/Social_Network_AdsYesNo.csv')

In [3]:
#Categorical data - One hot Encoding
df=pd.get_dummies(data=df_org, columns=['Gender'])

In [4]:
#Prepare data (X) and label (y)
X = df.drop(['User ID','Purchased'],axis=1)
y = df['Purchased']
X = X.to_numpy()
y = y.to_numpy()

In [5]:
#Separate data : Training and Testing
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [6]:
# K folds  https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html#sklearn.model_selection.StratifiedKFold
skf = StratifiedKFold(n_splits=5) 


In [9]:
i=1
for train_index, test_index in skf.split(X,y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Data Normalization 
    sc_x = StandardScaler()
    sc_x = sc_x.fit(X_train)
    xtrain =  sc_x.transform(X_train) 

    #Create model
    classifier = LogisticRegression(random_state = 0)
    classifier.fit(xtrain, y_train)
    
    #Prediction
    xtest = sc_x.transform(X_test)
    y_pred = classifier.predict(xtest)

    #Evaluation
    print('Fold', i)
    cm = confusion_matrix(y_test, y_pred)
    eval_report = classification_report(y_test, y_pred)
    print ("Confusion Matrix : \n", cm)
    print ("Summary : \n", eval_report)
    print('----------------------------------------------------------')
    i = i+1 

Fold 1
Confusion Matrix : 
 [[51  1]
 [23  5]]
Summary : 
               precision    recall  f1-score   support

          No       0.69      0.98      0.81        52
         Yes       0.83      0.18      0.29        28

    accuracy                           0.70        80
   macro avg       0.76      0.58      0.55        80
weighted avg       0.74      0.70      0.63        80

----------------------------------------------------------
Fold 2
Confusion Matrix : 
 [[51  1]
 [ 3 25]]
Summary : 
               precision    recall  f1-score   support

          No       0.94      0.98      0.96        52
         Yes       0.96      0.89      0.93        28

    accuracy                           0.95        80
   macro avg       0.95      0.94      0.94        80
weighted avg       0.95      0.95      0.95        80

----------------------------------------------------------
Fold 3
Confusion Matrix : 
 [[50  1]
 [ 4 25]]
Summary : 
               precision    recall  f1-score   suppo

In [6]:
# Data Normalization 
sc_x = StandardScaler()
sc_x = sc_x.fit(X_train)
xtrain =  sc_x.transform(X_train) 


In [7]:
#Create model

classifier = LogisticRegression(random_state = 0)
classifier.fit(xtrain, y_train)


In [8]:
#Display some parameters
print(classifier.coef_)
print(classifier.intercept_)
print(classifier.classes_)


[[ 2.09617526  1.11788402 -0.04789188  0.04789188]]
[-0.96134888]
['No' 'Yes']


In [20]:
#Prediction
xtest = sc_x.transform(X_test)
y_pred = classifier.predict(xtest)


In [22]:
#Display as table to compare actual vs. predicted 
tab = pd.DataFrame()
tab['actual'] = y_test
tab['predict'] = y_pred

In [32]:
#Evaluation
cm = confusion_matrix(y_test, y_pred)
eval_report = classification_report(y_test, y_pred)
print ("Confusion Matrix : \n", cm)
print ("Summary : \n", eval_report)

Confusion Matrix : 
 [[65  3]
 [ 6 26]]
Summary : 
               precision    recall  f1-score   support

          No       0.92      0.96      0.94        68
         Yes       0.90      0.81      0.85        32

    accuracy                           0.91       100
   macro avg       0.91      0.88      0.89       100
weighted avg       0.91      0.91      0.91       100

