##### Dataset

In [2]:
# Social_Network_Ads.csv
# https://mitu.co.in/dataset

##### Import the libraries

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

##### Load the dataset

In [4]:
# Read the ads dataset from a path relative to the notebook's working directory.
csv_path = 'datasets/Social_Network_Ads.csv'
df = pd.read_csv(filepath_or_buffer=csv_path)

In [5]:
# Features (model inputs): the two columns used as predictors.
feature_columns = ['Age', 'EstimatedSalary']
x = df[feature_columns]

# Target (model output): the label column the classifiers learn to predict.
target_column = 'Purchased'
y = df[target_column]

##### Feature Scaling

In [6]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-column min/max from the feature frame, then rescale it.
# NOTE(review): the scaler is fit on every row of `x`, i.e. before the
# train/test split is made, so the held-out rows contribute to the scaling
# statistics.
scaler = MinMaxScaler()
scaler.fit(x)
x_scaled = scaler.transform(x)

##### Train/Test Split (hold-out validation)

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Split the RAW features first. Without stratification the split depends only
# on the number of rows and random_state, so the same rows land in train/test
# as when splitting already-scaled data.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(x, y, random_state=0)

# Fit the scaler on the training rows only, so the held-out rows cannot leak
# their min/max into preprocessing; then apply that same transform to both
# parts. Test values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler().fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

##### Import all classifiers

In [8]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

##### Create objects

In [80]:
# One seed shared by every estimator that is given a random_state.
SEED = 0

# Estimators constructed without a seed.
nb = GaussianNB()
log = LogisticRegression()
knn = KNeighborsClassifier(n_neighbors=5)

# Estimators constructed with the shared seed.
dt = DecisionTreeClassifier(random_state=SEED)
svm = SVC(kernel='poly', random_state=SEED)
rf = RandomForestClassifier(n_estimators=50, random_state=SEED)

##### Train the models

In [81]:
# Fit every standalone classifier on the training split, in the same order as
# before: NB, DT, RF, KNN, SVM, logistic regression.
for clf in (nb, dt, rf, knn, svm, log):
    clf.fit(x_train, y_train)

In [82]:
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.metrics import classification_report, accuracy_score

In [83]:
# Predict the held-out rows with each fitted classifier; the names on the left
# line up positionally with the estimators on the right.
y_pred_nb, y_pred_dt, y_pred_svm, y_pred_knn, y_pred_rf, y_pred_log = (
    clf.predict(x_test) for clf in (nb, dt, svm, knn, rf, log)
)

In [84]:
# Hold-out accuracy of each individually trained classifier, printed in one
# uniform "<label>: <score>" format. The KNN label previously lacked the colon
# that every other label had; driving all prints from a single mapping keeps
# the labels consistent.
predictions_by_model = {
    "NB": y_pred_nb,
    "DT": y_pred_dt,
    "RF": y_pred_rf,
    "KNN": y_pred_knn,
    "SVM": y_pred_svm,
    "LogR": y_pred_log,
}
for label, y_hat in predictions_by_model.items():
    print(f"{label}:", accuracy_score(y_test, y_hat))

NB: 0.9
DT: 0.9
RF: 0.92
KNN 0.93
SVM: 0.92
LogR: 0.89


##### Stacking Classification

In [85]:
from sklearn.ensemble import StackingClassifier

In [94]:
# Base learners for the stack, as (name, estimator) pairs.
# NOTE(review): `svm` is trained earlier in the notebook but is not listed
# here - confirm that leaving it out is intentional.
base_learners = [
    ('NB', nb),
    ('DT', dt),
    ('RF', rf),
    ('KNN', knn),
]

# The logistic-regression estimator `log` is used as the final estimator.
stack = StackingClassifier(estimators=base_learners, final_estimator=log)

In [95]:
# Train the stacked ensemble on the training split.
stack.fit(X=x_train, y=y_train)

In [96]:
# Stacked-ensemble predictions for the held-out rows.
y_pred = stack.predict(X=x_test)

In [97]:
# Hold-out accuracy of the stacked ensemble (shown as the cell's output).
accuracy_score(y_true=y_test, y_pred=y_pred)

0.94

In [31]:
# Per-class precision / recall / F1 for the stacked ensemble.
# NOTE(review): the output recorded under this cell reports accuracy 0.93 while
# the accuracy_score cell above reports 0.94, and this cell's execution count
# (31) predates the cells above it - the output looks stale; re-run the
# notebook top to bottom before relying on it.
stack_report = classification_report(y_true=y_test, y_pred=y_pred)
print(stack_report)

              precision    recall  f1-score   support

           0       0.96      0.94      0.95        68
           1       0.88      0.91      0.89        32

    accuracy                           0.93       100
   macro avg       0.92      0.92      0.92       100
weighted avg       0.93      0.93      0.93       100



In [48]:
# Confusion matrix for the stacked ensemble (shown as the cell's output).
# NOTE(review): the matrix recorded under this cell has 64 + 29 = 93 correct
# out of 100, which disagrees with the 0.94 accuracy recorded above, and the
# execution count (48) is out of order - likely a stale output; re-run.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[64,  4],
       [ 3, 29]])