In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the Social Network Ads CSV and discard the 'User ID' column in a
# single chained expression (no in-place mutation of the loaded frame).
df = pd.read_csv('../../Datasets/Social_Network_Ads.csv').drop(columns='User ID')

# Preview the first five rows.
df.head(5)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [3]:
# Encode Gender as a single 0/1 indicator column. drop_first=True drops the
# first category's indicator and keeps the remaining one. dtype=int keeps the
# whole frame integer-typed so df.to_numpy() returns a numeric array: with
# pandas >= 2.0 the default dummy dtype is bool, and mixing bool with integer
# columns would make to_numpy() fall back to an object array.
df['Gender'] = pd.get_dummies(df['Gender'], drop_first=True, dtype=int).iloc[:, 0]
X = df.to_numpy()

# Seed NumPy's global RNG so the row shuffle below is reproducible.
# np.random.seed must be *called*; assigning to it (np.random.seed = 0)
# replaces the function with an int and leaves the generator unseeded.
np.random.seed(0)
X = X[np.random.permutation(X.shape[0])]

# The last column (Purchased in the preview above) is the target; every
# other column is a feature.
y = X[:, -1]
X = X[:, :-1]

In [4]:
# Index of the first test row: the leading 80% of the (already shuffled)
# rows are used for training, the remainder for testing.
split = int(X.shape[0] * 0.8)

# Slice features and labels at the same boundary so rows stay aligned.
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [5]:
# Per-feature statistics are taken from the training rows only, so nothing
# about the test rows influences the scaling.
mean, std = X_train.mean(axis=0), X_train.std(axis=0)

# Standardise both partitions with the training statistics.
X_train, X_test = [(part - mean) / std for part in (X_train, X_test)]

In [6]:
from sklearn.ensemble import VotingClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [17]:
# (name, estimator) pairs for the individual models and the voting ensemble.
# Order: logistic regression, k-NN for k in 3/5/7/9, linear and RBF SVMs,
# then decision trees limited to depth 2 and 3.
clfs = [('LR', LogisticRegression())]
clfs += [('KNN{}'.format(k), KNeighborsClassifier(n_neighbors=k))
         for k in (3, 5, 7, 9)]
clfs += [('SVM_l', SVC(kernel='linear')),
         ('SVM_r', SVC(kernel='rbf'))]
clfs += [('DT{}'.format(depth), DecisionTreeClassifier(max_depth=depth))
         for depth in (2, 3)]

In [18]:
# Fit every base estimator on the training split and report its accuracy on
# both the training and the test split, one line per model.
report_line = '{}:\ttrain_acc:{},\ttest_acc:{}'
for name, clf in clfs:
    clf.fit(X_train, y_train)
    train_acc = clf.score(X_train, y_train)
    test_acc = clf.score(X_test, y_test)
    print(report_line.format(name, train_acc, test_acc))

LR:	train_acc:0.83125,	test_acc:0.875
KNN3:	train_acc:0.921875,	test_acc:0.925
KNN5:	train_acc:0.9125,	test_acc:0.9375
KNN7:	train_acc:0.915625,	test_acc:0.925
KNN9:	train_acc:0.9125,	test_acc:0.925
SVM_r:	train_acc:0.909375,	test_acc:0.925
DT2:	train_acc:0.909375,	test_acc:0.925
DT3:	train_acc:0.909375,	test_acc:0.925


In [19]:
# Combine all base estimators in a voting ensemble (default voting mode),
# fitting them with n_jobs=-1, then train it on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
en_clf = VotingClassifier(estimators=clfs, n_jobs=-1).fit(X_train, y_train)

# Display the fitted ensemble as the cell output.
en_clf

VotingClassifier(estimators=[('LR', LogisticRegression()),
                             ('KNN3', KNeighborsClassifier(n_neighbors=3)),
                             ('KNN5', KNeighborsClassifier()),
                             ('KNN7', KNeighborsClassifier(n_neighbors=7)),
                             ('KNN9', KNeighborsClassifier(n_neighbors=9)),
                             ('SVM_r', SVC()),
                             ('DT2', DecisionTreeClassifier(max_depth=2)),
                             ('DT3', DecisionTreeClassifier(max_depth=3))],
                 n_jobs=-1)

In [20]:
# Accuracy of the voting ensemble: training split first, then test split.
for features, labels in ((X_train, y_train), (X_test, y_test)):
    print(en_clf.score(features, labels))

0.915625
0.925
