#### Hyper-parameters are passed as arguments to the constructor of the estimator classes. Typical examples include C, kernel and gamma for Support Vector Classifier, alpha for Lasso, etc.

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [61]:
# Load the Social Network Ads data from a CSV file (path is relative to the working directory)
dataset = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')

In [41]:
# Label-encode the second column of the dataset (Gender) as integer codes
from sklearn.preprocessing import LabelEncoder

# Fit the encoder first, then map the same column to its integer labels
enc = LabelEncoder().fit(dataset.iloc[:, 1])
gender_labeled = enc.transform(dataset.iloc[:, 1])

In [42]:
# Add the encoded gender as a new column at position 2 of the DataFrame.
# DataFrame.insert raises ValueError when the column already exists, so the
# call is guarded to keep this cell safe to re-run on the same kernel.
if 'Gender_Labeled' not in dataset.columns:
    dataset.insert(loc=2, column='Gender_Labeled', value=gender_labeled)
dataset.head(5)

Unnamed: 0,User ID,Gender,Gender_Labeled,Age,EstimatedSalary,Purchased
0,15624510,Male,1,19,19000,0
1,15810944,Male,1,35,20000,0
2,15668575,Female,0,26,43000,0
3,15603246,Female,0,27,57000,0
4,15804002,Male,1,19,76000,0


In [87]:
# Separating features and outcome columns.
# Columns are selected by name rather than by position: once 'Gender_Labeled'
# has been inserted at position 2, the positional indices [2, 3] would pick
# Gender_Labeled and Age instead of the intended Age and EstimatedSalary.
X = dataset[['Age', 'EstimatedSalary']].values
y = dataset['Purchased'].values

In [63]:
# Preview the first five rows of the feature array
X[:5]

array([[   19, 19000],
       [   35, 20000],
       [   26, 43000],
       [   27, 57000],
       [   19, 76000]], dtype=int64)

In [64]:
# Preview the first five entries of the outcome array
y[:5]

array([0, 0, 0, 0, 0], dtype=int64)

In [108]:
# Hold out 25% of the samples as a test set; the fixed random_state keeps the split reproducible
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [109]:
# Feature scaling: the scaler is fitted on the training data only, and the
# same fitted transformation is then applied to both training and test data
from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)



In [110]:
# Train a k-nearest-neighbours classifier (k = 5) on the scaled training set
from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X=X_train, y=y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [111]:
# Predict class labels for the held-out test samples
y_pred = classifier.predict(X=X_test)

In [112]:
# Confusion matrix of true vs. predicted labels on the test set
from sklearn.metrics import confusion_matrix

confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[64,  4],
       [ 3, 29]], dtype=int64)

In [113]:
# Test-set accuracy of the model, reported as a percentage
from sklearn.metrics import accuracy_score

acc = 100 * accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy of the model is {} %'.format(round(acc, 2)))

Accuracy of the model is 93.0 %


### Using Grid Search

In [114]:
# Grid search to find the optimal KNN hyper-parameters
from sklearn.model_selection import GridSearchCV

# Candidate values for each hyper-parameter; every combination is evaluated
parameters = [
    {
        'n_neighbors': [3, 5, 10, 15],
        'algorithm': ['ball_tree', 'kd_tree', 'brute'],
        'leaf_size': [10, 20, 30, 40, 50],
        'p': [1, 2, 3],
    }
]

# 10-fold cross-validation scored by accuracy; n_jobs=-1 runs the search in parallel
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
).fit(X_train, y_train)

In [115]:
# Best cross-validated score found by the grid search (scoring was set to 'accuracy')
best_accuracy = grid_search.best_score_

best_accuracy

0.9

In [116]:
# Hyper-parameter combination that achieved the best cross-validated score
best_parameters = grid_search.best_params_

best_parameters

{'algorithm': 'ball_tree', 'leaf_size': 10, 'n_neighbors': 10, 'p': 1}

#### Now we apply the best parameters found above to the KNN classifier

In [117]:
# Fitting KNN to the Training set with the optimum parameters.
# The parameters are read directly from the grid search result instead of
# being copied by hand, so this cell stays consistent if the search outcome
# changes (e.g. different data or a different parameter grid).
classifier_new = KNeighborsClassifier(**grid_search.best_params_)
classifier_new.fit(X_train, y_train)

# Evaluate the tuned model on the same held-out test set
y_pred = classifier_new.predict(X_test)

acc = accuracy_score(y_test, y_pred)*100
print('Accuracy of above model is {} %'.format(round(acc, 2)))

Accuracy of above model is 93.0 %


#### The test accuracy is the same in both cases, most likely because our dataset is small. On a larger dataset, tuning the hyper-parameters may produce a more noticeable difference between the two models.

### Cross Validating Scores of both models

In [123]:
# 10-fold cross-validated accuracy of the KNN model with default parameters
from sklearn.model_selection import cross_val_score

accuracies = cross_val_score(classifier, X_train, y_train, cv=10)

print(f'Average accuracy of default KNN model is {accuracies.mean()} and standard deviation is {accuracies.std()}')

Average accuracy of default KNN model is 0.897304412309974 and standard deviation is 0.06752974589837721


In [124]:
# 10-fold cross-validated accuracy of the KNN model built with the best parameters
accuracies = cross_val_score(classifier_new, X_train, y_train, cv=10)

print(f'Average accuracy of new KNN model is {accuracies.mean()} and standard deviation is {accuracies.std()}')

Average accuracy of new KNN model is 0.9004226918798665 and standard deviation is 0.05081830738377566


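#### From the above scores, the optimized model has a slightly higher mean cross-validated accuracy (about 0.900 vs 0.897) and a lower standard deviation (about 0.051 vs 0.068). This suggests it is a marginally better and more stable fit for this classification task, although the difference in mean accuracy is well within one standard deviation.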