## K Nearest Neighbour Classifier

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [3]:
from sklearn.datasets import make_classification

# Synthetic binary-classification data set: 1000 observations described by
# 3 features (1 of them redundant). The fixed seed makes every run produce
# the same arrays.
x, y = make_classification(
    random_state=999,
    n_classes=2,
    n_redundant=1,
    n_features=3,
    n_samples=1000,
)

In [4]:
# Display the generated feature matrix (1000 rows x 3 features requested above).
x

array([[-0.33504974,  0.02852654,  1.16193084],
       [-1.37746253, -0.4058213 ,  0.44359618],
       [-1.04520026, -0.72334759, -3.10470423],
       ...,
       [-0.75602574, -0.51816111, -2.20382324],
       [ 0.56066316, -0.07335845, -2.15660348],
       [-1.87521902, -1.11380394, -4.04620773]])

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out 33% of the observations for evaluation; the seed fixes which
# rows land in each partition.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=42, test_size=0.33
)

In [9]:
from sklearn.neighbors import KNeighborsClassifier

In [10]:
# Baseline model: classify each point by a vote among its 5 nearest
# training neighbours, letting scikit-learn choose the search algorithm.
classifier = KNeighborsClassifier(algorithm='auto', n_neighbors=5)
classifier.fit(x_train, y_train)

In [11]:
# Predict labels for the held-out rows with the baseline model.
y_pred = classifier.predict(X=x_test)

In [13]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [14]:
# Report, in order: confusion matrix, overall accuracy, per-class metrics.
for metric in (confusion_matrix, accuracy_score, classification_report):
    print(metric(y_test, y_pred))

[[158  11]
 [ 20 141]]
0.906060606060606
              precision    recall  f1-score   support

           0       0.89      0.93      0.91       169
           1       0.93      0.88      0.90       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



In [None]:
## Task
# Use GridSearchCV to find the best number of neighbours,
# trying k = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10.

## K best

In [18]:
## Hyperparameter tuning
# Candidate settings for the search: k from 1 to 10, crossed with both
# neighbour-weighting schemes.
params = dict(
    n_neighbors=list(range(1, 11)),
    weights=['uniform', 'distance'],
)

In [19]:
from sklearn.model_selection import GridSearchCV

In [21]:
# Exhaustive search over every combination in `params`, scored by accuracy
# with 5-fold cross-validation on the training split only.
# verbose=3 logs the score of each individual fold.
cv = GridSearchCV(
    estimator=classifier,
    param_grid=params,
    scoring='accuracy',
    cv=5,
    verbose=3,
)
cv.fit(x_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END ....n_neighbors=1, weights=uniform;, score=0.873 total time=   0.0s
[CV 2/5] END ....n_neighbors=1, weights=uniform;, score=0.903 total time=   0.0s
[CV 3/5] END ....n_neighbors=1, weights=uniform;, score=0.866 total time=   0.0s
[CV 4/5] END ....n_neighbors=1, weights=uniform;, score=0.858 total time=   0.0s
[CV 5/5] END ....n_neighbors=1, weights=uniform;, score=0.888 total time=   0.0s
[CV 1/5] END ...n_neighbors=1, weights=distance;, score=0.873 total time=   0.0s
[CV 2/5] END ...n_neighbors=1, weights=distance;, score=0.903 total time=   0.0s
[CV 3/5] END ...n_neighbors=1, weights=distance;, score=0.866 total time=   0.0s
[CV 4/5] END ...n_neighbors=1, weights=distance;, score=0.858 total time=   0.0s
[CV 5/5] END ...n_neighbors=1, weights=distance;, score=0.888 total time=   0.0s
[CV 1/5] END ....n_neighbors=2, weights=uniform;, score=0.881 total time=   0.0s
[CV 2/5] END ....n_neighbors=2, weights=uniform

In [22]:
# Show the parameter combination that scored best in the cross-validated search.
cv.best_params_

{'n_neighbors': 9, 'weights': 'uniform'}

In [23]:
# Rebuild the model from the grid-search winner rather than hard-coding the
# value read off the previous cell's output. This keeps the cell correct if
# the grid, the data or the seed changes, and it applies the tuned `weights`
# setting together with `n_neighbors` (previously `weights` was dropped).
classifier = KNeighborsClassifier(algorithm='auto', **cv.best_params_)
classifier.fit(x_train,y_train)

In [24]:
# Predict labels for the held-out rows with the re-fitted (tuned) model.
y_pred = classifier.predict(X=x_test)

In [25]:
# Same three-part report as for the baseline, emitted one item per line:
# confusion matrix, overall accuracy, per-class precision/recall/F1.
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep='\n',
)

[[156  13]
 [ 16 145]]
0.9121212121212121
              precision    recall  f1-score   support

           0       0.91      0.92      0.91       169
           1       0.92      0.90      0.91       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

