## KNN

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, #1000 observations
                           n_features = 3, # 3 total features
                           n_redundant=1,
                           n_classes=2, # bindary target/label
                           random_state=999)

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=42,
                                                    test_size=0.33)

In [4]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(algorithm='auto', n_neighbors=5)
classifier.fit(X_train, y_train)

In [5]:
y_pred = classifier.predict(X_test)

In [6]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [7]:
confusion_matrix(y_pred, y_test)

array([[158,  20],
       [ 11, 141]])

In [8]:
accuracy_score(y_pred, y_test)

0.906060606060606

In [9]:
print(classification_report(y_pred, y_test))

              precision    recall  f1-score   support

           0       0.93      0.89      0.91       178
           1       0.88      0.93      0.90       152

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



## HYPER PARAMETER TUNING 

In [20]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier()

In [21]:
from sklearn.model_selection import GridSearchCV

In [23]:
parameter = {'n_neighbors' : [1,2,3],
             'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute']}

In [24]:
knn_cls = GridSearchCV(classifier,param_grid=parameter)

In [25]:
knn_cls.fit(X_train, y_train)

In [26]:
knn_cls.best_params_

{'algorithm': 'auto', 'n_neighbors': 3}

In [27]:
y_pred_grid = knn_cls.predict(X_test)

In [28]:
confusion_matrix(y_pred_grid, y_test)

array([[151,  19],
       [ 18, 142]])

In [29]:
accuracy_score(y_pred_grid, y_test)

0.8878787878787879

In [30]:
print(classification_report(y_pred, y_test))

              precision    recall  f1-score   support

           0       0.93      0.89      0.91       178
           1       0.88      0.93      0.90       152

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330

