In [2]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from sklearn.datasets import make_classification

# Synthetic binary-classification dataset: 1000 samples, 3 features
# (1 of them redundant), 2 classes. The fixed seed makes it reproducible.
x, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=22,
)

In [4]:
# Inspect the generated feature matrix (one row per sample, one column per feature).
x

array([[-0.91361735, -1.6607678 ,  0.17242566],
       [-1.12654374,  0.82559918,  0.37380636],
       [-0.94126783, -0.92784192,  0.22157999],
       ...,
       [-1.05651429,  0.0558829 ,  0.31026838],
       [ 1.09489437,  1.5417576 , -0.23180002],
       [-1.12376011, -0.34012717,  0.30760137]], shape=(1000, 3))

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Hold out 30% of the samples as a test set; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=33,
)

In [7]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
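# n_neighbors (k): defaults to 5
# p: defaults to 2, i.e. Euclidean distance (with the default Minkowski metric)
# algorithm: 'auto' (default), 'ball_tree', 'kd_tree' or 'brute'
# leaf_size: leaf size passed to BallTree / KDTree (default 30); affects tree build/query speed and memory
# weights: 'uniform' (default) or 'distance'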

In [10]:
# Baseline k-NN classifier: 5 neighbours, with scikit-learn picking the search algorithm.
knn = KNeighborsClassifier(algorithm='auto', n_neighbors=5)
knn.fit(X=x_train, y=y_train)

In [11]:
# Predict labels for the held-out test split with the baseline model.
y_pred = knn.predict(X=x_test)

In [12]:
from sklearn.metrics import classification_report, accuracy_score , confusion_matrix
# Baseline evaluation on the test split: accuracy, per-class report, confusion matrix.
print(accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

0.9833333333333333
              precision    recall  f1-score   support

           0       0.97      1.00      0.98       151
           1       1.00      0.97      0.98       149

    accuracy                           0.98       300
   macro avg       0.98      0.98      0.98       300
weighted avg       0.98      0.98      0.98       300

[[151   0]
 [  5 144]]


In [24]:
# Candidate hyper-parameter values for the search: k from 1 to 30 and each
# explicit neighbour-search algorithm.
param_grid = dict(
    n_neighbors=[*range(1, 31)],
    algorithm=['ball_tree', 'kd_tree', 'brute'],
)

In [25]:
# perform grid search cv
from sklearn.model_selection import GridSearchCV
# 5-fold cross-validated grid search over param_grid, scored by accuracy;
# n_jobs=-1 asks scikit-learn to use all available processors.
gscv = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)

In [26]:
# Run the search on the training split only, so the test split stays unseen.
gscv.fit(X=x_train, y=y_train)

In [27]:
# Show the estimator selected by the grid search.
gscv.best_estimator_

In [28]:
# Hyper-parameter combination that scored best in the grid search.
gscv.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 7}

In [29]:
# Mean cross-validated accuracy achieved by the best parameter combination.
gscv.best_score_

np.float64(0.9885714285714287)

In [None]:
# Evaluate the tuned model (best estimator from the grid search) on the held-out test split.
best_knn = gscv.best_estimator_
y_pred = best_knn.predict(X=x_test)

print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.97      1.00      0.98       151
           1       1.00      0.97      0.98       149

    accuracy                           0.98       300
   macro avg       0.98      0.98      0.98       300
weighted avg       0.98      0.98      0.98       300

