## K Nearest Neighbour Algorithm (Classifier)

In [18]:
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sns 

In [42]:
# Synthetic binary-classification dataset: 10,000 samples, 3 features
# (1 of them redundant), 2 classes. The fixed random_state makes the
# generated data reproducible.
from sklearn.datasets import make_classification

x, y = make_classification(n_samples=10000, n_features=3, n_redundant=1,
                           n_classes=2, random_state=500)

In [22]:
# Display the feature matrix returned by make_classification (one column per feature).
x

array([[ 1.48171343, -0.22462449, -0.62053298],
       [ 1.58052189, -0.54925104, -0.68827874],
       [ 1.03537668, -1.11287964, -0.51500277],
       ...,
       [-1.31309762, -1.56576142,  0.39964927],
       [-0.83359837,  2.06523648,  0.51419347],
       [-1.80973168, -1.45028435,  0.61105864]])

In [43]:
# Hold out 25% of the samples as a test set; the fixed seed keeps the split reproducible.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [44]:
# Model training with the Ball Tree algorithm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# k = 5 neighbours; p=2 selects the Euclidean form of the Minkowski metric.
k_neighbour_classifier = KNeighborsClassifier(
    n_neighbors=5,
    algorithm='ball_tree',
    p=2,
)
k_neighbour_classifier.fit(x_train, y_train)

# Score the held-out test split.
y_pred = k_neighbour_classifier.predict(x_test)
report = classification_report(y_test, y_pred)
metrix = confusion_matrix(y_test, y_pred)

# Classification report first, confusion matrix second.
print(report, metrix, sep='\n')

              precision    recall  f1-score   support

           0       0.88      0.87      0.88      1261
           1       0.87      0.88      0.88      1239

    accuracy                           0.88      2500
   macro avg       0.88      0.88      0.88      2500
weighted avg       0.88      0.88      0.88      2500

[[1095  166]
 [ 146 1093]]


In [46]:
# Model training with the KD Tree algorithm
# NOTE(review): this cell uses n_neighbors=1 whereas the Ball Tree cell uses
# n_neighbors=5, so the two reports differ in k as well as in the search
# algorithm -- they are not a like-for-like comparison of ball_tree vs kd_tree.
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

k_neighbour_classifier = KNeighborsClassifier(
    n_neighbors=1,
    algorithm='kd_tree',
)
k_neighbour_classifier.fit(x_train, y_train)

# Score the held-out test split.
y_pred = k_neighbour_classifier.predict(x_test)
report = classification_report(y_test, y_pred)
metrix = confusion_matrix(y_test, y_pred)

# Classification report first, confusion matrix second.
print(report, metrix, sep='\n')

              precision    recall  f1-score   support

           0       0.83      0.81      0.82      1261
           1       0.81      0.83      0.82      1239

    accuracy                           0.82      2500
   macro avg       0.82      0.82      0.82      2500
weighted avg       0.82      0.82      0.82      2500

[[1022  239]
 [ 211 1028]]


In [None]:
# Hyperparameter tuning: candidate values explored by the grid search.
# NOTE(review): ball_tree, kd_tree and brute are all exact neighbour searches,
# so the 'algorithm' axis presumably affects speed rather than accuracy --
# confirm before keeping it in the grid.
params_grid = dict(
    n_neighbors=[1, 2, 3, 4, 5, 6],
    algorithm=['ball_tree', 'kd_tree', 'brute'],
    p=[1, 2],  # Minkowski power parameter: 1 = Manhattan, 2 = Euclidean
)

In [48]:
from sklearn.model_selection import GridSearchCV

# Search over params_grid with 5-fold cross-validation on the training split.
# refit=True retrains the best configuration on the full training split, so
# the search object can be used directly for prediction afterwards.
grid_knclassifier = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=params_grid,
    cv=5,
    refit=True,
)

# Kept as the last expression so the notebook renders the fitted search object.
grid_knclassifier.fit(x_train, y_train)


0,1,2
,estimator,KNeighborsClassifier()
,param_grid,"{'algorithm': ['ball_tree', 'kd_tree', ...], 'n_neighbors': [1, 2, ...], 'p': [1, 2]}"
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'ball_tree'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [None]:
# These are the best parameters found by the grid search.
grid_knclassifier.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 5, 'p': 2}

In [49]:
# Evaluate the tuned classifier on the held-out test split.
from sklearn.metrics import classification_report, confusion_matrix

# predict() on the search object delegates to the refitted best estimator.
y_pred = grid_knclassifier.predict(x_test)

report = classification_report(y_test, y_pred)
metrix = confusion_matrix(y_test, y_pred)

# Classification report first, confusion matrix second.
print(report, metrix, sep='\n')

              precision    recall  f1-score   support

           0       0.88      0.87      0.88      1261
           1       0.87      0.88      0.88      1239

    accuracy                           0.88      2500
   macro avg       0.88      0.88      0.88      2500
weighted avg       0.88      0.88      0.88      2500

[[1095  166]
 [ 146 1093]]


## K Nearest Neighbour Algorithm (Regression)

In [51]:
# Synthetic regression dataset: 10,000 samples, 2 features, noise=15 added to
# the target. The fixed random_state makes the generated data reproducible.
from sklearn.datasets import make_regression

x, y = make_regression(
    n_samples=10000,
    n_features=2,
    noise=15,
    random_state=50,
)

In [52]:
# Splitting the data: 70% for training, 30% held out for testing (seeded for reproducibility).
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=500,
)

In [None]:
# Training the baseline KNN regressor (k = 5)
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.neighbors import KNeighborsRegressor

kn_regressor = KNeighborsRegressor(n_neighbors=5)
kn_regressor.fit(x_train, y_train)

# Score the held-out test split.
y_pred = kn_regressor.predict(x_test)
score = r2_score(y_test, y_pred)             # R^2: closer to 1 is better
error = mean_absolute_error(y_test, y_pred)  # MAE: lower is better

# R^2 on the first line, MAE on the second.
print(score, error, sep='\n')

0.9735127157524367
13.440179128358842


In [55]:
# Hyperparameter tuning for KNeighborsRegressor: candidate values for the grid search.
# NOTE(review): ball_tree, kd_tree and brute are all exact neighbour searches,
# so the 'algorithm' axis presumably affects speed rather than accuracy --
# confirm before keeping it in the grid.
params_grid = dict(
    n_neighbors=[1, 2, 3, 4, 5, 6],
    algorithm=['ball_tree', 'kd_tree', 'brute'],
    p=[1, 2],  # Minkowski power parameter: 1 = Manhattan, 2 = Euclidean
)

In [56]:
from sklearn.model_selection import GridSearchCV

# Search over params_grid with 5-fold cross-validation on the training split.
# No `scoring` is passed, so the estimator's own score method ranks candidates.
# refit=True retrains the best configuration on the full training split.
grid_knregressor = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=params_grid,
    cv=5,
    refit=True,
)

# Kept as the last expression so the notebook renders the fitted search object.
grid_knregressor.fit(x_train, y_train)

0,1,2
,estimator,KNeighborsRegressor()
,param_grid,"{'algorithm': ['ball_tree', 'kd_tree', ...], 'n_neighbors': [1, 2, ...], 'p': [1, 2]}"
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_neighbors,6
,weights,'uniform'
,algorithm,'ball_tree'
,leaf_size,30
,p,1
,metric,'minkowski'
,metric_params,
,n_jobs,


In [59]:
# Best hyperparameter combination found by the regressor grid search.
grid_knregressor.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 6, 'p': 1}

In [58]:
# Evaluate the tuned regressor on the held-out test split.
# r2_score and mean_absolute_error are the names imported in the earlier
# regressor-training cell.
y_pred = grid_knregressor.predict(x_test)

score = r2_score(y_test, y_pred)             # R^2: closer to 1 is better
error = mean_absolute_error(y_test, y_pred)  # MAE: lower is better

# R^2 on the first line, MAE on the second.
print(score, error, sep='\n')

0.9740825162311569
13.23486367981751
