In [2]:
import pandas as pd
import numpy as np
import seaborn as sns 

In [3]:
from sklearn.datasets import make_classification

In [4]:
# Build a synthetic two-class dataset: 1000 samples with 3 features
# (n_redundant=1); the seed is fixed so reruns produce the same data.
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=99,
)
X.shape, y.shape

((1000, 3), (1000,))

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [6]:
# Confirm the split sizes: train features, test features, train labels, test labels.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((800, 3), (200, 3), (800,), (200,))

In [8]:
from sklearn.neighbors import KNeighborsClassifier

In [9]:
# Baseline k-nearest-neighbours model with k = 5.
classifier = KNeighborsClassifier(
    n_neighbors=5,
    algorithm="auto",
)

In [10]:
# Fit the baseline model on the training split.
classifier.fit(X=X_train, y=y_train)

In [13]:
# Predict labels for the held-out test split.
y_predict = classifier.predict(X=X_test)

In [39]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [16]:
# Evaluate the baseline predictions against the held-out labels.
print("accuracy score is :", accuracy_score(y_true=y_test, y_pred=y_predict))
print("classification report is :", classification_report(y_true=y_test, y_pred=y_predict))
print("confusion matrix is :", confusion_matrix(y_true=y_test, y_pred=y_predict))

accuracy score is : 0.945
classification report is :               precision    recall  f1-score   support

           0       0.95      0.95      0.95       109
           1       0.94      0.93      0.94        91

    accuracy                           0.94       200
   macro avg       0.94      0.94      0.94       200
weighted avg       0.94      0.94      0.94       200

confusion matrix is : [[104   5]
 [  6  85]]


In [17]:
## Next, vary the value of k (n_neighbors) and compare the resulting accuracy

In [20]:
def cheack_value_k(X_train, X_test, y_train, y_test, k_values=range(1, 11)):
    """Pick the ``n_neighbors`` value that scores the highest accuracy.

    A fresh ``KNeighborsClassifier`` is fitted on ``(X_train, y_train)`` for
    every candidate in ``k_values`` and scored with ``accuracy_score`` on
    ``(X_test, y_test)``. (The misspelled function name is kept so existing
    callers continue to work.)

    Parameters
    ----------
    X_train, y_train
        Data used to fit each candidate model.
    X_test, y_test
        Data used to score each candidate model.
    k_values : iterable of int, optional
        Candidate ``n_neighbors`` values, tried in order. Defaults to 1..10,
        which matches the previously hard-coded range.

    Returns
    -------
    tuple
        ``(best_k, best_accuracy)``. On ties the earliest candidate wins.

    Raises
    ------
    ValueError
        If ``k_values`` contains no candidates.

    Notes
    -----
    The returned accuracy is measured on the same data that was used to
    choose ``k``, so it is an optimistic estimate if ``X_test`` is also the
    final hold-out set.
    """
    candidates = list(k_values)
    if not candidates:
        raise ValueError("k_values must contain at least one candidate")

    # Start below any reachable accuracy so the first candidate is always
    # recorded; previously an all-zero run returned the invalid k = 0.
    best_k = None
    best_accuracy = float("-inf")

    for candidate_k in candidates:
        model = KNeighborsClassifier(n_neighbors=candidate_k, algorithm="auto")
        model.fit(X_train, y_train)
        # accuracy_score expects (y_true, y_pred), as used elsewhere in this notebook.
        candidate_accuracy = accuracy_score(y_test, model.predict(X_test))
        # Strict '>' keeps the earliest (smallest, for the default range) k on ties.
        if candidate_accuracy > best_accuracy:
            best_k, best_accuracy = candidate_k, candidate_accuracy

    return best_k, best_accuracy

In [21]:
# Run the k search and keep the best k together with the accuracy it reached.
k, acc = cheack_value_k(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

In [23]:
# Show the selected k and the accuracy it achieved.
k, acc

(6, 0.955)

In [24]:
## Alternatively, tune the hyperparameters with GridSearchCV

In [25]:
##

In [26]:
from sklearn.model_selection import GridSearchCV

In [32]:
# Hyperparameter grid for the KNN search: 10 x 2 x 2 = 40 combinations.
params = {
    "n_neighbors": list(range(1, 11)),
    "p": [1, 2],
    "weights": ["uniform", "distance"],
}

In [33]:
# Exhaustive search over `params`, scored by accuracy with 4-fold cross-validation.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=params,
    scoring="accuracy",
    cv=4,
    verbose=3,
)

In [34]:
# Run the cross-validated search on the training split only.
grid.fit(X=X_train, y=y_train)

Fitting 4 folds for each of 40 candidates, totalling 160 fits
[CV 1/4] END n_neighbors=1, p=1, weights=uniform;, score=0.940 total time=   0.0s
[CV 2/4] END n_neighbors=1, p=1, weights=uniform;, score=0.955 total time=   0.0s
[CV 3/4] END n_neighbors=1, p=1, weights=uniform;, score=0.985 total time=   0.0s
[CV 4/4] END n_neighbors=1, p=1, weights=uniform;, score=0.980 total time=   0.0s
[CV 1/4] END n_neighbors=1, p=1, weights=distance;, score=0.940 total time=   0.0s
[CV 2/4] END n_neighbors=1, p=1, weights=distance;, score=0.955 total time=   0.0s
[CV 3/4] END n_neighbors=1, p=1, weights=distance;, score=0.985 total time=   0.0s
[CV 4/4] END n_neighbors=1, p=1, weights=distance;, score=0.980 total time=   0.0s
[CV 1/4] END n_neighbors=1, p=2, weights=uniform;, score=0.945 total time=   0.0s
[CV 2/4] END n_neighbors=1, p=2, weights=uniform;, score=0.955 total time=   0.0s
[CV 3/4] END n_neighbors=1, p=2, weights=uniform;, score=0.985 total time=   0.0s
[CV 4/4] END n_neighbors=1, p=2,

[CV 1/4] END n_neighbors=7, p=2, weights=uniform;, score=0.935 total time=   0.0s
[CV 2/4] END n_neighbors=7, p=2, weights=uniform;, score=0.955 total time=   0.0s
[CV 3/4] END n_neighbors=7, p=2, weights=uniform;, score=0.975 total time=   0.0s
[CV 4/4] END n_neighbors=7, p=2, weights=uniform;, score=0.995 total time=   0.0s
[CV 1/4] END n_neighbors=7, p=2, weights=distance;, score=0.945 total time=   0.0s
[CV 2/4] END n_neighbors=7, p=2, weights=distance;, score=0.965 total time=   0.0s
[CV 3/4] END n_neighbors=7, p=2, weights=distance;, score=0.985 total time=   0.0s
[CV 4/4] END n_neighbors=7, p=2, weights=distance;, score=0.995 total time=   0.0s
[CV 1/4] END n_neighbors=8, p=1, weights=uniform;, score=0.940 total time=   0.0s
[CV 2/4] END n_neighbors=8, p=1, weights=uniform;, score=0.955 total time=   0.0s
[CV 3/4] END n_neighbors=8, p=1, weights=uniform;, score=0.975 total time=   0.0s
[CV 4/4] END n_neighbors=8, p=1, weights=uniform;, score=0.990 total time=   0.0s
[CV 1/4] END

In [35]:
# Hyperparameter combination selected by the cross-validated search.
grid.best_params_

{'n_neighbors': 3, 'p': 1, 'weights': 'uniform'}

In [36]:
# Build the classifier from the search result rather than copying the values
# by hand, so this cell stays correct if the grid, the data or the seed changes.
classifier = KNeighborsClassifier(algorithm="auto", **grid.best_params_)

In [37]:
# Fit the tuned model on the training split.
classifier.fit(X=X_train, y=y_train)

In [38]:
# Predict labels for the held-out test split with the tuned model.
y_predict = classifier.predict(X=X_test)

In [40]:
# Evaluate the tuned model's predictions against the held-out labels.
print("accuracy score is = ", accuracy_score(y_true=y_test, y_pred=y_predict))
print("confusion matrix is = ", confusion_matrix(y_true=y_test, y_pred=y_predict))
print("classification report is = ", classification_report(y_true=y_test, y_pred=y_predict))

accuracy score is =  0.945
confusion matrix is =  [[105   4]
 [  7  84]]
classification report is =                precision    recall  f1-score   support

           0       0.94      0.96      0.95       109
           1       0.95      0.92      0.94        91

    accuracy                           0.94       200
   macro avg       0.95      0.94      0.94       200
weighted avg       0.95      0.94      0.94       200



In [59]:
# Second search over n_neighbors only; every other setting stays at its default.
# NOTE(review): k = 2..5 are not among the candidates here — confirm this is intentional.
params = {"n_neighbors": [1, 6, 7, 8, 9, 10]}
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=params,
    scoring="accuracy",
    cv=4,
    verbose=3,
)

In [60]:
# Run the second cross-validated search on the training split.
grid.fit(X=X_train, y=y_train)

Fitting 4 folds for each of 6 candidates, totalling 24 fits
[CV 1/4] END .....................n_neighbors=1;, score=0.945 total time=   0.0s
[CV 2/4] END .....................n_neighbors=1;, score=0.955 total time=   0.0s
[CV 3/4] END .....................n_neighbors=1;, score=0.985 total time=   0.0s
[CV 4/4] END .....................n_neighbors=1;, score=0.980 total time=   0.0s
[CV 1/4] END .....................n_neighbors=6;, score=0.945 total time=   0.0s
[CV 2/4] END .....................n_neighbors=6;, score=0.955 total time=   0.0s
[CV 3/4] END .....................n_neighbors=6;, score=0.970 total time=   0.0s
[CV 4/4] END .....................n_neighbors=6;, score=0.995 total time=   0.0s
[CV 1/4] END .....................n_neighbors=7;, score=0.935 total time=   0.0s
[CV 2/4] END .....................n_neighbors=7;, score=0.955 total time=   0.0s
[CV 3/4] END .....................n_neighbors=7;, score=0.975 total time=   0.0s
[CV 4/4] END .....................n_neighbors=7;,

In [61]:
# Hyperparameter value selected by the second cross-validated search.
grid.best_params_

{'n_neighbors': 6}

In [66]:
# Take n_neighbors from the search result instead of hard-coding the printed
# value, so this cell cannot drift from what the search actually selected.
classifier = KNeighborsClassifier(**grid.best_params_)

In [67]:
# Fit the model on the training split.
classifier.fit(X=X_train, y=y_train)

In [68]:
# Predict labels for the held-out test split.
y_predict = classifier.predict(X=X_test)

In [69]:
# Evaluate the final model's predictions against the held-out labels.
print("accuracy score is = ", accuracy_score(y_true=y_test, y_pred=y_predict))
print("confusion matrix is = ", confusion_matrix(y_true=y_test, y_pred=y_predict))
print("classification report is = ", classification_report(y_true=y_test, y_pred=y_predict))

accuracy score is =  0.955
confusion matrix is =  [[107   2]
 [  7  84]]
classification report is =                precision    recall  f1-score   support

           0       0.94      0.98      0.96       109
           1       0.98      0.92      0.95        91

    accuracy                           0.95       200
   macro avg       0.96      0.95      0.95       200
weighted avg       0.96      0.95      0.95       200

