In [128]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import sklearn.datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,accuracy_score

# for pipeline
from sklearn.pipeline import make_pipeline

In [129]:
# Load and split data

# The import cell uses `import sklearn.datasets`, which binds only the name
# `sklearn`; a bare `datasets.load_iris()` raises NameError on a fresh kernel,
# so the loader is reached through the package path instead.
# NOTE: despite its name, `df` is the object returned by load_iris (accessed
# below via .data / .target), not a pandas DataFrame.
df=sklearn.datasets.load_iris()
X=df.data
y=df.target

# Hold out 30% of the samples as the test split. stratify=y asks for the class
# proportions of y to be kept in both splits; random_state=42 fixes the shuffle
# so the split is reproducible across runs.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,stratify=y,random_state=42)

## No pipelines

In [135]:
# Standardize: the scaler is fitted on the training split only, then the same
# fitted transform is applied to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Call Knn: 5 neighbours, uniform weights, Minkowski power p=2
# (p=2 corresponds to Euclidean distance per the scikit-learn docs).
knn = KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    p=2,
)
model = knn.fit(X_train_std, y_train)
y_pred = model.predict(X_test_std)

# Accuracy on each split, computed on the standardized features.
train_accuracy = knn.score(X_train_std, y_train)
test_accuracy = knn.score(X_test_std, y_test)
print('Accuracy of train is %.3f'% train_accuracy)
print('Accuracy of test is %.3f'% test_accuracy)

# Per-class precision / recall / f1 on the held-out test split.
print(classification_report(y_test, y_pred))

Accuracy of train is 0.981
Accuracy of test is 0.911
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.79      1.00      0.88        15
           2       1.00      0.73      0.85        15

    accuracy                           0.91        45
   macro avg       0.93      0.91      0.91        45
weighted avg       0.93      0.91      0.91        45



## With Pipelines

In [172]:
# Chain the scaler and the classifier into a single estimator so that the
# grid search fits and scores them together.
sc = StandardScaler()
pipeline = make_pipeline(sc, KNeighborsClassifier())

# GridSearchCV
# Keys follow the `<step name>__<parameter>` form; the step name used here is
# the lowercased class name of the KNN step.
# 6 neighbour counts x 2 values of p x 2 weightings = 24 candidates.
param_grid = [
    {
        'kneighborsclassifier__n_neighbors': list(range(3, 9)),
        'kneighborsclassifier__p': [1, 2],
        'kneighborsclassifier__weights': ['uniform', 'distance'],
    }
]

gs = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
    n_jobs=2,
    refit=True,
    verbose=1,
)

#Fit model
gs.fit(X_train, y_train)

# NOTE(review): per the scikit-learn docs, best_score_ is the mean
# cross-validated score of the best candidate, so the "train" label below
# refers to cross-validation on the training split, not a plain training-set
# accuracy.
cv_best_score = gs.best_score_
best_params = gs.best_params_
test_score = gs.score(X_test, y_test)

print('Best train score is %.3f'% cv_best_score)
print('Best parameters are', best_params)
print('Best test score is %.3f'% test_score)

# Per-class report for the best configuration on the held-out test split.
y_pred = gs.predict(X_test)
print(classification_report(y_test, y_pred))


Fitting 10 folds for each of 24 candidates, totalling 240 fits
Best train score is 0.972
Best parameters are {'kneighborsclassifier__n_neighbors': 5, 'kneighborsclassifier__p': 1, 'kneighborsclassifier__weights': 'uniform'}
Best test score is 0.911
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.79      1.00      0.88        15
           2       1.00      0.73      0.85        15

    accuracy                           0.91        45
   macro avg       0.93      0.91      0.91        45
weighted avg       0.93      0.91      0.91        45

