# Grid Search - KNN

Perform a basic grid search using common hyper-parameters for a K-Nearest Neighbors (KNN) classifier.


In [1]:
import timeit
import pandas as pd
import numpy as np

from sklearn import datasets
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.neighbors import KNeighborsClassifier

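We are using the Iris dataset. The full dataset has more than two classes, so the code would need to change a bit in order to use precision, recall, or F1 score on it. To keep things simple, this notebook uses only the first 100 samples (`iris.data[:100]`), which leaves a two-class problem, and runs the grid search once for each of accuracy, precision, recall, and F1. Multi-class scoring on the full dataset will be in another notebook.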

In [2]:
# Scorer names passed to GridSearchCV; one grid search is run per entry.
score_metrics = ['accuracy', 'precision', 'recall', 'f1']

iris = datasets.load_iris()

# Keep only the first 100 rows; features and labels are sliced identically
# so they stay aligned.
X, y = iris.data[:100], iris.target[:100]

# 3-fold cross-validation splitter shared by every grid search.
kf = KFold(n_splits=3)

# Hold out 33% of the rows for the final score; the fixed seed makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

### Common grid hyper-parameter settings for KNN:

In [3]:
# Neither the pipeline nor the search space depends on the scoring metric, so
# both are defined once. GridSearchCV fits clones of the estimator it is
# given, so the same pipeline object can back every search.
knn_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', KNeighborsClassifier()),
])

# Keys follow the "<pipeline step>__<parameter>" naming convention.
grid_params = [dict(
    scaler__with_mean=[True],
    scaler__with_std=[True],
    classifier__algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
    classifier__leaf_size=range(1, 10, 5),      # 1, 6
    classifier__n_neighbors=range(1, 16, 4),    # 1, 5, 9, 13
    classifier__weights=['uniform', 'distance'],
)]

for score_metric in score_metrics:
    print(f"\n\n ----------------    KNN {score_metric}  ---------------")

    grid = GridSearchCV(
        estimator=knn_pipe,
        param_grid=grid_params,
        scoring=score_metric,
        cv=kf,
    )

    # Wall-clock time of one complete search (fit runs exactly once).
    timing = timeit.Timer(lambda: grid.fit(X_train, y_train)).timeit(number=1)

    # Score the refit best estimator on the held-out rows with the same metric.
    score = grid.score(X_test, y_test)

    print(f"KNN {score_metric}: {score}")
    print("seconds:", timing)
    # Best cross-validated score, the winning parameters, and the refit pipeline,
    # one per line.
    print(grid.best_score_, grid.best_params_, grid.best_estimator_, sep="\n")



 ----------------    KNN accuracy  ---------------
KNN accuracy: 1.0
seconds: 0.5299002040000005
1.0
{'classifier__algorithm': 'auto', 'classifier__leaf_size': 1, 'classifier__n_neighbors': 1, 'classifier__weights': 'uniform', 'scaler__with_mean': True, 'scaler__with_std': True}
Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 KNeighborsClassifier(leaf_size=1, n_neighbors=1))])


 ----------------    KNN precision  ---------------
KNN precision: 1.0
seconds: 0.7331441989999803
1.0
{'classifier__algorithm': 'auto', 'classifier__leaf_size': 1, 'classifier__n_neighbors': 1, 'classifier__weights': 'uniform', 'scaler__with_mean': True, 'scaler__with_std': True}
Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 KNeighborsClassifier(leaf_size=1, n_neighbors=1))])


 ----------------    KNN recall  ---------------
KNN recall: 1.0
seconds: 0.658599871999968
1.0
{'classifier__algorithm': 'auto', 'classifie