# Grid Search - KNN

Perform a basic grid search using common hyper-parameters for a K-Nearest Neighbors (KNN) classifier.


In [13]:
import timeit
import pandas as pd
import numpy as np

from sklearn import datasets
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.neighbors import KNeighborsClassifier

We are using the Iris dataset. Because it has more than two classes, the code would need to change slightly in order to use precision, recall, or F1 score, so this notebook reports accuracy only; the other metrics will be covered in another notebook.

In [21]:
# Metric name handed to the grid search as its `scoring` value and echoed
# in the printed report.
score_metric = 'accuracy'

# Load Iris and unpack it into the feature matrix and the target vector.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 33% of the samples for the final evaluation; the fixed seed keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# 3-fold cross-validation splitter used by the grid search on the training set.
kf = KFold(n_splits=3)

### Common hyper-parameter grid settings for KNN:

In [26]:
# Two-step pipeline: scale the features, then classify with k-nearest neighbours.
knn_pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', KNeighborsClassifier()),
])

# Candidate values, keyed as <pipeline step>__<parameter name>.
# Only `weights` lists more than one candidate, so the grid holds two combinations.
grid_params = [{
    'scaler__with_mean': [True],
    'scaler__with_std': [True],
    # other algorithm options: 'auto', 'ball_tree', 'kd_tree', 'brute'
    'classifier__algorithm': ['ball_tree'],
    'classifier__leaf_size': [186],
    'classifier__n_neighbors': [3],
    # weights options: 'uniform', 'distance'
    'classifier__weights': ['uniform', 'distance'],
}]

grid = GridSearchCV(
    estimator=knn_pipe,
    param_grid=grid_params,
    scoring=score_metric,
    cv=kf,
)

# Time a single fit of the whole search, then score the fitted search on the
# held-out test split.
timing = timeit.Timer(lambda: grid.fit(X_train, y_train)).timeit(number=1)
score = grid.score(X_test, y_test)

print(f"KNN {score_metric} Score: {score}")
print("training seconds:", timing)
print("\nBest settings found in the grid search:")

# One value per line: best CV score, best parameter set, best fitted pipeline.
print(grid.best_score_, grid.best_params_, grid.best_estimator_, sep="\n")

KNN accuracy Score: 0.98
training seconds: 0.04033157999992909

Best settings found in the grid search:
0.9197860962566845
{'classifier__algorithm': 'ball_tree', 'classifier__leaf_size': 186, 'classifier__n_neighbors': 3, 'classifier__weights': 'distance', 'scaler__with_mean': True, 'scaler__with_std': True}
Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier',
                 KNeighborsClassifier(algorithm='ball_tree', leaf_size=186,
                                      n_neighbors=3, weights='distance'))])
