In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the iris dataset and split it into the feature matrix and target vector.
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
# Two-step pipeline: rescale the features, then classify with k-nearest neighbours.
# The step names ('scaler', 'model') are the prefixes used to address step parameters.
pipe = Pipeline(
    steps=[
        ('scaler', MinMaxScaler()),
        ('model', KNeighborsClassifier(weights='distance', n_neighbors=2, n_jobs=-1)),
    ]
)

Turning to the hyperparameters: what number of neighbors gives the best performance for our model?

One of the scikit-learn tools for answering this is `GridSearchCV`: you define a set of candidate values for each hyperparameter you want to tune, and it evaluates every combination with cross-validation to find the best one.

In [4]:
# List every parameter of the pipeline. Parameters belonging to a step are
# exposed under '<step name>__<parameter>' keys (e.g. 'model__n_neighbors').
pipe.get_params()

{'memory': None,
 'steps': [('scaler', MinMaxScaler()),
  ('model',
   KNeighborsClassifier(n_jobs=-1, n_neighbors=2, weights='distance'))],
 'verbose': False,
 'scaler': MinMaxScaler(),
 'model': KNeighborsClassifier(n_jobs=-1, n_neighbors=2, weights='distance'),
 'scaler__clip': False,
 'scaler__copy': True,
 'scaler__feature_range': (0, 1),
 'model__algorithm': 'auto',
 'model__leaf_size': 30,
 'model__metric': 'minkowski',
 'model__metric_params': None,
 'model__n_jobs': -1,
 'model__n_neighbors': 2,
 'model__p': 2,
 'model__weights': 'distance'}

In [6]:
# Search space: every combination of scaler, neighbour count and vote weighting.
# Keys follow the pipeline's '<step>' / '<step>__<parameter>' naming.
param_grid = {
    'scaler': [MinMaxScaler(), StandardScaler()],
    'model__n_neighbors': list(range(1, 11)),
    'model__weights': ['uniform', 'distance'],
}

# 3-fold cross-validation over the 2 * 10 * 2 = 40 candidate settings.
mod = GridSearchCV(pipe, param_grid, cv=3)

In [7]:
# Run the cross-validated search over the parameter grid on X and y.
mod.fit(X, y)

In [10]:
# Tabulate the search results: one row per parameter combination, with
# timings, the parameters tried, per-split test scores and the rank.
pd.DataFrame.from_dict(mod.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__n_neighbors,param_model__weights,param_scaler,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001755,0.000212,0.021318,0.001667,1,uniform,MinMaxScaler(),"{'model__n_neighbors': 1, 'model__weights': 'u...",0.98,0.94,0.94,0.953333,0.018856,23
1,0.001808,0.000313,0.017392,0.000276,1,uniform,StandardScaler(),"{'model__n_neighbors': 1, 'model__weights': 'u...",0.96,0.94,0.94,0.946667,0.009428,32
2,0.001184,2.7e-05,0.013427,3.5e-05,1,distance,MinMaxScaler(),"{'model__n_neighbors': 1, 'model__weights': 'd...",0.98,0.94,0.94,0.953333,0.018856,23
3,0.001402,0.000107,0.013969,0.000132,1,distance,StandardScaler(),"{'model__n_neighbors': 1, 'model__weights': 'd...",0.96,0.94,0.94,0.946667,0.009428,32
4,0.001171,4.2e-05,0.016755,0.000789,2,uniform,MinMaxScaler(),"{'model__n_neighbors': 2, 'model__weights': 'u...",0.98,0.92,0.9,0.933333,0.033993,39
5,0.001748,0.000689,0.018401,0.00144,2,uniform,StandardScaler(),"{'model__n_neighbors': 2, 'model__weights': 'u...",0.96,0.92,0.9,0.926667,0.024944,40
6,0.003373,0.000885,0.027476,0.004672,2,distance,MinMaxScaler(),"{'model__n_neighbors': 2, 'model__weights': 'd...",0.98,0.94,0.94,0.953333,0.018856,23
7,0.001958,0.000178,0.014475,0.000408,2,distance,StandardScaler(),"{'model__n_neighbors': 2, 'model__weights': 'd...",0.96,0.94,0.94,0.946667,0.009428,32
8,0.001291,5e-05,0.017348,0.000511,3,uniform,MinMaxScaler(),"{'model__n_neighbors': 3, 'model__weights': 'u...",0.98,0.96,0.94,0.96,0.01633,14
9,0.001535,0.000422,0.018176,0.001136,3,uniform,StandardScaler(),"{'model__n_neighbors': 3, 'model__weights': 'u...",0.98,0.94,0.9,0.94,0.03266,38


In [11]:
# Parameter combination that ranked first in the grid search.
mod.best_params_

{'model__n_neighbors': 6,
 'model__weights': 'uniform',
 'scaler': MinMaxScaler()}