In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import StandardScaler

In [2]:
# Read the wine sample data set (CSV file expected in the working directory)
wine_csv_path = 'wine_sample.csv'
wine = pd.read_csv(wine_csv_path)

In [3]:
# Inputs are the 'sulphates' and 'alcohol' columns; the target is the 'type' column.
# y is deliberately kept as a one-column DataFrame (double brackets), as before.
feature_cols = ['sulphates', 'alcohol']
X = wine[feature_cols]
y = wine[['type']]

# Hold out 10% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=123,
)

# Standardize the inputs: the scaler is fitted on the training rows only,
# and the same fitted scaler is then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# Base estimator: k-nearest neighbors classifier with library defaults
knnModel = KNeighborsClassifier()

# Candidate values for the number of neighbors
k = {'n_neighbors': [3, 5, 7, 9, 11]}

# Set up the grid search over the candidates (no cv/scoring arguments are
# passed, so scikit-learn's defaults apply) and fit it on the training data.
# y_train is a one-column DataFrame, so it is flattened to a 1-D array first.
knnTuning = GridSearchCV(estimator=knnModel, param_grid=k)
knnTuning.fit(X_train, y_train.to_numpy().ravel())

In [5]:
# All available results
# cv_results_ is a dict keyed by metric name: fit/score timings, the parameter
# setting of each candidate, and the per-split test scores (see the output below).
# It is left as the bare last expression so the notebook displays the dict itself.
knnTuning.cv_results_

{'mean_fit_time': array([0.00056744, 0.00070443, 0.0007031 , 0.00070143, 0.00056062]),
 'std_fit_time': array([1.33240367e-04, 1.68142519e-04, 1.39030621e-04, 8.54059146e-05,
        9.91338145e-05]),
 'mean_score_time': array([0.00187716, 0.00250239, 0.00217857, 0.00208035, 0.00180159]),
 'std_score_time': array([0.00032663, 0.00079055, 0.00065913, 0.0002779 , 0.00041065]),
 'param_n_neighbors': masked_array(data=[3, 5, 7, 9, 11],
              mask=[False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'n_neighbors': 3},
  {'n_neighbors': 5},
  {'n_neighbors': 7},
  {'n_neighbors': 9},
  {'n_neighbors': 11}],
 'split0_test_score': array([0.75      , 0.69444444, 0.72222222, 0.75      , 0.75      ]),
 'split1_test_score': array([0.69444444, 0.66666667, 0.69444444, 0.72222222, 0.69444444]),
 'split2_test_score': array([0.69444444, 0.72222222, 0.69444444, 0.66666667, 0.69444444]),
 'split3_test_score': array([0.66666667, 0.61111111, 0.611111

In [6]:
# Report the mean score per candidate k and the estimator the search selected.
# Note: 'mean_test_score' is the average over the cross-validation splits made
# inside the training data, not a score on the held-out X_test/y_test.
mean_cv_scores = knnTuning.cv_results_['mean_test_score']
best_knn = knnTuning.best_estimator_
print('Mean testing scores:', mean_cv_scores)
print('Best estimator:', best_knn)

Mean testing scores: [0.67222222 0.64444444 0.65       0.65555556 0.66111111]
Best estimator: KNeighborsClassifier(n_neighbors=3)
