In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split


In [2]:
# Load the scikit-learn digits dataset and carve out a held-out test set.
digits = load_digits()
n_obs = len(digits.images)

# Flatten every image into a single row -> X has shape (n_obs, n_features)
X = digits.images.reshape(n_obs, -1)
# One label per image -> y has shape (n_obs,)
y = digits.target

# Number of distinct labels present in the target vector
n_classes = np.unique(y).size

# Keep 33% of the samples for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [3]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV

In [4]:
# 5-fold cross-validated search over n_neighbors = 1..5 for a k-nearest-neighbours
# classifier, with candidates compared by accuracy.
param_grid = [{'n_neighbors': range(1, 6)}]
gs = GridSearchCV(
    estimator=KNeighborsClassifier(n_neighbors=1),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
# Left as the last expression so the fitted search object is displayed by the cell
gs.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform'),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'n_neighbors': range(1, 6)}], pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring='accuracy',
       verbose=0)

In [5]:
# Summarise the cross-validation results: one line per candidate parameter setting.
cv_results = gs.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
times = cv_results['mean_fit_time']

# Columns: mean test score, its standard deviation, mean fit time, parameter dict
row_format = 'score: %0.3f (+-%0.3f) time: %4.2e params: %s'
for row in zip(means, stds, times, cv_results['params']):
    print(row_format % row)

score: 0.983 (+-0.006) time: 1.65e-03 params: {'n_neighbors': 1}
score: 0.978 (+-0.006) time: 1.50e-03 params: {'n_neighbors': 2}
score: 0.981 (+-0.002) time: 1.54e-03 params: {'n_neighbors': 3}
score: 0.975 (+-0.006) time: 1.58e-03 params: {'n_neighbors': 4}
score: 0.977 (+-0.009) time: 1.53e-03 params: {'n_neighbors': 5}


In [7]:
# predict can be called directly on the fitted search object: it uses the best
# model found by the search, as ranked by the scoring metric.
y_p = gs.predict(X_test)

# Evaluate on the held-out test set
test_confusion = confusion_matrix(y_test, y_p)
test_accuracy = accuracy_score(y_test, y_p)

print(test_confusion)
print('%.2f' % test_accuracy)

[[55  0  0  0  0  0  0  0  0  0]
 [ 0 55  0  0  0  0  0  0  0  0]
 [ 0  0 52  0  0  0  0  0  0  0]
 [ 0  0  0 55  0  0  0  0  1  0]
 [ 0  1  0  0 63  0  0  0  0  0]
 [ 0  0  0  0  0 72  0  0  0  1]
 [ 0  0  0  0  0  0 57  0  0  0]
 [ 0  0  0  0  0  0  0 61  0  1]
 [ 0  1  0  0  0  0  0  0 50  1]
 [ 0  0  0  2  1  0  0  0  0 65]]
0.98


In [9]:
# Check: a standalone k-NN trained on the full training set with the parameters
# selected by the grid search should reproduce the predictions of gs.predict.
# The parameters are read from gs.best_params_ rather than hard-coded, so this
# check keeps verifying the right model if the search outcome ever changes.
knn = KNeighborsClassifier(**gs.best_params_)
knn.fit(X_train, y_train)
y_predict = knn.predict(X_test)

# Make the check explicit instead of relying on comparing printed matrices by eye
assert np.array_equal(y_predict, gs.predict(X_test)), \
    'standalone k-NN disagrees with the refitted grid-search model'

print(confusion_matrix(y_test, y_predict))
print('%.2f' % accuracy_score(y_test, y_predict))

[[55  0  0  0  0  0  0  0  0  0]
 [ 0 55  0  0  0  0  0  0  0  0]
 [ 0  0 52  0  0  0  0  0  0  0]
 [ 0  0  0 55  0  0  0  0  1  0]
 [ 0  1  0  0 63  0  0  0  0  0]
 [ 0  0  0  0  0 72  0  0  0  1]
 [ 0  0  0  0  0  0 57  0  0  0]
 [ 0  0  0  0  0  0  0 61  0  1]
 [ 0  1  0  0  0  0  0  0 50  1]
 [ 0  0  0  2  1  0  0  0  0 65]]
0.98
