In [38]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import os
from sklearn.datasets import fetch_openml
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict, GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import precision_recall_curve, roc_curve, roc_auc_score
from sklearn.multiclass import OneVsOneClassifier
from sklearn.preprocessing import StandardScaler

In [3]:
# Download (or load from the local scikit-learn cache) version 1 of the
# OpenML 'mnist_784' dataset as a (features, labels) pair.
X, y = fetch_openml(
    name='mnist_784',
    version=1,
    return_X_y=True,
)

In [4]:
# Split into the first 60000 rows (training) and the remainder (test),
# then shuffle the training rows.
N_TRAIN = 60000

# Coerce to plain NumPy arrays so the positional fancy-indexing below is valid
# whether fetch_openml returned ndarrays or pandas objects.
X_values = np.asarray(X)
y_values = np.asarray(y).astype(int)  # labels may arrive as strings; cast to int

X_train, X_test = X_values[:N_TRAIN], X_values[N_TRAIN:]
y_train, y_test = y_values[:N_TRAIN], y_values[N_TRAIN:]

# Fixed seed: the row order feeds the cross-validation folds, so an unseeded
# permutation makes every downstream score change from run to run.
shuffle_index = np.random.RandomState(42).permutation(N_TRAIN)
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

In [5]:
# Standardise the features. The scaler statistics are learned from the training
# split only; the test split is then transformed with those same statistics so
# that the model is evaluated in the feature space it was trained in (and no
# test information leaks into the fit).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [28]:
# Base estimator with default settings; the grid search tunes it later.
knn_clf = KNeighborsClassifier()

# Show the names of the tunable hyper-parameters.
knn_clf.get_params(deep=True).keys()

dict_keys(['algorithm', 'leaf_size', 'metric', 'metric_params', 'n_jobs', 'n_neighbors', 'p', 'weights'])

In [29]:
# Search space: odd neighbour counts 3..9 crossed with both vote-weighting schemes.
param_grid = [
    {
        'n_neighbors': list(range(3, 10, 2)),
        'weights': ['uniform', 'distance'],
    },
]

In [31]:
grid_search = GridSearchCV(knn_clf,param_grid,scoring='neg_mean_squared_error', cv=6, n_jobs=os.cpu_count())

In [32]:
grid_search.fit(X_train,y_train)

GridSearchCV(cv=6, error_score='raise-deprecating',
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='warn', n_jobs=12,
             param_grid=[{'n_neighbors': [3, 5, 7, 9],
                          'weights': ['uniform', 'distance']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [33]:
grid_search.best_params_

{'n_neighbors': 5, 'weights': 'distance'}

In [34]:
grid_search.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='distance')

In [35]:
cvres = grid_search.cv_results_

In [37]:
for mean_score, params in zip(cvres["mean_test_score"],cvres["params"]):
    print(np.sqrt(-mean_score),params)

0.9415943925066674 {'n_neighbors': 3, 'weights': 'uniform'}
0.9138107024980611 {'n_neighbors': 3, 'weights': 'distance'}
0.9333273809334001 {'n_neighbors': 5, 'weights': 'uniform'}
0.9109884741312593 {'n_neighbors': 5, 'weights': 'distance'}
0.946766426668514 {'n_neighbors': 7, 'weights': 'uniform'}
0.9318977054019036 {'n_neighbors': 7, 'weights': 'distance'}
0.9686158509268092 {'n_neighbors': 9, 'weights': 'uniform'}
0.9513674369033239 {'n_neighbors': 9, 'weights': 'distance'}


In [39]:
# Build the final classifier from the hyper-parameters the grid search selected,
# instead of hand-copying them from a printed output (which silently goes stale
# whenever the search is re-run). n_jobs is set here so neighbour queries at
# prediction time run in parallel.
best = KNeighborsClassifier(n_jobs=os.cpu_count(), **grid_search.best_params_)
best.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=12, n_neighbors=5, p=2,
                     weights='distance')

In [None]:
# Held-out accuracy of the final classifier on the test split.
# NOTE(review): `best` was fitted on standardised training features, so X_test
# must have been passed through the same `scaler` before this call — confirm.
accuracy_score(y_true=y_test, y_pred=best.predict(X_test))