# Import basic libraries

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.datasets import fetch_openml

# Pull version 1 of the OpenML "mnist_784" dataset. as_frame=False requests
# array-backed data/target fields instead of a pandas DataFrame.
mnist = fetch_openml(name='mnist_784', version=1, as_frame=False)

# Show which fields the returned Bunch exposes.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

# Separate features and target

In [3]:
X = mnist.data    # feature matrix stored under the "data" key
y = mnist.target  # labels stored under the "target" key
X.shape

(70000, 784)

In [4]:
y.shape  # sanity check: the label array should have one entry per row of X

(70000,)

# Train, test split

In [5]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the samples for final evaluation. test_size=0.25 is
# scikit-learn's default when neither size is given; it is spelled out here so
# the split ratio is visible. random_state fixes the shuffle for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Hyperparameter tuning using GridSearchCV

In [6]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Search space: 2 weighting schemes x 3 neighbour counts = 6 candidates.
param_grid = [
    {
        'weights': ["uniform", "distance"],
        'n_neighbors': [3, 4, 5],
    }
]

# 5-fold cross-validation over 6 candidates gives 30 fits in total;
# verbose=3 prints one line per fit so progress is visible.
knn_clf = KNeighborsClassifier()
grid_search = GridSearchCV(
    estimator=knn_clf,
    param_grid=param_grid,
    cv=5,
    verbose=3,
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV 1/5] END .................n_neighbors=3, weights=uniform; total time=  12.3s
[CV 2/5] END .................n_neighbors=3, weights=uniform; total time=  10.6s
[CV 3/5] END .................n_neighbors=3, weights=uniform; total time=  10.3s
[CV 4/5] END .................n_neighbors=3, weights=uniform; total time=  10.4s
[CV 5/5] END .................n_neighbors=3, weights=uniform; total time=  10.7s
[CV 1/5] END ................n_neighbors=3, weights=distance; total time=  10.0s
[CV 2/5] END ................n_neighbors=3, weights=distance; total time=  10.6s
[CV 3/5] END ................n_neighbors=3, weights=distance; total time=  12.1s
[CV 4/5] END ................n_neighbors=3, weights=distance; total time=  11.8s
[CV 5/5] END ................n_neighbors=3, weights=distance; total time=  12.9s
[CV 1/5] END .................n_neighbors=4, weights=uniform; total time=  14.6s
[CV 2/5] END .................n_neighbors=4, weig

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid=[{'n_neighbors': [3, 4, 5],
                          'weights': ['uniform', 'distance']}],
             verbose=3)

In [7]:
grid_search.best_params_  # parameter combination that scored best in the cross-validated search

{'n_neighbors': 4, 'weights': 'distance'}

In [8]:
grid_search.best_score_  # mean cross-validated score achieved by the best parameter combination

0.9717523809523809

In [9]:
from sklearn.metrics import accuracy_score

# predict() on the fitted search object delegates to the estimator that was
# refit with the best parameters found (GridSearchCV's default behaviour).
y_pred = grid_search.predict(X_test)

# Fraction of held-out samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9719428571428571

In [10]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 on the held-out set. Row names are taken from
# the distinct values found in the full label array y.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=np.unique(y),
    )
)

              precision    recall  f1-score   support

           0       0.98      0.99      0.99      1714
           1       0.96      1.00      0.98      1977
           2       0.98      0.96      0.97      1761
           3       0.97      0.97      0.97      1806
           4       0.97      0.97      0.97      1587
           5       0.97      0.97      0.97      1607
           6       0.98      0.99      0.99      1761
           7       0.96      0.97      0.97      1878
           8       0.99      0.94      0.96      1657
           9       0.95      0.96      0.96      1752

    accuracy                           0.97     17500
   macro avg       0.97      0.97      0.97     17500
weighted avg       0.97      0.97      0.97     17500

