In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# Fetch the 'mnist_784' dataset from OpenML, divide every feature by 255 and
# replace the categorical target with its integer category codes.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, parser='auto')
X, y = X / 255.0, y.cat.codes

# Split by row order: the first 60000 rows are used for training, the
# remaining rows for testing.
train_rows, test_rows = slice(None, 60000), slice(60000, None)
X_train, y_train = X[train_rows], y[train_rows]
X_test, y_test = X[test_rows], y[test_rows]

In [None]:
def test_classifier(parameters):
    """Fit a scaled k-NN classifier and report its accuracy on the test split.

    A ``StandardScaler`` -> ``KNeighborsClassifier`` pipeline is fitted on the
    notebook-level ``X_train``/``y_train`` and evaluated on
    ``X_test``/``y_test``. The parameters and the resulting accuracy are
    printed as a side effect.

    Parameters
    ----------
    parameters : dict
        Keyword arguments forwarded unchanged to ``KNeighborsClassifier``.

    Returns
    -------
    float
        Percentage (0-100) of test samples whose predicted label equals the
        true label.
    """
    print(parameters)
    clf = make_pipeline(StandardScaler(),
                        KNeighborsClassifier(**parameters))
    clf.fit(X=X_train, y=y_train)
    y_pred = clf.predict(X_test)

    # Compare labels directly rather than subtracting them: equality works for
    # any label dtype (including non-numeric labels). np.asarray drops the
    # pandas index so the comparison is purely positional.
    n_correct = int(np.count_nonzero(np.asarray(y_pred) == np.asarray(y_test)))

    # One division of exact integer-valued operands gives the correctly
    # rounded percentage (e.g. 86.46 instead of 86.46000000000001, which the
    # former "100 - errors*100/n" form produced through double rounding).
    accuracy = 100.0 * n_correct / len(y_pred)
    print(accuracy)
    print()
    return accuracy


# Candidate values for every KNeighborsClassifier hyperparameter under study.
# The FIRST entry of each list is the baseline value; the remaining entries
# are the alternatives tried one at a time by the sweep loop.
hyp_params = dict(
    n_neighbors=[5, 1, 50, 100, 500, 1000],
    algorithm=['ball_tree', 'kd_tree', 'brute'],
    weights=['uniform', 'distance'],
    p=[2, 1],
)

# Baseline configuration: the leading candidate of every hyperparameter.
def_hyp_params = {name: candidates[0] for name, candidates in hyp_params.items()}

# Filled in by the sweep loop: hyperparameter name -> list of accuracies.
results = dict()

In [None]:
# One-at-a-time sweep: for each hyperparameter, try every non-baseline
# candidate (val_list[1:]) while all other hyperparameters keep their
# baseline value, and collect the accuracies in `results`.
for hyp, val_list in hyp_params.items():
    # Header followed by two blank lines.
    print("Now tweaking {}".format(hyp), end="\n\n\n")
    res_list = []
    for val in val_list[1:]:
        # Baseline settings with only the current hyperparameter overridden.
        def_hyp_params_copy = {**def_hyp_params, hyp: val}
        print("Set {} to {}".format(hyp, val))
        res_list.append(test_classifier(parameters=def_hyp_params_copy))
    results[hyp] = res_list

Now tweaking n_neighbors


Set n_neighbors to 1
{'n_neighbors': 1, 'algorithm': 'ball_tree', 'weights': 'uniform', 'p': 2}
94.34

Set n_neighbors to 50
{'n_neighbors': 50, 'algorithm': 'ball_tree', 'weights': 'uniform', 'p': 2}
92.19

Set n_neighbors to 100
{'n_neighbors': 100, 'algorithm': 'ball_tree', 'weights': 'uniform', 'p': 2}
90.77

Set n_neighbors to 500
{'n_neighbors': 500, 'algorithm': 'ball_tree', 'weights': 'uniform', 'p': 2}
86.46000000000001

Set n_neighbors to 1000
{'n_neighbors': 1000, 'algorithm': 'ball_tree', 'weights': 'uniform', 'p': 2}
82.8

Now tweaking algorithm


Set algorithm to kd_tree
{'n_neighbors': 5, 'algorithm': 'kd_tree', 'weights': 'uniform', 'p': 2}
94.43

Set algorithm to brute
{'n_neighbors': 5, 'algorithm': 'brute', 'weights': 'uniform', 'p': 2}
94.43

Now tweaking weights


Set weights to distance
{'n_neighbors': 5, 'algorithm': 'ball_tree', 'weights': 'distance', 'p': 2}
94.5

Now tweaking p


Set p to 1
{'n_neighbors': 5, 'algorithm': 'ball_tree', 'weights': 'uniform', 'p': 1}
95.73

In [None]:
# Accuracy (%) for each swept hyperparameter; every list follows the order of
# the non-baseline candidates, i.e. hyp_params[name][1:].
results

{'n_neighbors': [94.34, 92.19, 90.77, 86.46000000000001, 82.8],
 'algorithm': [94.43, 94.43],
 'weights': [94.5],
 'p': [95.73]}

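Computing the baseline accuracy with the default hyperparameters (the first value of each list in `hyp_params`), for comparison with the sweep results above.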

In [None]:
# Reference score: evaluate the default configuration, in which every
# hyperparameter takes the first value of its candidate list.
baseline = test_classifier(parameters=def_hyp_params)

{'n_neighbors': 5, 'algorithm': 'ball_tree', 'weights': 'uniform', 'p': 2}
94.43

