In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import NearestNeighbors
from scipy import stats as st

In [2]:
# Handwritten digits: scikit-learn's bundled 8x8 digits dataset
# (a small MNIST-like set, not the full MNIST data).
from sklearn.datasets import load_digits

digits = load_digits()
x = digits.data    # feature matrix, one flattened image per row
y = digits.target  # digit class label for each row

# Hold out 30% of the samples for testing. The split is seeded so that a
# fresh "Restart & Run All" reproduces the same partition and therefore the
# same confusion matrix; without a seed every run shuffles differently.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, test_size=0.3, random_state=0
)

In [60]:
# kNN
# Number of nearest training points consulted for each test point.
k = 6

# Build the neighbour index over the training features. NearestNeighbors is an
# unsupervised estimator: the labels are accepted by fit() for API consistency
# but are not used. fit() returns the fitted estimator, so it can be chained.
model = NearestNeighbors(n_neighbors=k).fit(x_train, y_train)

# Sparse (n_test x n_train) graph: row i has a non-zero entry in every column
# that corresponds to one of the k nearest training points of test point i.
test_neighbors = model.kneighbors_graph(x_test)

In [50]:
# Slower Method
# Walks every (test point, training point) pair of the neighbour graph with
# explicit Python loops and copies the label of each neighbour that is found.

# test_nearest_neighbors looks through test_neighbors and gets the classifications of the k nearest ones
test_nearest_neighbors = np.empty((y_test.shape[0],k))
# y_pred gets the mode of the nearest neighbors
y_pred = np.empty((y_test.shape[0],1))

# Densify the sparse graph ONCE. Calling .toarray() inside the inner loop
# rebuilt the full dense matrix for every single (i, j) pair.
neighbor_mask = test_neighbors.toarray()

print("Total number of test points: " + str(x_test.shape[0]))
for i in range(x_test.shape[0]):
    if i%50 == 0:
        print("i=" + str(i))
    n = 0  # next free slot in row i of test_nearest_neighbors
    for j in range(x_train.shape[0]):
        if neighbor_mask[i, j] > 0:
            test_nearest_neighbors[i][n] = y_train[j]
            n += 1
    # With keepdims=True, .mode is a length-1 array; index it explicitly,
    # because int() on a 1-d array is deprecated in recent NumPy releases.
    y_pred[i] = int(st.mode(test_nearest_neighbors[i], keepdims=True).mode[0])

Total number of test points: 540
i=0
i=50
i=100
i=150
i=200
i=250
i=300
i=350
i=400
i=450
i=500


In [61]:
# Faster method
# Looks up all neighbours of a test point at once via the non-zero columns of
# its row in the neighbour graph, instead of scanning the columns one by one.

# test_nearest_neighbors looks through test_neighbors and gets the classifications of the k nearest ones
test_nearest_neighbors = np.empty((y_test.shape[0],k))
# y_pred gets the mode of the nearest neighbors
y_pred = np.empty((y_test.shape[0],1))

# Densify the sparse graph ONCE, outside the loop. Calling .toarray() per
# iteration rebuilt the whole (n_test x n_train) matrix for every test point.
neighbor_mask = test_neighbors.toarray()

print("Total number of test points: " + str(x_test.shape[0]))
for i in range(x_test.shape[0]):
    if i%50 == 0:
        print("i=" + str(i))
    # Column indices of the non-zero entries in row i are the training-set
    # indices of the neighbours of test point i.
    test_nearest_neighbors[i] = y_train[np.flatnonzero(neighbor_mask[i])]
    # With keepdims=True, .mode is a length-1 array; index it explicitly,
    # because int() on a 1-d array is deprecated in recent NumPy releases.
    y_pred[i] = int(st.mode(test_nearest_neighbors[i], keepdims=True).mode[0])

print("Done!")

Total number of test points: 540
i=0
i=50
i=100
i=150
i=200
i=250
i=300
i=350
i=400
i=450
i=500
Done!


In [62]:
# Confusion matrix of the true test labels against the kNN vote.
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

# Accuracy is the share of samples on the diagonal (correct predictions)
# out of all entries in the matrix, expressed as a percentage.
accuracy_pct = np.round(100*conf_matrix.trace()/conf_matrix.sum(), 2)
print("Accuracy: " + str(accuracy_pct) + "%")

[[50  0  0  0  0  0  0  0  0  0]
 [ 0 50  0  0  0  0  0  0  0  0]
 [ 1  0 62  0  0  0  0  0  0  0]
 [ 0  0  0 53  0  0  0  0  0  0]
 [ 0  0  0  0 59  0  0  1  0  0]
 [ 0  0  0  0  0 46  1  0  0  0]
 [ 0  0  0  0  0  0 61  0  0  0]
 [ 0  0  0  0  0  0  0 59  0  0]
 [ 0  1  0  0  0  0  0  1 44  0]
 [ 0  0  0  1  1  1  0  0  1 47]]
Accuracy: 98.33%
