In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [12]:
# Load the dataset from a .npy file (path is relative to the notebook's working directory).
data = np.load("./datasets/mnist_train_small.npy")

In [13]:
# Column 0 of each row is used as the target label; all remaining columns are the features.
X = data[:, 1:]
y = data[:, 0]

In [14]:
# Hold out 33% of the samples for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

## Custom KNN implementation

In [35]:
class CustomKNN:
    """Brute-force k-nearest-neighbours classifier (squared Euclidean distance).

    Training data is standardised with a single global mean and standard
    deviation (computed over every element of the training matrix). The same
    training statistics are re-used for every query so that training and query
    points live in the same coordinate system.

    Parameters
    ----------
    n_neighbours : int, default 5
        Number of nearest training samples that vote on each prediction.

    Raises
    ------
    ValueError
        If ``n_neighbours`` is smaller than 1.
    """

    def __init__(self, n_neighbours=5):
        if n_neighbours < 1:
            raise ValueError("n_neighbours must be at least 1")
        self.n_neighbours = n_neighbours

    def fit(self, X, y):
        """Store the standardised training samples and their labels.

        Parameters
        ----------
        X : array-like, one sample per row
        y : array-like, one label per row of ``X``
        """
        # Work in float64 so unsigned pixel data cannot wrap around.
        X = np.asarray(X, dtype=np.float64)
        self._mean = X.mean()
        std = X.std()
        # Constant (or empty) data has no usable spread; fall back to 1 so the
        # division below never produces NaN/inf.
        self._std = std if std > 0 else 1.0
        self._X = (X - self._mean) / self._std
        self._y = np.asarray(y)

    def predict_point(self, point):
        """Return the majority label among the nearest training samples.

        ``point`` must already be in the standardised space used by ``fit``
        (``predict`` takes care of that for raw inputs). The label is returned
        in the dtype of the training labels.
        """
        # Squared distance from the query to every training sample at once.
        diff = self._X - point
        dists = (diff ** 2).reshape(len(diff), -1).sum(axis=1)

        # Order by distance, breaking distance ties by label value
        # (lexsort treats its last key as the primary one).
        nearest = np.lexsort((self._y, dists))[:self.n_neighbours]

        # np.unique returns sorted labels, so argmax picks the smallest label
        # when several labels share the highest vote count.
        labels, counts = np.unique(self._y[nearest], return_counts=True)
        return labels[np.argmax(counts)]

    def predict(self, X):
        """Predict an integer label for every row of ``X``.

        The rows are standardised with the mean/std learned in ``fit``; using
        the query batch's own statistics would shift and rescale it relative
        to the stored training data (and breaks down for a single sample).
        """
        X = (np.asarray(X, dtype=np.float64) - self._mean) / self._std
        return np.array([self.predict_point(point) for point in X], dtype=int)

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``.

        Raises
        ------
        ValueError
            If ``y`` is empty (accuracy is undefined).
        """
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("cannot compute accuracy on an empty label set")
        return float(np.mean(self.predict(X) == y))
        

In [36]:
# Instantiate the custom classifier with its default neighbour count (5).
model1 = CustomKNN()

In [37]:
# Fitting only stores the standardised training samples and labels; the
# distance computations happen at prediction time.
model1.fit(X_train, y_train)

In [38]:
# Predict labels for the first 10 held-out samples.
model1.predict(X_test[:10])

array([1, 7, 0, 9, 4, 5, 4, 6, 9, 2])

In [39]:
# True labels of the same 10 samples, for a side-by-side comparison with the predictions above.
y_test[:10]

array([7, 7, 0, 9, 4, 5, 4, 6, 9, 2], dtype=uint8)

In [40]:
# Fraction of correct predictions, evaluated on the first 100 held-out samples only.
model1.score(X_test[:100], y_test[:100])

0.95

## KNN from sklearn

In [15]:
from sklearn.neighbors import KNeighborsClassifier

In [16]:
# Reference implementation with default settings (n_neighbors=5, as shown in the repr below).
model = KNeighborsClassifier()

In [17]:
# Fit on the same training split that the custom model used.
model.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [18]:
# Predict the same first 10 held-out samples that were given to the custom model.
model.predict(X_test[:10])

array([1, 7, 0, 9, 4, 5, 4, 6, 9, 2], dtype=uint8)

In [19]:
# True labels of those 10 samples, for comparison with the predictions above.
y_test[:10]

array([7, 7, 0, 9, 4, 5, 4, 6, 9, 2], dtype=uint8)

In [20]:
# Accuracy on the same 100-sample subset used to score the custom model, so the two numbers are comparable.
model.score(X_test[:100], y_test[:100])

0.95