In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Load the array stored on disk (file name suggests a small MNIST training subset).
data = np.load('../datasets/mnist_train_small.npy')
# Column 0 is used as the target; every remaining column is used as a feature.
y, X = data[:, 0], data[:, 1:]

In [3]:
# Hold out 33% of the samples for evaluation; random_state=42 pins the shuffle
# so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [4]:
# Reference implementation: scikit-learn's k-NN classifier with its default settings.
model = KNeighborsClassifier()

In [5]:
# Fit the reference classifier on the training split.
model.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [6]:
# Accuracy of the reference classifier, measured on the first 100 held-out samples only.
model.score(X_test[:100], y_test[:100])

0.95

In [7]:
class CustomKNN:
    """Brute-force k-nearest-neighbours classifier.

    Neighbours are ranked by squared Euclidean distance to the query point and
    the predicted label is the most frequent label among the ``n_neighbours``
    closest training samples.

    Tie-breaking is deterministic:
      * training samples at equal distance are ordered by ascending label;
      * labels with an equal number of votes resolve to the smallest label.
    """

    def __init__(self, n_neighbours=5):
        """Create an unfitted classifier.

        Args:
            n_neighbours: Number of nearest training samples that vote on each
                prediction. Must be at least 1.

        Raises:
            ValueError: If ``n_neighbours`` is smaller than 1.
        """
        if n_neighbours < 1:
            raise ValueError("n_neighbours must be at least 1")
        self.n_neighbours = n_neighbours

    @staticmethod
    def _as_signed(values):
        """Return ``values`` as an array whose dtype is safe to subtract.

        Boolean and integer inputs are widened to ``int64`` so differences of
        unsigned values (e.g. ``uint8`` pixels) cannot wrap around. Any other
        dtype, floats in particular, is returned unchanged so fractional
        feature values are not truncated.
        """
        values = np.asarray(values)
        if values.dtype.kind in "bui":
            return values.astype(np.int64)
        return values

    def fit(self, X, y):
        """Memorise the training set.

        Args:
            X: Array-like of training samples, one sample per entry along the
                first axis.
            y: Array-like of labels, one per training sample.

        Raises:
            ValueError: If the training set is empty or ``X`` and ``y`` have
                different lengths.
        """
        features = self._as_signed(X)
        labels = np.asarray(y)
        if len(features) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(features) != len(labels):
            raise ValueError("X and y must contain the same number of samples")
        self._X = features
        self._y = labels

    def predict_point(self, point):
        """Predict the label of a single sample.

        Args:
            point: One sample with the same feature layout as a training row.

        Returns:
            The majority label among the ``n_neighbours`` nearest training
            samples (fewer if the training set is smaller than that).
        """
        # One vectorised pass over the whole training set replaces a Python
        # loop over every training row.
        deltas = self._X - self._as_signed(point)
        # Sum over every axis except the sample axis so 1-D and n-D features
        # both reduce to one distance per training sample.
        feature_axes = tuple(range(1, deltas.ndim))
        dists = (deltas * deltas).sum(axis=feature_axes)

        # lexsort treats the LAST key as primary: order by distance, then by
        # label, which keeps the ranking deterministic when distances tie.
        nearest = np.lexsort((self._y, dists))[:self.n_neighbours]

        # np.unique returns labels sorted ascending and argmax picks the first
        # maximum, so a tied vote resolves to the smallest label.
        items, counts = np.unique(self._y[nearest], return_counts=True)
        return items[np.argmax(counts)]

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Args:
            X: Iterable of samples.

        Returns:
            Integer NumPy array with one predicted label per sample.
        """
        return np.array([self.predict_point(point) for point in X], dtype=int)

    def score(self, X, y):
        """Return the fraction of samples in ``X`` whose prediction equals ``y``.

        Args:
            X: Iterable of samples to classify.
            y: Array-like of true labels, one per sample.

        Raises:
            ValueError: If ``y`` is empty, since accuracy is undefined.
        """
        targets = np.asarray(y)
        if targets.size == 0:
            raise ValueError("cannot compute a score on an empty set")
        return np.mean(self.predict(X) == targets)
        

In [8]:
# Rebinds `model` to the hand-written classifier (default n_neighbours=5); the
# scikit-learn estimator fitted earlier is no longer reachable through this name.
# NOTE(review): a distinct name would keep both models available for comparison.
model = CustomKNN()

In [9]:
# Store the training split on the custom classifier (fit returns nothing, so no cell output).
model.fit(X_train, y_train)

In [10]:
# Spot-check: predicted labels for the first 10 held-out samples.
model.predict(X_test[:10])

array([1, 7, 0, 9, 4, 5, 4, 6, 9, 2])

In [12]:
# True labels for the same 10 samples, for a side-by-side look at the predictions.
y_test[:10]

array([7, 7, 0, 9, 4, 5, 4, 6, 9, 2], dtype=uint8)

In [11]:
# Accuracy on the same 100-sample slice used for the scikit-learn model, so the
# two scores can be compared directly.
model.score(X_test[:100], y_test[:100])

0.95