## Importing Libraries

In [1]:
import numpy as np 
import matplotlib.pyplot as plt

## Custom implementation of KNN

In [2]:
class CustomKNN:
    """k-nearest-neighbours classifier based on squared Euclidean distance.

    The squared distance is used instead of the true Euclidean distance
    because the square root does not change the ordering of the neighbours.

    Parameters
    ----------
    k : int, default 5
        Number of neighbours that vote on each prediction. Choosing an odd
        value reduces the chance of tied votes.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=5):
        if k < 1:
            raise ValueError("k must be at least 1")
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples and their labels.

        Parameters
        ----------
        X : array-like, one sample per entry along the first axis
            Any trailing axes of a sample are flattened into one feature
            vector.
        y : array-like
            One label per sample in ``X``.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")

        X = X.reshape(len(X), -1)
        # Integer features (e.g. uint8 pixels) are widened to int64 so the
        # subtraction in predict_pt cannot wrap around. Anything else is
        # stored as float64 so fractional features are not truncated.
        if np.issubdtype(X.dtype, np.integer) or X.dtype == np.bool_:
            self._X = X.astype(np.int64)
        else:
            self._X = X.astype(np.float64)
        self._y = y

    def predict_pt(self, pt):
        """Predict the class of a single sample.

        Parameters
        ----------
        pt : array-like
            One sample with the same number of features as the training data.

        Returns
        -------
        The most frequent label among the ``k`` closest training samples.
        When several labels are equally frequent, the smallest one wins.
        """
        diff = self._X - np.asarray(pt).reshape(-1)
        sq_dist = (diff * diff).sum(axis=1)

        # lexsort uses the LAST key as the primary one: order by distance and
        # break distance ties by label, exactly like sorting (distance, label)
        # pairs would.
        order = np.lexsort((self._y, sq_dist))
        nearest_labels = self._y[order[:self.k]]

        # np.unique returns the labels sorted, and argmax picks the first
        # maximum, so a tied vote resolves to the smallest label.
        labels, counts = np.unique(nearest_labels, return_counts=True)
        return labels[np.argmax(counts)]

    def predict(self, X):
        """Predict the class of every sample in ``X``.

        Returns
        -------
        numpy.ndarray of int
            One predicted label per sample, in input order.
        """
        return np.array([self.predict_pt(pt) for pt in X], dtype=int)

    def accuracy(self, X, y):
        """Return the fraction of samples in ``X`` whose prediction equals ``y``.

        Raises
        ------
        ValueError
            If ``y`` is empty, since the accuracy is undefined in that case.
        """
        y = np.asarray(y)
        if y.size == 0:
            raise ValueError("cannot compute accuracy on an empty label array")
        return np.mean(self.predict(X) == y)
        

## Getting Data & Selecting Features

In [3]:
# Load the dataset array; column 0 is used as the target and the
# remaining columns as the feature matrix.
data = np.load("./dataset/mnist_train_small.npy")
y = data[:, 0]
X = data[:, 1:]

## Model Building

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed random_state keeps
# the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [5]:
# Three neighbours vote on every prediction.
model = CustomKNN(k=3)
model.fit(X=X_train, y=y_train)

In [6]:
# Predicted labels for the first 10 held-out samples.
model.predict(X_test[:10])

array([1, 7, 0, 9, 4, 5, 4, 6, 9, 2])

In [7]:
# True labels of the same 10 samples, for comparison with the predictions.
y_test[:10]

array([7, 7, 0, 9, 4, 5, 4, 6, 9, 2], dtype=uint8)

In [8]:
# Fraction of correct predictions on the first 100 held-out samples only.
model.accuracy(X_test[:100], y_test[:100])

0.96