In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
from scipy.spatial.distance import euclidean

In [2]:
# Load the scikit-learn digits dataset and separate the features from the labels
dataset = load_digits()
X_data, y_data = dataset.data, dataset.target
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=42,
)

##### Implementing KNN Algorithm from scratch.
Algorithm:
1. Find the Euclidean distance from the query point to every point in the training data.
2. Sort the training points by that distance.
3. Take the K nearest points and return the most frequently occurring class among them.


In [3]:
class kNeighbor_Classifier():
    """k-nearest-neighbours classifier based on Euclidean distance.

    A query point is assigned the most frequent label among the
    ``k_neighbors`` training points that lie closest to it.
    """

    #constructor for initialising variables
    def __init__(self,k_neighbors):
        """Store how many neighbours take part in each vote.
        Inputs:k_neighbors,a positive integer
        Raises:TypeError if k_neighbors is not an integer,
               ValueError if it is smaller than 1"""
        if not isinstance(k_neighbors,(int,np.integer)):
            raise TypeError("k_neighbors must be an integer, got {}".format(type(k_neighbors).__name__))
        #k=0 would leave nothing to vote on and a negative k would silently
        #drop the farthest points through slicing, so reject both up front
        if k_neighbors<1:
            raise ValueError("k_neighbors must be at least 1, got {}".format(k_neighbors))
        self.k_neighbors=k_neighbors

    def fit(self,X,y):
        """Memorise the training data; all the work happens at prediction time.
        Inputs:X,training samples (one row per sample)
               y,one label per training sample
        Outputs:the classifier itself, so calls can be chained
        Raises:ValueError if X is empty or X and y disagree in length"""
        X=np.asarray(X,dtype=float)
        y=np.asarray(y)
        #a flat list of numbers is treated as samples with a single feature
        if X.ndim==1:
            X=X.reshape(-1,1)
        if X.ndim!=2:
            raise ValueError("X must be a two dimensional array, got {} dimensions".format(X.ndim))
        if y.ndim!=1 or y.shape[0]!=X.shape[0]:
            raise ValueError("y must hold exactly one label per row of X")
        if X.shape[0]==0:
            raise ValueError("cannot fit on an empty training set")
        self.X=X
        self.y=y
        return self

    def predict_Single_Point(self,point):
        """Given a single point predicts to which class it belongs to
        Inputs:point,the point to be predicted
        Outputs:The class to which it belongs to (same type as the training labels)
        Raises:ValueError if the point does not have as many features as the training data"""
        point=np.asarray(point,dtype=float).ravel()
        if point.shape[0]!=self.X.shape[1]:
            raise ValueError("point has {} features, expected {}".format(point.shape[0],self.X.shape[1]))
        #1.find the euclidean distance of the point to each point in training data
        distances=np.linalg.norm(self.X-point,axis=1)
        #2.order by distance; equal distances are ordered by label (lexsort uses
        #the last key as the primary one). Slicing copes with k larger than the
        #training set by simply using every point.
        nearest=np.lexsort((self.y,distances))[:self.k_neighbors]
        #3.now taking the most common class from the top k neighbors.
        #Labels are kept apart from the distances so they keep their own dtype.
        #np.unique returns sorted labels and argmax takes the first maximum,
        #so a tied vote goes to the smallest label.
        labels,counts=np.unique(self.y[nearest],return_counts=True)
        return labels[np.argmax(counts)]

    def predict(self,X):
        """takes a two dimensional array and classifies each point in an array
        Inputs:X,the points to classify (one row per point)
        Outputs:numpy array with one predicted label per row of X"""
        predicted=[self.predict_Single_Point(point) for point in np.asarray(X,dtype=float)]
        #passing the label dtype keeps an empty result consistent with a non-empty one
        return np.array(predicted,dtype=self.y.dtype)


*Predictions Using Custom Implementation Of KNN*

In [4]:
# Fit the from-scratch classifier with k=10 and score it on the held-out split
model = kNeighbor_Classifier(10)
model.fit(X_train, y_train)
pred = model.predict(X_test)
custom_accuracy = accuracy_score(y_test, pred)
print("Accuracy using custom implementation is    {}".format(custom_accuracy))

Accuracy using custom implementation is    0.9833333333333333


*Predictions using Sklearn Model*

In [5]:
# Reference run with scikit-learn's KNeighborsClassifier on the same train/test split.
# NOTE(review): no n_neighbors is passed here, so the library default is used, while the
# custom model is built with k=10 — the two accuracies are not for the same k.
sklearn_model = KNeighborsClassifier()
sklearn_model.fit(X_train, y_train)
sklearn_pred = sklearn_model.predict(X_test)
sklearn_accuracy = accuracy_score(y_test, sklearn_pred)
print("Accuracy using Sklearn Model is    {}".format(sklearn_accuracy))

Accuracy using Sklearn Model is    0.9861111111111112
