# **K-Nearest Neighbors (KNN) Class Implementation in Python**

In [1]:
# required libraries
from collections import Counter

import numpy as np
import torch
from sklearn.datasets import load_iris

In [45]:
class KNearestNeighbor():
    """Brute-force k-nearest-neighbours classifier scored on a held-out set.

    The constructor stores a training set and a test set (torch tensors),
    immediately classifies every test row and keeps the predictions so that
    ``get_accuracy`` and ``get_confusion_matrix`` can be called right away.
    """

    def __init__(self, X_train, y_train, X_test, y_test, k=5):
        """Store the data and classify the test set.

        Args:
            X_train: 2-D tensor of training samples, one row per sample.
            y_train: tensor holding one label per training row.
            X_test: 2-D tensor of samples to classify.
            y_test: tensor holding the true label of each test row.
            k: number of nearest neighbours that vote (default 5).

        Raises:
            ValueError: if ``k`` is smaller than 1, if the two feature
                matrices have a different number of columns, or if there are
                test rows but no training rows to compare them with.
        """
        if k < 1:
            raise ValueError(f'k must be a positive integer, got {k}')
        if X_train.shape[1] != X_test.shape[1]:
            raise ValueError(
                'X_train and X_test must have the same number of features, '
                f'got {X_train.shape[1]} and {X_test.shape[1]}'
            )
        if X_test.shape[0] > 0 and X_train.shape[0] == 0:
            raise ValueError('X_train must contain at least one sample')

        self.__X_train = X_train                        # training samples
        self.__X_test = X_test                          # samples to classify
        # Labels are flattened so that column-shaped (N, 1) label tensors do
        # not broadcast against the 1-D predictions in get_accuracy.
        self.__y_train = y_train.reshape(-1)            # training labels
        self.__y_test = y_test.reshape(-1)              # true test labels
        self.__trainN = self.__X_train.shape[0]         # number of training samples
        self.__d = self.__X_train.shape[1]              # number of features
        self.__testN = self.__X_test.shape[0]           # number of test samples
        self.__k = k                                    # neighbours that vote
        # Sorted distinct labels seen in either split; their positions are
        # the row/column indices of the confusion matrix.
        self.__classes = torch.unique(
            torch.cat((self.__y_train, self.__y_test))
        ).tolist()
        self.__K = len(self.__classes)                  # number of classes
        # Keep the label dtype: torch.tensor would otherwise default float
        # labels to float32 and break the comparison with y_test.
        self.__y_pred = torch.tensor(                   # predictions for the test set
            self.predict(), dtype=self.__y_train.dtype
        )

    @classmethod
    def train_test_split(cls, X, y, split_ratio, k=5):
        """Randomly split ``X``/``y`` and build a classifier from the parts.

        Args:
            X: 2-D tensor of samples.
            y: tensor of labels, indexable with the same row indices as ``X``.
            split_ratio: fraction of rows used for training, within [0, 1].
            k: number of nearest neighbours that vote (default 5).

        Returns:
            A new instance whose training set is the first
            ``int(split_ratio * N)`` rows of a random permutation and whose
            test set is the remaining rows.

        Raises:
            ValueError: if ``split_ratio`` lies outside [0, 1] (a negative
                ratio would otherwise be read as a negative slice bound).
        """
        if not 0 <= split_ratio <= 1:
            raise ValueError(
                f'split_ratio must be between 0 and 1, got {split_ratio}'
            )

        # Shuffle the row indices, then cut the permutation in two.
        indices = torch.randperm(X.size(0))
        train_N = int(split_ratio * X.size(0))
        train_indices = indices[:train_N]
        test_indices = indices[train_N:]

        return cls(
            X[train_indices], y[train_indices],
            X[test_indices], y[test_indices],
            k,
        )

    def euclidean_distance(self, point1, point2):
        """Return the Euclidean distance between two points as a 0-dim tensor.

        Raises:
            ValueError: if the two points do not have the same shape.
        """
        point1 = torch.as_tensor(point1)
        point2 = torch.as_tensor(point2)
        if point1.shape != point2.shape:
            raise ValueError(
                'points must have the same shape, got '
                f'{tuple(point1.shape)} and {tuple(point2.shape)}'
            )
        return torch.sqrt(((point1 - point2) ** 2).sum())

    def predict(self):
        """Classify every test row and return the labels as a Python list."""
        labels = [label.item() for label in self.__y_train]
        y_pred = []
        for test in self.__X_test:
            # Distance from this test row to every training row at once
            # (same formula as euclidean_distance, applied row-wise).
            distances = torch.sqrt(
                ((self.__X_train - test) ** 2).sum(dim=1)
            ).tolist()
            # sorted() is stable, so equally distant neighbours keep their
            # training-set order.
            neighbours = sorted(zip(labels, distances), key=lambda pair: pair[1])
            y_pred.append(self.majority_vote(neighbours[:self.__k]))
        return y_pred

    def majority_vote(self, distances):
        """Return the most frequent label among ``(label, distance)`` pairs.

        When several labels share the highest count, the one that appears
        first in ``distances`` wins; with a distance-sorted list that is the
        label of the nearest tied neighbour.

        Raises:
            ValueError: if ``distances`` is empty.
        """
        if not distances:
            raise ValueError('majority_vote needs at least one neighbour')
        # Counter keeps first-seen order and max() returns the first maximum.
        counts = Counter(label for label, _ in distances)
        return max(counts, key=counts.get)

    def get_accuracy(self):
        """Return the fraction of test rows whose prediction equals the label.

        The result is NaN when the test set is empty.
        """
        return ((self.__y_pred == self.__y_test).sum() / self.__testN).item()

    def get_confusion_matrix(self):
        """Return a K x K tensor of counts (rows: actual, columns: predicted).

        Row and column ``i`` correspond to the ``i``-th smallest distinct
        label found in the training and test labels, so labels do not have to
        be the integers ``0..K-1``.
        """
        index_of = {label: idx for idx, label in enumerate(self.__classes)}
        confusion_matrix = torch.zeros(self.__K, self.__K)
        pairs = zip(self.__y_test.tolist(), self.__y_pred.tolist())
        for actual, predicted in pairs:
            confusion_matrix[index_of[actual], index_of[predicted]] += 1
        return confusion_matrix

# **Evaluating KNN Class on Iris Dataset**

In [46]:
# Load the Iris data set and wrap its NumPy arrays as torch tensors.
iris = load_iris()
X, y = (torch.from_numpy(array) for array in (iris.data, iris.target))

# Random 80/20 split; constructing the object already classifies the
# held-out rows, and predict() returns those labels as a list.
obj = KNearestNeighbor.train_test_split(X, y, split_ratio=0.8)
result = obj.predict()

# Report the share of correct predictions, then the actual-vs-predicted counts.
print(obj.get_accuracy())
print(obj.get_confusion_matrix())


0.9666666388511658
tensor([[ 8.,  0.,  0.],
        [ 0.,  8.,  0.],
        [ 0.,  1., 13.]])
