In [None]:
from sklearn.datasets import make_blobs
import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
from collections import Counter
from tqdm import tqdm

In [None]:
# Synthetic data set: 5000 two-feature samples drawn around 3 centres.
# random_state is fixed so every run generates the same points.
X, y = make_blobs(
    n_samples=5000,
    n_features=2,
    centers=3,
    shuffle=True,
    random_state=42,
)

In [None]:
class KNearestNeighbors:
  """Brute-force k-nearest-neighbours classifier evaluated on a fixed 75/25 split.

  The instance keeps the full data set; `execute` splits it, standardises the
  features with the training statistics and classifies the held-out rows by
  majority vote among their k nearest training rows (Euclidean distance).
  """

  def __init__(self,X,y,k=3):
    """Store the data set and the neighbour count used by `execute`.

    X: feature matrix, one row per sample.
    y: class label of each row of X.
    k: number of neighbours that take part in a vote.
    """
    self.X = X
    self.y = y
    self.k = k

  def Xsplit(self,X,y):
    """Split X/y into 75% training and 25% test rows.

    The split is shuffled with a fixed random_state, so calling this twice on
    the same data yields the same partition.

    Returns X_train, X_test, y_train, y_test.
    """
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=0.75,
                                                        shuffle=True,
                                                        random_state=42)
    return X_train,X_test,y_train,y_test

  def normalize_Xtrain(self,X):
    """Standardise X column-wise and return (X_scaled, mean, std).

    A column with zero standard deviation (constant feature) is scaled by 1
    instead, so it maps to 0 rather than NaN/inf. The returned std already
    carries that substitution and can be passed to `normalize_Xtest`.
    """
    mean = np.mean(X,0)
    std = np.std(X,0)
    # Guard against division by zero for constant features.
    std = np.where(std == 0, 1.0, std)
    return (X-mean)/std,mean,std

  def normalize_Xtest(self,X,mean,std):
    """Scale X with the mean/std obtained from the training rows."""
    return (X-mean)/std

  @staticmethod
  def _vote(distances,labels,k):
    """Return the most common label among the k smallest distances.

    Ties between classes go to the class that appears first when the
    neighbours are ordered nearest-first. Raises ValueError when k < 1 or
    when there are no candidates to vote.
    """
    if k < 1:
      raise ValueError("k must be at least 1")
    # Stable sort keeps equal distances in index order, so results are deterministic.
    nearest = np.argsort(distances, kind="stable")[:k]
    class_count = Counter(np.asarray(labels)[nearest])
    return max(class_count,key=class_count.get)

  def _classify(self,X_query,X_train,y_train,k):
    """Label every row of X_query by a vote of its k nearest rows in X_train.

    Returns a list with one predicted label per query row.
    """
    X_query = np.asarray(X_query)
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    predicted = []
    for data_pt in X_query:
      euc_distance = np.sqrt(np.sum((data_pt-X_train)**2,1))
      # Query rows are not part of X_train, so the closest row votes too.
      predicted.append(self._vote(euc_distance,y_train,k))
    return predicted

  def kNearestalgo(self,X_train,y_train,k):
    """Leave-one-out prediction for every training row.

    Each row is classified by its k nearest *other* training rows; the row
    itself is removed by index before voting, so duplicate points cannot
    cause a row to vote for itself.

    Returns a list with one predicted label per training row.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    predicted = []
    for i in tqdm(range(X_train.shape[0])):
      euc_distances = np.sqrt(np.sum((X_train[i] - X_train)**2,1))
      # Drop row i explicitly instead of assuming it sorts to position 0.
      other_distances = np.delete(euc_distances, i)
      other_labels = np.delete(y_train, i)
      predicted.append(self._vote(other_distances,other_labels,k))

    return predicted

  def predict(self,X_test,y_test,k):
    """Classify X_test against the training split and tabulate the result.

    X_test: unscaled feature rows to classify; they are standardised here
            with the training mean/std.
    y_test: true label of each row of X_test (used only for the table).
    k:      number of neighbours that vote.

    Prints the shape of the result table and returns it as a DataFrame with
    the columns 'actual' and 'predicted'.
    """
    # The training rows come from the deterministic split of the stored data.
    X_train, _, y_train, _ = self.Xsplit(self.X,self.y)
    X_train, mean, std = self.normalize_Xtrain(X_train)
    X_test = self.normalize_Xtest(X_test,mean,std)

    # Vote with the true training labels, not with predictions for them.
    test_pred = self._classify(X_test,X_train,y_train,k)

    # np.asarray avoids index alignment if y_test is a pandas Series.
    df = pd.DataFrame({'actual' : np.asarray(y_test),
                      'predicted' : test_pred})
    print(df.shape)
    return df

  def execute(self):
    """Run the whole pipeline on the stored data with the stored k.

    Splits the data, then classifies the held-out rows via `predict`.
    Returns the DataFrame produced by `predict`.
    """
    _, X_test, _, y_test = self.Xsplit(self.X,self.y)
    return self.predict(X_test,y_test,self.k)

In [None]:
# Classifier over the generated data; each vote uses the 5 nearest neighbours.
km = KNearestNeighbors(X, y, k=5)

In [None]:
# Run the split / normalise / predict pipeline; predict prints the shape of
# the actual-vs-predicted table.
km.execute()