<a href="https://colab.research.google.com/github/nirezuluet/ML-from-scratch-/blob/main/KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# implementation of KNN from scratch

import numpy as np
import math
from collections import Counter

In [7]:
def euclidean_distance(x_1, x_2):
  """Return the Euclidean (L2) distance between ``x_1`` and ``x_2``.

  The operands are combined with ``-`` and ``**``, so they must support
  element-wise arithmetic (e.g. NumPy arrays of matching shape, or scalars).
  The result comes from ``math.sqrt`` and is therefore a Python float.
  """
  delta = x_1 - x_2
  squared_total = np.sum(delta ** 2)
  return math.sqrt(squared_total)

def manhattan_distance(x_1, x_2):
  """Return the Manhattan (L1) distance between ``x_1`` and ``x_2``.

  This is the sum of the absolute element-wise differences. The operands
  must support ``-`` and ``abs`` (e.g. NumPy arrays of matching shape).
  """
  gaps = abs(x_1 - x_2)
  return np.sum(gaps)

def cosine_similarity(x_1, x_2):
  """Return the cosine similarity between vectors ``x_1`` and ``x_2``.

  The value is ``dot(x_1, x_2) / (||x_1|| * ||x_2||)`` and lies in
  ``[-1, 1]`` (up to floating-point rounding). If either vector has zero
  norm the similarity is undefined and ``0`` is returned instead.

  Note: this is a *similarity* (larger means closer), not a distance.
  Code that ranks neighbours by ascending value should use
  ``1 - cosine_similarity(...)`` rather than this value directly.

  Args:
    x_1: First vector (NumPy array or any sequence of numbers).
    x_2: Second vector, same length as ``x_1``.

  Returns:
    The cosine similarity, or ``0`` when either input has zero norm.
  """
  # Coerce to float arrays so plain lists work and integer squares cannot
  # overflow a fixed-width integer dtype.
  x_1 = np.asarray(x_1, dtype=float)
  x_2 = np.asarray(x_2, dtype=float)

  # The L2 norm is the root of the *sum* of squares; math.sqrt needs a scalar.
  norm_x_1 = math.sqrt(np.sum(x_1 ** 2))
  norm_x_2 = math.sqrt(np.sum(x_2 ** 2))

  if norm_x_1 == 0 or norm_x_2 == 0:
    return 0

  return np.dot(x_1, x_2) / (norm_x_1 * norm_x_2)

def hamming_distance(x_1, x_2):
  """Return the Hamming distance between ``x_1`` and ``x_2``.

  Both inputs are converted with ``np.array`` and compared element-wise;
  the result is the number of positions at which they differ.
  """
  left = np.array(x_1)
  right = np.array(x_2)
  mismatches = left != right  # boolean mask, True where the elements differ
  return np.sum(mismatches)

In [8]:
class KNN:
  """k-nearest-neighbours classifier with a pluggable distance function.

  ``fit`` only stores the training data; all work happens at prediction
  time, when the query point is compared against every training sample.
  """

  def __init__(self, k, distance_function):
    """Store the hyper-parameters.

    Args:
      k: Number of nearest neighbours that take part in the vote.
      distance_function: Callable ``f(x, x_train)`` returning a value where
        *smaller* means *closer* (neighbours are ranked in ascending order).
    """
    self.k = k
    self.distance_function = distance_function

  def fit(self, X_train, y_train):
    """Remember the training samples and their labels.

    Args:
      X_train: Iterable of training samples.
      y_train: Labels, indexable by position and aligned with ``X_train``.
    """
    self.X_train = X_train
    self.y_train = y_train

  def predict(self, x):
    """Return the vote tally of the ``k`` nearest neighbours of ``x``.

    Args:
      x: Query sample, in whatever form ``distance_function`` accepts.

    Returns:
      A list of ``(label, count)`` pairs ordered from most to least common
      among the ``k`` closest training samples (the output of
      ``Counter.most_common()``). The first pair holds the majority label;
      use ``predict_label`` to get that label directly.
    """
    # Distance from x to every training sample.
    distances = [self.distance_function(x, x_train) for x_train in self.X_train]

    # Indices of the k smallest distances. A stable sort keeps equidistant
    # samples in training order, so tie-breaking is deterministic.
    k_closest_samples_index = np.argsort(distances, kind="stable")[:self.k]

    # Classification: collect the labels of those neighbours and count them.
    k_closest_labels = [self.y_train[index] for index in k_closest_samples_index]
    return Counter(k_closest_labels).most_common()

  def predict_label(self, x):
    """Return the single majority label among the ``k`` nearest neighbours.

    Ties between labels are resolved in favour of the label whose neighbour
    is encountered first (i.e. the closer one).

    Raises:
      ValueError: If no neighbour took part in the vote (for example an
        empty training set or ``k == 0``).
    """
    ranked_votes = self.predict(x)
    if not ranked_votes:
      raise ValueError("no neighbours available to vote on a label")
    return ranked_votes[0][0]
