# In [1]:
import numpy as np
import pandas as pd
import struct
from matplotlib import pyplot as plt
import warnings
# Install a catch-all filter so that no warnings are displayed from here on.
# NOTE(review): this also hides NumPy's divide-by-zero / invalid-value
# RuntimeWarnings, which can mask NaNs produced by numeric code - confirm
# that silencing everything is intended.
warnings.filterwarnings("ignore")

# In [2]:
class KNN():
    """Brute-force k-nearest-neighbours classifier.

    Supported values of ``similarity_metric``:

    * ``'cosine'``        - cosine similarity (larger means closer)
    * ``'euclidean'``     - Euclidean distance
    * ``'manhattan'``     - city-block (L1) distance
    * ``'edit_distance'`` - ``0.5 * (n_features - <x, y>)``; for vectors whose
      entries are all -1 or +1 this equals the Hamming distance

    Attributes set by the methods:

    * ``k``, ``similarity_metric`` - constructor arguments, stored as given
    * ``order`` - ``-1`` for cosine, ``1`` otherwise (see ``__init__``)
    * ``X_train``, ``y_train`` - training data stored by ``fit``
    * ``similarity_matrix`` - scores computed by the latest ``predict`` call
    """

    def __init__(self, k, similarity_metric):
        """Store the neighbour count ``k`` and the metric name."""
        self.k = k
        self.similarity_metric = similarity_metric
        # Neighbours are ranked with an ascending argsort.  Cosine is a
        # similarity (bigger is better), so its scores are negated before
        # sorting; every other metric is a distance and is used as is.
        if similarity_metric == 'cosine':
            self.order = -1
        else:
            self.order = 1

    def fit(self, X_train, y_train):
        """Memorise the training set.

        ``X_train`` is stored as a 2-D float array (one row per sample) and
        ``y_train`` as a flat array of labels (one per row of ``X_train``).
        Lists, NumPy arrays and pandas objects are all accepted.
        """
        self.X_train = np.asarray(X_train, dtype=float)
        self.y_train = np.asarray(y_train).ravel()

    def predict(self, X_test):
        """Return the majority label among the ``k`` nearest training rows.

        ``X_test`` holds one sample per row.  The result is a 1-D integer
        array with one predicted label per test row.

        Raises ``ValueError`` if ``k`` is smaller than 1 or larger than the
        number of training samples.
        """
        from scipy import stats

        # Cast to float up front: integer inputs (e.g. uint8 pixels) would
        # otherwise overflow when squared inside the metric computations.
        X_test = np.asarray(X_test, dtype=float)

        n_train = self.X_train.shape[0]
        if not 1 <= self.k <= n_train:
            raise ValueError(
                "k must be between 1 and the number of training samples "
                "({}), got {}".format(n_train, self.k)
            )

        self.similarity_matrix = self.get_similarity_matrix(self.X_train, X_test)

        # Row i of `nearest` holds the indices of the k best training rows
        # for test sample i.
        ranked = np.argsort(self.order * self.similarity_matrix, axis=1)
        nearest = ranked[:, :self.k]
        top_k_similar = self.y_train[nearest].astype(int)

        predictions = stats.mode(top_k_similar, axis=1)[0].flatten()
        return predictions

    def get_similarity_matrix(self, X, Y):
        """Score every row of ``Y`` against every row of ``X``.

        ``X`` and ``Y`` are matrices of vectors: rows are individual vectors
        and columns are the feature values.  The result has shape
        ``(Y.shape[0], X.shape[0])``; entry ``[i, j]`` compares ``Y[i]`` with
        ``X[j]`` using ``self.similarity_metric``.

        Raises ``ValueError`` for an unrecognised metric name.
        """
        # Work in float so that squaring integer data cannot wrap around.
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)

        # 1 Cosine Similarity
        if self.similarity_metric == 'cosine':
            x_norm = np.sqrt(np.square(X).sum(axis=1))[np.newaxis, :]
            y_norm = np.sqrt(np.square(Y).sum(axis=1))[:, np.newaxis]
            norm_product = y_norm * x_norm
            dot_products = np.matmul(Y, X.T)
            # A zero-length vector has no direction; report similarity 0 for
            # it instead of the NaN that 0/0 would produce.
            similarity_matrix = np.divide(
                dot_products,
                norm_product,
                out=np.zeros_like(dot_products),
                where=norm_product != 0,
            )

        # 2 Euclidean Distance
        elif self.similarity_metric == 'euclidean':
            x_sq = np.square(X).sum(axis=1)[np.newaxis, :]
            y_sq = np.square(Y).sum(axis=1)[:, np.newaxis]
            # ||y - x||^2 = ||y||^2 + ||x||^2 - 2 <y, x>
            squared = y_sq + x_sq - 2 * np.matmul(Y, X.T)
            # Floating-point cancellation can leave tiny negative values for
            # (near-)identical vectors; clamp them so sqrt never yields NaN.
            similarity_matrix = np.sqrt(np.maximum(squared, 0))

        # 3 Manhattan Distance
        elif self.similarity_metric == 'manhattan':
            from scipy.spatial.distance import cdist
            similarity_matrix = cdist(Y, X, metric='cityblock')

        # 4 Edit Distance
        elif self.similarity_metric == 'edit_distance':
            # The scalar feature count broadcasts over the whole matrix.
            similarity_matrix = 0.5 * (X.shape[1] - np.matmul(Y, X.T))

        else:
            raise ValueError(
                "Unknown similarity_metric {!r}; expected 'cosine', "
                "'euclidean', 'manhattan' or 'edit_distance'".format(
                    self.similarity_metric
                )
            )

        return similarity_matrix