## KNN: A sample is classified by a majority vote of its k nearest neighbors
### We calculate the distance from the current test sample to each training sample to find its nearest neighbors
### To calculate the distance, we use the Euclidean distance

$$ d = \sqrt{( X_2 - X_1 )^2  + ( Y_2 - Y_1 )^2 }$$

### In general, for two n-dimensional points p and q, the formula can be written as:

$$ d(p, q) = \sqrt{ \sum_{i=1}^{n} ( p_i - q_i )^2 } $$

In [1]:
import numpy as np
from collections import Counter

In [2]:
# Define the distance formula as a module-level (global) function
def euclidean_distance(x1, x2):
    """Return the Euclidean distance between two points.

    Parameters
    ----------
    x1, x2 : array_like
        Coordinates of the two points. Any numeric sequence NumPy can
        convert (list, tuple, ndarray) is accepted; the two inputs must
        have the same shape or be broadcastable against each other.

    Returns
    -------
    numpy.float64
        The square root of the sum of squared coordinate differences.
    """
    # Convert to float first: plain Python lists do not support `-`, and
    # unsigned integer arrays (e.g. uint8 pixel data) would otherwise wrap
    # around on subtraction and yield a wrong distance.
    p = np.asarray(x1, dtype=float)
    q = np.asarray(x2, dtype=float)
    return np.sqrt(np.sum((p - q) ** 2))

In [3]:
# KNN doesn't involve a real training step:
# the fit method simply stores the training samples for use at prediction time
class KNN:
    """k-nearest-neighbours classifier based on Euclidean distance.

    A sample is assigned the label that occurs most often among the ``k``
    training samples closest to it. There is no real training phase:
    ``fit`` only stores the training data.
    """

    def __init__(self, k=3):
        """Store the number of neighbours that take part in the vote.

        Parameters
        ----------
        k : int, default 3
            Number of nearest neighbours to consult. Must be at least 1.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1. (Zero would leave nothing to vote
            on, and a negative value would silently slice off the
            farthest neighbours instead of selecting the nearest ones.)
        """
        if k < 1:
            raise ValueError("k must be a positive integer, got %r" % (k,))
        self.k = k

    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array_like
            Training samples, one per entry along the first axis.
        y : array_like
            Labels; ``y[i]`` is the label of ``X[i]``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of entries.
        """
        # Float conversion lets list input work and prevents unsigned
        # integer wrap-around when differences are taken later.
        samples = np.asarray(X, dtype=float)
        # A plain ndarray guarantees positional indexing; a pandas Series
        # with a shuffled index would otherwise be looked up by label.
        labels = np.asarray(y)
        if len(samples) != len(labels):
            raise ValueError(
                "X and y must have the same number of samples, got %d and %d"
                % (len(samples), len(labels))
            )
        self.X_train = samples
        self.y_train = labels

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : array_like
            Samples to classify, one per entry along the first axis.

        Returns
        -------
        numpy.ndarray
            The predicted labels, in the same order as the samples.
        """
        queries = np.asarray(X, dtype=float)
        return np.array([self._predict(x) for x in queries])

    def _predict(self, x):
        """Predict the label of a single sample ``x``."""
        # Distance from x to every training sample in one vectorised step.
        # Flattening each sample keeps this working for single-feature
        # (1-D) as well as multi-dimensional training data.
        diffs = self.X_train - np.asarray(x, dtype=float)
        diffs = diffs.reshape(len(diffs), -1)
        distances = np.sqrt(np.sum(diffs ** 2, axis=1))

        # Indices of the k closest training samples, nearest first.
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = self.y_train[k_indices].tolist()

        # Majority vote. Counter keeps first-seen order among equal counts,
        # so a tie goes to the label whose neighbour is closest.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]