In [96]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import math
from collections import Counter

In [74]:
def find_dist(row , test_df):
    '''
    Return the Manhattan (L1) distance between one training row and one query sample.

    Every label in ``row.index`` except the bookkeeping entries ``'distance'`` and
    ``'group'`` is treated as a feature. The absolute differences between
    ``row[col]`` and ``test_df[col]`` are summed over those features.

    Parameters
    ----------
    row : pandas.Series
        One training row, e.g. as passed by ``DataFrame.apply(..., axis=1)``.
        It is only read, never modified.
    test_df : mapping, pandas.Series or single-row pandas.DataFrame
        The query sample, indexable by the same feature labels as ``row``.

    Returns
    -------
    float
        Sum of the absolute per-feature differences.

    Raises
    ------
    ValueError
        If ``test_df[col]`` does not hold exactly one value (for example when a
        multi-row DataFrame is passed).
    '''
    # Start from zero instead of row['distance'] so a value left over from an
    # earlier computation can never leak into this result.
    total = 0.0
    for col in row.index:
        if col in ('distance', 'group'):
            continue
        # A single-row DataFrame yields a length-1 Series here; collapse it to a
        # plain scalar so the subtraction below is scalar arithmetic.
        target = np.asarray(test_df[col])
        if target.size != 1:
            raise ValueError(
                "find_dist expects exactly one query sample, got %d values for feature %r"
                % (target.size, col)
            )
        total += abs(row[col] - target.item())
    return total

In [147]:
class KNN:
    '''
    Minimal k-nearest-neighbours classifier built on pandas.

    Distances come from the module-level ``find_dist`` helper, which sums the
    absolute per-feature differences (Manhattan / L1 distance, not Euclidean).
    The prediction is the most frequent label among the ``N`` closest training rows.
    '''
    # Populated by fit() / predict(); None until then.
    training_df = None   # feature frame given to fit()
    result_df = None     # labels given to fit()
    test_df = None       # most recent query passed to predict()
    predict_df = None    # features plus the 'distance' and 'group' working columns

    def __init__(self, N = 1):
        '''
        N: number of nearest training rows that vote on the predicted label.
        '''
        self.N = N

    def fit(self, training_df , result_df):
        '''
        Store the training data.

        training_df: feature matrix (anything ``pd.DataFrame`` accepts), one row per sample.
        result_df: one label per training row, in the same order as the rows.
        '''
        self.training_df = pd.DataFrame(data = training_df)
        self.result_df = result_df
        self.predict_df = self.training_df.copy()
        self.predict_df['distance'] = 0
        # Attach labels by position. Assigning a pandas Series directly would align
        # on its index and silently produce NaN labels whenever that index differs
        # from the feature frame's index.
        self.predict_df['group'] = np.asarray(self.result_df).ravel()

    def predict(self , test_df):
        '''
        Predict the label of a single sample.

        test_df: a 2-D structure with exactly one row, e.g. ``[[f0, f1, ...]]``,
        whose columns match the training features.

        Returns the most common label among the N nearest training rows.
        Raises ValueError if ``test_df`` does not contain exactly one row.
        '''
        self.test_df = pd.DataFrame(data=test_df)
        return self.with_pandas_apply()

    def with_pandas_apply(self):
        '''
        Compute the distance from every training row to the stored query with
        ``DataFrame.apply`` and return the resulting prediction.

        Raises ValueError when the stored query does not hold exactly one sample.
        '''
        if len(self.test_df) != 1:
            raise ValueError(
                "predict() expects exactly one sample, got %d" % len(self.test_df)
            )
        # Pass a 1-D Series so each feature lookup in find_dist is a scalar rather
        # than a length-1 Series.
        query = self.test_df.iloc[0]
        # Measure on a scratch copy whose 'distance' is reset to 0, so distances left
        # over from an earlier predict() call can never be added onto.
        scratch = self.predict_df.assign(distance = 0)
        # axis = 1 hands each row (not each column) to find_dist.
        self.predict_df['distance'] = scratch.apply(find_dist , axis = 1 , args = (query,))
        return self.calculate_result()

    def calculate_result(self):
        '''
        Rank the training rows by distance, keep the N closest and return the label
        that occurs most often among them.
        '''
        # Sort into a new frame: reordering predict_df in place would make
        # tie-breaking depend on which predictions were made earlier.
        nearest = self.predict_df.sort_values(by = 'distance', ascending = True).head(self.N)
        return nearest['group'].value_counts().idxmax()

In [148]:
if __name__ == "__main__":
    # Load the iris measurements and labels, then hold out 30% of the rows
    # (fixed random_state so the split is reproducible).
    x , y = load_iris(return_X_y = True)
    x_train , x_test , y_train , y_test = train_test_split(
        x ,
        y ,
        test_size = 0.3 ,
        random_state = 400 ,
    )

    # Fit a 5-neighbour classifier on the training split only.
    knn = KNN(N = 5)
    knn.fit(x_train , y_train)

    # Classify one hand-written sample and print the predicted label.
    sample = [[6.3 , 3.3 , 4.7 , 1.6]]
    output = knn.predict(sample)

    print(output)

1
