# KNN - From Scratch

### Loading the Data and Testing the scikit-learn Baseline Model

In [1]:
# All imports needed by this cell, gathered in one place.
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the iris data set shipped with seaborn.
df = sns.load_dataset('iris')

# Every column except the last is used as a feature; the last column is the label.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Preprocessing: both transformers are fitted on the training split only and
# then re-used, unchanged, on the test split.
scaler = StandardScaler()
encoder = LabelEncoder()

# Standardise the features.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Encode the labels as integers.
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [2]:
# Reference model: scikit-learn's own k-nearest-neighbours classifier.
from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Accuracy = number of matching predictions / number of predictions.
baseline_hits = np.sum(y_pred == y_test)
acc = baseline_hits / len(y_pred)

print("Baseline Model Accuracy is: ", acc)

Baseline Model Accuracy is:  0.9666666666666667


## Custom Model

In [3]:
import numpy as np
from collections import Counter

# Helper Function
def euclidien_distance(x,y):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the two points. Anything ``np.asarray`` can turn into
        a numeric array is accepted (lists, tuples, ndarrays, pandas Series);
        the two shapes must match or be broadcastable.

    Returns
    -------
    numpy.float64
        ``sqrt(sum((x - y) ** 2))``, summed over every element.
    """
    # Convert both inputs to float ndarrays before subtracting so that
    #   * plain Python sequences support element-wise arithmetic,
    #   * unsigned-integer inputs cannot wrap around on subtraction,
    #   * pandas objects are compared by position, not aligned on their index.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

In [4]:
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        Must be at least 1.

    Attributes
    ----------
    k : int
        The neighbour count given at construction.
    X_train : numpy.ndarray
        Training features as a float array (set by ``fit``).
    y_train : numpy.ndarray
        Training labels (set by ``fit``).
    """

    def __init__(self,k = 3):
        # Fail early: with k < 1 no neighbour would vote and prediction
        # would otherwise die later with an opaque IndexError.
        if k < 1:
            raise ValueError("k must be at least 1, got {!r}".format(k))
        self.k = k

    def fit(self,X,y):
        """Memorise the training set; KNN does all its work at prediction time.

        Parameters
        ----------
        X : array-like
            Training samples, one per row (first axis).
        y : array-like
            Label of each training sample, same length as ``X``.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        # Store plain ndarrays so that later indexing is always positional.
        # A pandas Series would be indexed by label, and a DataFrame would be
        # iterated by column name rather than by row.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim == 0 or len(X) == 0:
            raise ValueError("X must contain at least one training sample")
        if y.ndim == 0 or len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")
        self.X_train = X
        self.y_train = y

    def predict(self,X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : array-like
            Samples to classify, one per row, with the same feature layout
            as the training data.

        Returns
        -------
        list
            One predicted label per sample, in input order.
        """
        # np.asarray guarantees row-wise iteration whatever container was passed.
        return [self._predict(x) for x in np.asarray(X, dtype=float)]

    # A private helper function for the main predict function
    def _predict(self,x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        # Squared differences to all training samples in one broadcast step;
        # flattening to (n_samples, -1) keeps one row per training sample even
        # when the training data is 1-D (a single feature).
        squared = (self.X_train - x) ** 2
        distances = np.sqrt(squared.reshape(len(squared), -1).sum(axis=1))

        # Indices of the k closest training samples, nearest first.
        k_indices = np.argsort(distances)[:self.k]

        # Majority vote. On a tie Counter keeps first-seen order, so the
        # label of the nearest tied neighbour wins.
        votes = Counter(self.y_train[k_indices])
        return votes.most_common(1)[0][0]

### Testing the Custom Model

In [5]:
# Fit the hand-written classifier on the training split and predict the test split.
my_clf = KNN()
my_clf.fit(X_train, y_train)
my_pred = my_clf.predict(X_test)

# Accuracy = number of matching predictions / number of test labels.
custom_hits = np.sum(my_pred == y_test)
my_acc = custom_hits / len(y_test)

print("Custom Model Accuracy is: ", my_acc)

Custom Model Accuracy is:  0.9666666666666667
