# K-Nearest Neighbors Algorithm

## Loading Useful Libraries and Functions

In [1]:
import numpy as np
from scipy.stats import mode
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

## Loading the Iris Dataset

In [2]:
# Load the Iris data once, then pull out the feature array and the label array.
iris_dataset = load_iris()
X, y = iris_dataset.data, iris_dataset.target

## Train Test Split

In [3]:
# Fix the seed so the stratified 80/20 split (and every number computed from it)
# is identical after a kernel restart; without it each run evaluates a different split.
RANDOM_STATE = 42
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, shuffle=True, stratify=y, random_state=RANDOM_STATE
)

In [4]:
# Sanity-check the split sizes: collect the four report lines, then emit them in one call.
shape_report = [
    f'Shape of train_X : {train_X.shape}',
    f'Shape of test_X : {test_X.shape}',
    f'Shape of train_y : {train_y.shape}',
    f'Shape of test_y : {test_y.shape}',
]
print(*shape_report, sep='\n')

Shape of train_X : (120, 4)
Shape of test_X : (30, 4)
Shape of train_y : (120,)
Shape of test_y : (30,)


## Distance Metric Function

In [6]:
def distance(array1, array2, p=2):
    """Return the Minkowski (L-p) distance between two points.

    Parameters
    ----------
    array1, array2 : array_like
        Coordinates of the two points. Both are converted to float arrays, so
        plain lists and integer arrays are accepted.
    p : int or float, default 2
        Order of the norm: 1 gives the Manhattan distance, 2 the Euclidean
        distance, and ``np.inf`` the Chebyshev (largest-coordinate) distance.

    Returns
    -------
    numpy.float64
        ``sum(|array1 - array2| ** p) ** (1 / p)``, or the largest absolute
        coordinate difference when ``p`` is ``np.inf``.
    """
    # Subtract in float so unsigned-integer inputs cannot wrap around.
    diff = np.abs(np.asarray(array1, dtype=float) - np.asarray(array2, dtype=float))

    if p == np.inf:
        # The closed form below breaks down here: the outer exponent 1/p is 0,
        # so it would not return the limiting value, which is the max difference.
        # `initial=0.0` keeps the empty-input result at 0.0, matching the sum branch.
        return diff.max(initial=0.0)

    return np.sum(diff ** p) ** (1 / p)

## Prediction Function for a Given Query Point

In [7]:
def predict(train_X, train_y, query, k=7, p=2):
    """Classify one query point by majority vote among its k nearest training points.

    Parameters
    ----------
    train_X : array_like
        Training samples, one per row.
    train_y : array_like
        Label of each training sample, in the same order as ``train_X``.
    query : array_like
        The single point to classify.
    k : int, default 7
        Number of neighbours that vote. If it exceeds the number of training
        samples, every sample votes.
    p : int or float, default 2
        Order of the Minkowski distance, forwarded to ``distance``.

    Returns
    -------
    scalar
        The most frequent label among the k nearest samples, as a Python
        scalar. Ties between labels are resolved by ``scipy.stats.mode``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or there are no training samples.
    """
    if k < 1:
        # A negative k would slice the ranking from the wrong end and k == 0
        # would leave nothing to vote on, so reject both explicitly.
        raise ValueError(f"k must be a positive integer, got {k}")
    if len(train_X) == 0:
        raise ValueError("train_X must contain at least one sample")

    train_y = np.asarray(train_y)

    # Distance from the query to every training sample.
    distance_v = np.array([float(distance(sample, query, p)) for sample in train_X])

    # A stable sort ranks equidistant samples by their position in train_X,
    # so the chosen neighbourhood is deterministic.
    top_k_indices = np.argsort(distance_v, kind="stable")[:k]

    neighborhood_y = train_y[top_k_indices]
    predicted_class = mode(neighborhood_y)[0]

    return predicted_class.item()

## Accuracy Function

In [8]:
def accuracy(test_y, pred_y):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    test_y : array_like
        True labels.
    pred_y : array_like
        Predicted labels, in the same order and with the same shape as ``test_y``.

    Returns
    -------
    numpy.float64
        Mean of the element-wise equality between the two label arrays.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Convert first: comparing two Python lists with == yields a single bool,
    # which would collapse the score to 0.0 or 1.0.
    test_y = np.asarray(test_y)
    pred_y = np.asarray(pred_y)

    # Guard against silent broadcasting, e.g. (n,) against (n, 1) giving an (n, n) grid.
    if test_y.shape != pred_y.shape:
        raise ValueError(
            f"shape mismatch: test_y has shape {test_y.shape}, pred_y has shape {pred_y.shape}"
        )

    return np.mean(test_y == pred_y)

## Testing

In [9]:
# Classify every held-out sample, then report the share of correct predictions.
pred_y = np.array([predict(train_X, train_y, query) for query in test_X])
print(f"Prediction Accuracy : {accuracy(test_y, pred_y)}")

Prediction Accuracy : 0.9666666666666667
