# KNN-Classifier

In [1]:
import numpy as np

def train(X_train, y_train):
    '''trains the K-NN classifier; i.e. storage of training data'''
    return

In [4]:
# Takes two samples x and y and computes the euclidean distance
euclidean = lambda x,y : np.sqrt(np.sum((x-y)**2))

def calculate_distance(X_new, X_train):
  ''' Calculates distance between new sample and stored training samples'''

  distances = []

  for i in range(len(X_train)):
      distances.append(euclidean(X_new,X_train[i,:]))
  return distances

In [6]:
def predict_labels(y_train, distances, k):
  near_neigh_index = np.argsort(distances)[:k] # Find indices of k nearest neighbors
  neigh_labels = y_train[near_neigh_index]  # labels of k nearest neighbors

  return neigh_labels

In [8]:
def knn_classifier(X_train, y_train, X_test, k):
  '''K-NN classifier : Trains and then predicts label of test set

  Parameters
  ----------
  X_train: Numpy.array, shape(n,m)
      training feature vector
  y_train: Numpy.array, shape(n,)
      training target vector
  X_test: Numpy.array, shape(z,m)
      Data to be classified
  k: int
      Number of neighbors to take into account

  Returns
  -------
  predictions: Numpy.array, shape(z,)
      predicted labels of X_test
  '''

  train(X_train, y_train) # Training phase

  predictions = []

  for i in range(len(X_test)): # Prediction phase
    neigh_dist = calculate_distance(X_test[i,:], X_train)
    k_labels = predict_labels(y_train, neigh_dist, k)
    predictions.append(np.argmax(np.bincount(k_labels)))

  return np.array(predictions)

## Performing knn classifier with iris dataset

In [14]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X_iris, y_iris = load_iris(return_X_y = True) # Loading the iris dataset

X_train, X_test, y_train, y_test = train_test_split(X_iris, y_iris, test_size=0.3)

In [15]:
# Predictions of training set
y_train_pred = knn_classifier(X_train, y_train, X_train, 3)

# Predictions of test set
y_test_pred = knn_classifier(X_train, y_train, X_test, 3)

In [16]:
from sklearn.metrics import classification_report

# Performance evaluation
print('\t------------------training set------------------')
print(classification_report(y_train, y_train_pred))
print('\t--------------------test set--------------------')
print(classification_report(y_test, y_test_pred))

	------------------training set------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       0.95      0.97      0.96        36
           2       0.97      0.95      0.96        37

    accuracy                           0.97       105
   macro avg       0.97      0.97      0.97       105
weighted avg       0.97      0.97      0.97       105

	--------------------test set--------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        18
           1       1.00      0.93      0.96        14
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



- performs exceptionally with accuracy of 97% on training set, and 96% on test set.