# K-Nearest Neighbour Classification

### importing necessary libraries

In [1]:
import numpy as np
from sklearn.datasets import load_iris

### Loading the dataset

In [2]:
X,y = load_iris(return_X_y=True)

### Splitting the entire dataset into train and test

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2)

print(X_train.shape, X_test.shape)

(120, 4) (30, 4)


### Standardizing the dataset

In [4]:
from sklearn.preprocessing import StandardScaler

SC = StandardScaler()
X_train = SC.fit_transform(X_train)
X_test = SC.fit_transform(X_test)

### Pairwise distances : How it works

In [5]:
from sklearn.metrics import pairwise_distances

pts_1 = np.asarray([[0, 1],[1, -1]])
pts_2 = np.asarray([[0, 0], [2, 0]])

pairwise_distances(pts_1, pts_2)

array([[1.        , 2.23606798],
       [1.41421356, 1.41421356]])

### kNN Classification Algorithm

In [6]:
def kNN_classifier(X_train, y_train, n_class, X_test, k = 3):
    
    n_test_pt = X_test.shape[0]
    D = pairwise_distances(X_test, X_train)
    y_pred = np.empty(n_test_pt)

    for i in range(n_test_pt):
        neighbors = np.argsort(D[i,:])[1:k+1]
        labels_neigh = y_train[neighbors]
        count = np.zeros(n_class)
        for j in labels_neigh:
            count[j] += 1
        y_pred[i] = np.argmax(count)
    
    return y_pred

### Prediction using KNN Classifier

In [7]:
y_pred = kNN_classifier(X_train, y_train, n_class = 3, X_test = X_test, k = 3)
y_pred.shape

(30,)

### Defining confusion matrix function

In [8]:
def confusion_matrix(y_true, y_pred, n_class):
    
    conf_mat = np.zeros((n_class, n_class),dtype='int')
    for i in range(y_true.shape[0]):
        conf_mat[int(y_true[i]), int(y_pred[i])] += 1
    
    return conf_mat

### Evaluating the model

In [9]:
cm = confusion_matrix(y_test, y_pred, n_class=3)

cm

array([[12,  0,  0],
       [ 0,  6,  0],
       [ 0,  2, 10]])

In [10]:
accuracy = np.trace(cm)/np.sum(cm)

print("Correctly identified: {} and total: {}".format(np.trace(cm), np.sum(cm)))

print("Accuracy with kNN and k = 3 : {:0.2f}%".format(accuracy*100))

Correctly identified: 28 and total: 30
Accuracy with kNN and k = 3 : 93.33%


### KNN classification using sklearn

In [11]:
from sklearn.neighbors import KNeighborsClassifier # Importing the kNN Classifier class from sklearn

knn = KNeighborsClassifier(n_neighbors = 3)  # k-NN classifier with k = 3
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

### Testing the model

In [12]:
y_pred = knn.predict(X_test)

### Evaluating the model

In [13]:
conf_mat = confusion_matrix(y_test, y_pred, n_class=3)

conf_mat

array([[12,  0,  0],
       [ 0,  6,  0],
       [ 0,  2, 10]])

In [14]:
accuracy = np.trace(conf_mat)/np.sum(conf_mat)

print("Correctly identified: {} and total: {}".format(np.trace(conf_mat), np.sum(conf_mat)))

print("Accuracy with kNN and k = 3 : {:0.2f}%".format(accuracy*100))

Correctly identified: 28 and total: 30
Accuracy with kNN and k = 3 : 93.33%
