# K-Nearest Neighbours Implementation (One-line classifier)

> By **Livin Nector D**

## Importing Modules

In [1]:
import numpy as np
import pandas as pd

## Class for KNN Classifier

In [2]:
def euclidian_dist(p1,p2):
    """Euclidean distance from every point in ``p2`` to every point in ``p1``.

    A new axis is inserted just before the last axis of ``p2`` so that the
    subtraction broadcasts each ``p2`` point against all rows of ``p1``; the
    norm is then taken over the last (coordinate) axis.

    With ``p1`` of shape ``(n, d)``:
      * ``p2`` of shape ``(d,)``   -> result of shape ``(n,)``
      * ``p2`` of shape ``(m, d)`` -> result of shape ``(m, n)``
    """
    offsets = p1 - p2[..., np.newaxis, :]
    return np.linalg.norm(offsets, axis=-1)

In [3]:
class KNNClassifier:
    """k-nearest-neighbours classifier that predicts by majority vote.

    Parameters
    ----------
    train_x : training samples; passed unchanged as the first argument of
        ``dist``.
    train_y : array-like of labels, one per training sample.
    k : number of nearest neighbours that vote (default 3).
    dist : callable invoked as ``dist(train_x, test_point)``. Its result is
        sorted along the last axis, so the training samples must lie on the
        last axis of the returned distances.

    Attributes
    ----------
    classes : sorted unique labels found in ``train_y`` (``np.unique``).
    """

    def __init__(self,train_x,train_y,k=3,dist=euclidian_dist):
        self.train_x = train_x
        # Coerce labels to an array so the integer-array indexing in
        # classify() also works when a list or a Series is supplied.
        self.train_y = np.asarray(train_y)
        self.classes = np.unique(self.train_y)
        self.k = k
        self.dist = dist
    
    def classify(self,test_point):
        """Predict the label of one test point or of a batch of test points.

        Returns the predicted label(s) taken from ``self.classes``: a scalar
        when ``dist`` yields a 1-D distance vector, otherwise one label per
        leading index of the distance array. When several classes receive the
        same number of votes, the one that comes first in ``self.classes``
        wins (``np.argmax`` returns the first maximum).
        """
        distances = self.dist(self.train_x, test_point)
        # Positions of the k closest training samples for each test point.
        nearest = np.argsort(distances, axis=-1)[..., :self.k]
        neighbour_labels = self.train_y[nearest]
        # Compare the neighbour labels against every class (classes as a
        # column vector) and count the matches: votes[..., c] is the number
        # of neighbours whose label equals self.classes[c].
        votes = np.sum(
            neighbour_labels[..., np.newaxis, :] == self.classes.reshape(-1, 1),
            axis=-1
        )
        # argmax yields a position inside self.classes, not a label; map it
        # back so labels that are not exactly 0..C-1 are returned correctly.
        return self.classes[np.argmax(votes, axis=-1)]

## Loading the dataset

In [4]:
# Read the dataset from a CSV file given by a relative path, so the file must
# be present in the notebook's working directory.
df = pd.read_csv("pima-indians-diabetes.csv")

In [5]:
# Preview the first rows to check the column layout; the last column holds the class label.
df.head()

Unnamed: 0,1. Number of times pregnant,2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test,3. Diastolic blood pressure (mm Hg),4. Triceps skin fold thickness (mm),5. 2-Hour serum insulin (mu U/ml),6. Body mass index (weight in kg/(height in m)^2),7. Diabetes pedigree function,8. Age (years),9. Class variable (0 or 1)
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


## Splitting Training and Testing data

In [6]:
# Fraction of the rows used for training; the remainder is held out for testing.
split_ratio = .7

# Features are every column except the last; the last column is the label.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Index of the first test row. The rows are split in file order (no shuffling).
sl = int(split_ratio * X.shape[0])

X_train, X_test = X[:sl], X[sl:]
y_train, y_test = y[:sl], y[sl:]

## Implementing the classifier

In [7]:
# Build the classifier on the training rows (default k and distance function),
# then label all test rows in a single vectorised call.
diabetes_predictor = KNNClassifier(train_x=X_train, train_y=y_train)
y_pred = diabetes_predictor.classify(test_point=X_test)

## Evaluating the model

In [8]:
# Accuracy = fraction of test rows whose predicted label equals the true label.
print("Acuracy of the classifier is :", np.mean(y_pred == y_test))

Acuracy of the classifier is : 0.7186147186147186
