# K Nearest Neighbors

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplot
import seaborn as sns
%matplotlib inline

In [None]:
# Load the training set from the working directory. A plain relative path
# resolves on every OS; the previous '.\train.csv' form only worked on Windows.
train_data = pd.read_csv('train.csv')
train_data.head()

In [None]:
# Load the test set from the working directory. A plain relative path
# resolves on every OS; the previous '.\test.csv' form only worked on Windows.
test_data = pd.read_csv('test.csv')
test_data.head()

# Visualization

In [None]:
# Pairwise plots of the training columns, coloured by the 'diabetes' column.
sns.pairplot(train_data, hue='diabetes')

In [None]:
# Summary statistics of the training data, transposed for display.
train_data.describe().T

In [None]:
# Print the DataFrame's info summary for the training data.
train_data.info()

# K Nearest Neighbors

In [None]:
# Row position marking 80% of the training rows (truncated to an integer).
split = int(0.8 * len(train_data))
split

In [None]:
# Two feature columns, cut positionally at `split`: rows before it form
# trainX, rows from it onwards form testX.
train_X = train_data[['glucose_concentration', 'blood_pressure']]
trainX = train_X.iloc[:split]
testX = train_X.iloc[split:]
testX

In [None]:
# Single-column label frame, cut positionally at the same `split` index.
train_Y = train_data[['diabetes']]
trainY = train_Y.iloc[:split]
# NOTE(review): bare expression in the middle of the cell; presumably meant to
# display trainY, which a notebook only does for a cell's last statement.
trainY
testY = train_Y.iloc[split:]

In [None]:
# Feature columns of the test.csv rows. They are bound to their own name
# (mirroring `train_X`) instead of overwriting `testX`: `testX` holds the
# validation rows whose labels are in `testY`, and the evaluation cells below
# compare predictions for `testX` against `testY`.
test_X = test_data[['glucose_concentration', 'blood_pressure']]

In [None]:
from scipy.stats import mode


class KNNClassifier:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Each query row is compared with every stored training row; the label
    that occurs most often among the ``k`` closest training rows is the
    prediction. When several labels tie, the smallest label wins.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` nearest rows."""
        self.n_neighbors = k
        # Training-row indices of the neighbours of the most recently
        # classified observation; stays None until predict() has run.
        self.k_neighbors = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Store the training data.

        Args:
            X: Training features, shape ``(n_samples, n_features)``.
            y: Training labels, shape ``(n_samples,)`` or ``(n_samples, m)``.

        Returns:
            A short textual description of the classifier.
        """
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        # The spelling inside this string is kept exactly as it always was so
        # that anything comparing or displaying the returned text is unaffected.
        return f"KNNClassifier(n_neighrbors={self.n_neighbors})"

    @staticmethod
    def _majority(votes):
        """Return the most frequent value in ``votes`` (smallest on ties)."""
        # np.unique returns sorted values, and argmax picks the first maximum,
        # so ties resolve to the smallest label.
        values, counts = np.unique(votes, return_counts=True)
        return values[np.argmax(counts)]

    def predict(self, x: np.ndarray):
        """Predict a label for every row of ``x``.

        Args:
            x: Query features, shape ``(n_queries, n_features)``.

        Returns:
            Array of predictions with shape ``(n_queries,)`` when the training
            labels were 1-D, or ``(n_queries, m)`` when they were
            ``(n_samples, m)``.
        """
        self.predictions = []
        for obs in np.asarray(x):
            # Distances are measured against the stored TRAINING rows; the
            # neighbour indices below are then valid indices into self.y.
            distances = np.sqrt(np.power(self.X - obs, 2).sum(axis=1))
            # A stable sort makes tie-breaking between equidistant rows
            # deterministic (lower training index first).
            self.k_neighbors = np.argsort(
                distances, kind="stable")[:self.n_neighbors]
            votes = self.y[self.k_neighbors]
            if votes.ndim == 1:
                winner = self._majority(votes)
            else:
                # Vote independently for each label column and restore the
                # per-sample label shape.
                columns = votes.reshape(votes.shape[0], -1).T
                winner = np.array(
                    [self._majority(column) for column in columns]
                ).reshape(votes.shape[1:])
            self.predictions.append(winner)
        return np.array(self.predictions)


# Model Evaluators


In [None]:
# Build a 9-neighbour classifier, fit it on trainX/trainY, then predict a
# label for every row of testX.
clf = KNNClassifier(9)
clf.fit(X=trainX.values, y=trainY.values)
yhat = clf.predict(x=testX.values)

In [None]:
def Jaccard_index(Y, yhat):
    """Return ``correct / (2 * total - correct)`` rounded to 4 decimals.

    ``total`` is ``len(Y)`` and ``correct`` is the number of positions at
    which the label and the prediction are equal.

    Args:
        Y: True labels. May be a DataFrame/Series, an ndarray or a list, and
            either 1-D or 2-D; for 2-D input the first column is used.
        yhat: Predicted labels, in any of the same forms. Only the first
            ``len(Y)`` entries are compared.

    Returns:
        The score as a float rounded to 4 decimal places.

    Raises:
        ZeroDivisionError: If ``Y`` is empty.
        IndexError: If ``yhat`` has fewer entries than ``Y``.
    """
    total = len(Y)
    # np.asarray accepts pandas objects, arrays and plain sequences alike.
    y_true = np.asarray(Y)
    y_pred = np.asarray(yhat)
    # Reduce column-shaped (n, 1) input to a flat vector so 1-D and 2-D
    # inputs go through the same comparison.
    if y_true.ndim > 1:
        y_true = y_true[:, 0]
    if y_pred.ndim > 1:
        y_pred = y_pred[:, 0]
    correct_prediction = sum(
        1 for index in range(total) if y_true[index] == y_pred[index])
    jaccard_index_score = (
        correct_prediction / ((total + total) - correct_prediction))
    return round(jaccard_index_score, 4)


def F1_Score(Y, yhat):
    """Return the unweighted mean of the F1 scores for class 1 and class 0.

    Any label other than 1 is treated as class 0. A per-class F1 that is
    undefined (the class was never predicted, never occurs in ``Y``, or has
    no correct predictions) counts as 0.0 instead of raising
    ZeroDivisionError.

    Args:
        Y: True labels. May be a DataFrame/Series, an ndarray or a list, and
            either 1-D or 2-D; for 2-D input the first column is used.
        yhat: Predicted labels, in any of the same forms. Only the first
            ``len(Y)`` entries are compared.

    Returns:
        The averaged F1 score as a float rounded to 4 decimal places.

    Raises:
        IndexError: If ``yhat`` has fewer entries than ``Y``.
    """
    total = len(Y)
    # np.asarray accepts pandas objects, arrays and plain sequences alike.
    y_true = np.asarray(Y)
    y_pred = np.asarray(yhat)
    # Reduce column-shaped (n, 1) input to a flat vector.
    if y_true.ndim > 1:
        y_true = y_true[:, 0]
    if y_pred.ndim > 1:
        y_pred = y_pred[:, 0]

    TP = FP = FN = TN = 0
    for index in range(total):
        actual = y_true[index]
        predicted = y_pred[index]
        if actual == predicted:
            if actual == 1:
                TP += 1
            else:
                TN += 1
        elif predicted == 1:
            # Predicted positive but the true label differs: false positive.
            FP += 1
        else:
            # Predicted negative but the true label differs: false negative.
            FN += 1

    def _class_f1(hits, false_pos, false_neg):
        # F1 for one class; 0.0 whenever precision or recall is undefined.
        predicted_count = hits + false_pos
        actual_count = hits + false_neg
        if predicted_count == 0 or actual_count == 0:
            return 0.0
        precision = hits / predicted_count
        recall = hits / actual_count
        if precision + recall == 0:
            return 0.0
        return (2 * (precision * recall)) / (precision + recall)

    f1_score = [
        _class_f1(TP, FP, FN),
        # Seen from class 0, a false negative for class 1 is a false
        # positive, and vice versa.
        _class_f1(TN, FN, FP),
    ]
    return round((sum(f1_score) / len(f1_score)), 4)


In [None]:

# Score the predictions in yhat against the labels in testY.
Jaccard_index(testY, yhat)


In [None]:
# F1 score (averaged over classes 1 and 0) of yhat against testY.
F1_Score(testY, yhat)