In [1]:
#Modules that will be used
import numpy as np 
import statistics
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
class KNN_classifier():

    #Initiating the parameters 
    def __init__(self, distance_metric):
        self.distance_metric = distance_metric

    #getting the distance metric
    def get_distance_metric(self, training_data_point, test_data_point):
        if (self.distance_metric == 'euclidean'):
            dist = 0
            for i in range(len(training_data_point) - 1):
                dist = dist + (training_data_point[i] - test_data_point[i])**2

            euclidean_dist = np.sqrt(dist)
            return euclidean_dist
        
        elif (self.distance_metric == "manhattan"):
            dist = 0
            for i in range(len(training_data_point) - 1):
                dist = dist + abs(training_data_point[i] - test_data_point[i])
            manhattan_dist = dist
            return manhattan_dist

    #getting the nearest neighbors 
    def nearest_neighbors(self, X_train, test_data, k):
        distance_list = []
        for training_data in X_train:
            distance = self.get_distance_metric(training_data, test_data)
            distance_list.append((training_data,distance))
        distance_list.sort(key = lambda x:x[1])
        neighbors_list = []
        for j in range(k):
            neighbors_list.append(distance_list[j][0])
        return neighbors_list

    #predict the class of new datat point
    def predict(self, X_train, test_data, k):
        neighbors = self.nearest_neighbors(X_train, test_data, k)

        for data in neighbors : 
            label = []
            label.append(data[-1])
        predicted_class = statistics.mode(label)
        return predicted_class

In [3]:
#Loading the diabetes dataset
diabetes = pd.read_csv("C:\\Users\\raahu\\OneDrive\\Documents\\_Workspace\\Machine Learning\\Data sets\\diabetes.csv")

In [4]:
diabetes.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
diabetes.shape

(768, 9)

In [6]:
#Separating features and labels
X = diabetes.drop(columns= "Outcome", axis = 1)
Y = diabetes['Outcome']

In [7]:
#Converting the data to numpy array
X = X.to_numpy()
Y = Y.to_numpy()

In [8]:
print(X)
print(Y)

[[  6.    148.     72.    ...  33.6     0.627  50.   ]
 [  1.     85.     66.    ...  26.6     0.351  31.   ]
 [  8.    183.     64.    ...  23.3     0.672  32.   ]
 ...
 [  5.    121.     72.    ...  26.2     0.245  30.   ]
 [  1.    126.     60.    ...  30.1     0.349  47.   ]
 [  1.     93.     70.    ...  30.4     0.315  23.   ]]
[1 0 1 0 1 0 1 0 1 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 0 0 1 0 0 0 0 0
 1 1 1 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0
 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1
 1 0 0 1 1 1 0 0 0 1 0 0 0 1 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0
 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 0 1 1 0 1 0 0 0 1 1 1 1 0 1 1 1 1
 0 0 0 0 0 1 0 0 1 1 0 0 0 1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0
 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 0 1 1 1 0 0
 1 0 1 0 1 1 0 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 1 0 1 1 1 0 0 1 0

In [9]:
#Train test split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, stratify = Y, random_state = 2)

In [10]:
#Adding X train to Y train because the model is built in a way where training data contains labels and data together
X_train = np.insert(X_train, 8, Y_train, axis = 1)

In [11]:
#Creating a classifer object for KNN
classifier = KNN_classifier(distance_metric='euclidean')

In [12]:
#Making prediction
#Predicting one point at a time with a loop to cover all points in the test data
prediction = []
for i in range(len(X_test)):
    prediction.append(classifier.predict(X_train, X_test[i], k = 15))

In [13]:
#Printing our prediction
print(prediction)

[0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]


In [14]:
accuracy = round(accuracy_score(Y_test, prediction)*100,2)
print(f'The accuracy of the model is {accuracy}%')

The accuracy of the model is 70.78%
