In [548]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

import matplotlib.pyplot as plt
import seaborn as sns

In [549]:
# Read the dataset from the notebook's working directory; the bare name on the
# last line makes the notebook render the frame as the cell output.
diabetes = pd.read_csv(filepath_or_buffer="diabetes.csv")
diabetes

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [550]:
# Remove the two columns that are not used as features further down.
diabetes = diabetes.drop(labels=["Pregnancies", "SkinThickness"], axis=1)
diabetes

Unnamed: 0,Glucose,BloodPressure,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,148,72,0,33.6,0.627,50,1
1,85,66,0,26.6,0.351,31,0
2,183,64,0,23.3,0.672,32,1
3,89,66,94,28.1,0.167,21,0
4,137,40,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...
763,101,76,180,32.9,0.171,63,0
764,122,70,0,36.8,0.340,27,0
765,121,72,112,26.2,0.245,30,0
766,126,60,0,30.1,0.349,47,1


In [551]:
# Work on an independent copy so the cleaning step does not modify `diabetes`.
df = diabetes.copy(deep=True)

In [552]:
# Replace zeroes
# In these columns a 0 is treated as a missing reading: it is turned into NaN
# and then filled with the column mean (truncated to an integer) of the
# remaining values.
# NOTE(review): the mean is taken over every row, before the train/test split
# further down, so held-out rows influence the imputed value.
# NOTE(review): BMI is not in this list - confirm whether zero BMI readings
# should be imputed as well.
zero_not_accepted = ["Glucose", "Insulin", "BloodPressure", "Age"]

for column in zero_not_accepted:
    # Use np.nan: the capitalised np.NaN alias was removed in NumPy 2.0.
    df[column] = df[column].replace(0, np.nan)
    mean = int(df[column].mean(skipna=True))
    df[column] = df[column].fillna(mean)

In [553]:
# Rebind `diabetes` to an independent copy of the cleaned frame.
diabetes = df.copy(deep=True)

In [554]:
# Separate the predictors from the label column, then split both with a fixed
# seed so the partition is reproducible (default train/test proportions).
features = diabetes.drop(columns=["Outcome"])
target = diabetes["Outcome"]
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    random_state=0,
)

In [555]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both partitions.
SC = StandardScaler()
SC.fit(x_train)
x_train = SC.transform(x_train)
x_test = SC.transform(x_test)

In [556]:
# k-nearest-neighbours classifier: 17 neighbours, Euclidean distance.
knn_params = {"n_neighbors": 17, "p": 2, "metric": "euclidean"}
classifier = KNeighborsClassifier(**knn_params)

In [557]:
# Fit on the scaled training rows; the call is left as the last expression so
# the notebook displays its return value.
classifier.fit(X=x_train, y=y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',
                     metric_params=None, n_jobs=None, n_neighbors=17, p=2,
                     weights='uniform')

In [558]:
# Predict labels for the held-out rows.
y_pred = classifier.predict(x_test)
# confusion_matrix puts true labels on rows; the transposed view displayed
# below has predicted labels on rows and true labels on columns.
con_matrix = confusion_matrix(y_test, y_pred)
con_matrix.T

array([[114,  26],
       [ 16,  36]], dtype=int64)

In [559]:
# Per-class precision, recall and F1 on the held-out split.
# classification_report returns a plain string, so print() is used to keep the
# table layout instead of showing the escaped repr.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.81      0.88      0.84       130
           1       0.69      0.58      0.63        62

    accuracy                           0.78       192
   macro avg       0.75      0.73      0.74       192
weighted avg       0.77      0.78      0.78       192

