In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
from itertools import combinations

import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Read the diabetes CSV into a DataFrame, then print a preview of the first
# rows followed by the number of missing values found in each column.
csv_path = "../input/diabetes-data-set/diabetes.csv"
dataset = pd.read_csv(csv_path)
preview_rows = dataset.head()
missing_per_column = dataset.isnull().sum()
print(preview_rows)
print(missing_per_column)

In [None]:
################################################################# Feature Refining

# Outlier removal: keep only the rows in which every column lies strictly
# within 3 standard deviations of that column's mean (|z-score| < 3).
z = np.abs(stats.zscore(dataset))
dataset_clean = dataset[(z < 3).all(axis = 1)]

# Turn the 0/1 "Outcome" column into readable class labels.
# A single non-mutating replace is used instead of two `inplace=True` calls on
# a column pulled out of the filtered frame: the in-place form is chained
# mutation (SettingWithCopy) and, depending on the pandas version, can also
# rewrite dataset_clean["Outcome"] as a hidden side effect.
y = dataset_clean["Outcome"].replace({1: "Diabetic", 0: "Non Diabetic"})

# Feature matrix: every column except the label.
x = dataset_clean.drop(columns = ["Outcome"])

# Rescale each feature to the [0, 1] range; `x` becomes a NumPy array here.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split performed in the KNN cell, so the rows that later form the test split
# contribute to the min/max used for scaling.
scaler = MinMaxScaler()
x = scaler.fit_transform(x)

In [None]:
################################################################# KNN Model

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 1)

# `x` was min-max scaled on the full dataset before this split, which lets the
# test rows influence the scaling. Re-fit the scaler on the training split only
# and apply it to both splits. Min-max scaling is unaffected by the earlier
# per-feature affine rescale, so for every feature that varies within the
# training split this equals fitting the scaler on the raw training features.
split_scaler = MinMaxScaler().fit(x_train)
x_train = split_scaler.transform(x_train)
x_test = split_scaler.transform(x_test)

# Fit one classifier per candidate k and record its train / test accuracy.
# model[i] is the classifier with n_neighbors = i + 1.
neighbor_range = range(1, 10)
score_train = []
score_test = []
model = []
for n_neighbor in neighbor_range:
    knn = KNeighborsClassifier(n_neighbors = n_neighbor, metric = "euclidean")
    knn.fit(x_train, y_train)
    model.append(knn)

    score_train.append(knn.score(x_train, y_train))
    score_test.append(knn.score(x_test, y_test))

# Accuracy versus k. x / y are passed by keyword: seaborn >= 0.12 rejects them
# as positional arguments.
plt.figure(figsize = (12, 5))
plot = sns.lineplot(x = list(neighbor_range), y = score_train, label = 'score_train')
plot = sns.lineplot(x = list(neighbor_range), y = score_test, label = 'score_test')
plot.set(xlabel = 'n_neighbors', ylabel = 'accuracy')

# 0-based index of the first model with the highest test accuracy; the
# corresponding number of neighbours is n_neighbor_best + 1.
# NOTE(review): k is chosen using the test split, so the test accuracy reported
# below is an optimistic estimate.
n_neighbor_best = int(score_test.index(max(score_test)))
best_model = model[n_neighbor_best]

# Predictions of the selected model (not of the last model fitted in the loop),
# so the classification reports below describe the model being reported.
y_pred_train = best_model.predict(x_train)
y_pred_test = best_model.predict(x_test)

print('\n\n#################################################################\n################################################################# KNN Model with n_neighbors = ', n_neighbor_best + 1, '\n#################################################################\n\n')
print('---------------------------------------------------------')
print('****************** KNN Classification ******************')
print('Classes: ', best_model.classes_)
print('Effective Metric: ', best_model.effective_metric_)
print('Effective Metric Params: ', best_model.effective_metric_params_)
print('No. of Samples Fit: ', best_model.n_samples_fit_)
print("")

print('*************** Evaluation on Test Data ***************')
# Index the sweep results directly with the 0-based best index; nothing is
# appended, so score_test stays aligned with neighbor_range.
print('Accuracy Score: ', score_test[n_neighbor_best])
# Look at classification report to evaluate the model
print(classification_report(y_test, y_pred_test))
print("")

print('*************** Evaluation on Training Data ***************')
print('Accuracy Score: ', score_train[n_neighbor_best])
# Look at classification report to evaluate the model
print(classification_report(y_train, y_pred_train))
print('---------------------------------------------------------')