In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, r2_score, confusion_matrix, accuracy_score, roc_auc_score, roc_curve, classification_report
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [2]:
from warnings import filterwarnings
filterwarnings('ignore')

In [3]:
df = pd.read_csv(".\diabetes.csv")
y = df["Outcome"]
X = df.drop(["Outcome"], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

In [4]:
knn_model = KNeighborsClassifier().fit(X_train, y_train)
knn_model.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [5]:
knn_params = {"leaf_size": [10,15,30,40], "n_neighbors": [3,5,7,10], "p": [1,2,3,5]}

In [6]:
knn_cv_model = GridSearchCV(knn_model, knn_params, cv = 10, n_jobs = -1, verbose = 2).fit(X_train, y_train)

Fitting 10 folds for each of 64 candidates, totalling 640 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 360 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done 617 out of 640 | elapsed:    3.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 640 out of 640 | elapsed:    3.5s finished


In [7]:
knn_cv_model.best_params_

{'leaf_size': 10, 'n_neighbors': 7, 'p': 1}

In [40]:
knn_tuned = KNeighborsClassifier(leaf_size = 10, n_neighbors = 10, p = 1).fit(X_train, y_train)

In [41]:
y_pred = knn_tuned.predict(X_test)

In [42]:
accuracy_score(y_test, y_pred)

0.7445887445887446

In [43]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.77      0.86      0.82       151
           1       0.67      0.53      0.59        80

    accuracy                           0.74       231
   macro avg       0.72      0.69      0.70       231
weighted avg       0.74      0.74      0.74       231

