In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load dataset
dataset = pd.read_csv("./CKD.csv")

In [3]:
# Separate independent and dependent variables
independent = dataset.drop("classification", axis=1)
dependent = dataset["classification"]

In [4]:
# convert categorical data into binary format(one-hot encoding)
independent = pd.get_dummies(independent, drop_first=True)

In [5]:
# Dependent variable is categorical (e.g., 'yes'/'no'), encode it to numerical values
label_encoder = LabelEncoder()
dependent = label_encoder.fit_transform(dependent)

In [6]:
x_train, x_test, y_train, y_test = train_test_split(independent, dependent, test_size=1/3, random_state=0)

In [13]:
classifier = KNeighborsClassifier(n_neighbors = 7, metric = 'minkowski', p = 2)
classifier.fit(x_train, y_train)
y_predict = classifier.predict(x_test)
confusion_matrix(y_test, y_predict)
report = classification_report(y_test, y_predict)
print(report)

              precision    recall  f1-score   support

           0       0.57      0.78      0.66        51
           1       0.83      0.63      0.72        82

    accuracy                           0.69       133
   macro avg       0.70      0.71      0.69       133
weighted avg       0.73      0.69      0.70       133



In [15]:

param_grid = {
    'n_neighbors': [3, 5, 7, 10, 15, 20],
    'metric': ['euclidean', 'manhattan', 'minkowski', 'chebyshev', 'cosine'],
    'p': [1, 2]
}

grid = GridSearchCV(KNeighborsClassifier(), param_grid, refit=True, verbose=3, n_jobs=-1, scoring='f1_weighted')
grid.fit(x_train, y_train)

best_params = grid.best_params_
print("Best Parameters:", best_params)

y_predict = grid.predict(x_test)
confusion_matrix(y_test, y_predict)
report = classification_report(y_test, y_predict)
print(report)

Fitting 5 folds for each of 60 candidates, totalling 300 fits
Best Parameters: {'metric': 'cosine', 'n_neighbors': 5, 'p': 1}
              precision    recall  f1-score   support

           0       0.80      0.94      0.86        51
           1       0.96      0.85      0.90        82

    accuracy                           0.89       133
   macro avg       0.88      0.90      0.88       133
weighted avg       0.90      0.89      0.89       133

