In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

kidney_disease_data = pd.read_csv("kidney_disease.csv")
kidney_disease_data = kidney_disease_data.replace('?', pd.NA).dropna()

X = kidney_disease_data.drop(columns=["classification"])
y = kidney_disease_data["classification"]

X = pd.get_dummies(X)

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42
)

k_values = [1, 3, 5, 7, 9]
accuracy_results = []

for k in k_values:
    # Train a K-Nearest Neighbors classifier with k using the training data
    knn_model = KNeighborsClassifier(n_neighbors = k)
    knn_model.fit(X_train, y_train)

    # Predict labels for the test data
    y_pred = knn_model.predict(X_test)

    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_results.append({"k": k, "Test_Accuracy": accuracy})

# Results table
results_df = pd.DataFrame(accuracy_results)
print(results_df.to_string(index=False))

# Best k
best_row = results_df.loc[results_df["Test_Accuracy"].idxmax()]
best_k = int(best_row["k"])
best_accuracy = float(best_row["Test_Accuracy"])
print(f"\nBest k = {best_k} with test accuracy = {best_accuracy:.4f}")

# Changing k controls how “local” the decision boundary is: small k uses very nearby neighbors, large k averages across more points.

# Very small k can overfit because the model becomes sensitive to noise or unusual training points and may memorize quirks.

# Very large k can underfit because it smooths too much and may ignore meaningful local patterns that separate ckd vs notckd.

# In practice, we compare several k values on a test (or validation) set to find a balance that generalizes well.

 k  Test_Accuracy
 1       1.000000
 3       1.000000
 5       1.000000
 7       0.979167
 9       0.979167

Best k = 1 with test accuracy = 1.0000
