In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

In [2]:
# Read the raw dataset from a CSV file located in the working directory
DATA_FILE = "diabetes.csv"
df = pd.read_csv(DATA_FILE)

In [3]:
# Coerce every column to a numeric dtype; values that cannot be parsed become NaN
df = df.apply(pd.to_numeric, errors='coerce')
# Rows without a usable label are dropped rather than imputed: filling 'Outcome'
# with the column mean typically yields a fractional value that the later
# astype(int) truncates to 0, silently turning unknown labels into negatives
df = df.dropna(subset=['Outcome'])
# Fill the remaining gaps in the feature columns with each column's mean.
# fillna aligns the Series of means on column labels, so 'Outcome' is left untouched.
# NOTE(review): the means are computed over all rows, before the train/test split
# further down, so test rows influence the imputed values — consider imputing
# after the split using training-set statistics only.
feature_means = df.drop(columns='Outcome').mean()
df = df.fillna(feature_means)

In [4]:
# Separate the label column (y) from the predictor columns (X)
target_col = 'Outcome'
y = df[target_col].astype(int)
X = df.drop(columns=[target_col])

In [5]:
# Hold out a fraction of the rows (test_size) for evaluation; the fixed
# random_state makes the shuffle, and therefore the split, reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Standardize the features: the scaler is fitted on the training split only,
# then the same fitted transformation is applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Build a K-Nearest Neighbors classifier and fit it on the scaled training data
k = 5  # value passed as n_neighbors
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X=X_train, y=y_train)

KNeighborsClassifier()

In [8]:
# Predict a label for every row of the scaled test split
y_pred = knn.predict(X=X_test)

In [9]:
# Build the confusion matrix from the true and predicted test labels and print it
# (rows correspond to the true class, columns to the predicted class)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[79 20]
 [27 28]]


In [10]:
# Score the test-set predictions against the true labels
precision = precision_score(y_true=y_test, y_pred=y_pred)
recall = recall_score(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# The error rate is the complement of accuracy
error_rate = 1 - accuracy

In [11]:
# Report the metrics, one per line: accuracy and error rate as percentages,
# precision and recall as fractions
print(
    f'Accuracy: {accuracy * 100:.2f}%',
    f'Error Rate: {error_rate * 100:.2f}%',
    f'Precision: {precision:.2f}',
    f'Recall: {recall:.2f}',
    sep='\n',
)

Accuracy: 69.48%
Error Rate: 30.52%
Precision: 0.58
Recall: 0.51
