# In [None]:
# Exercise 6: KNN - Diabetes Dataset

# Importing necessary libraries
import pandas as pd
from sklearn import datasets
from sklearn import neighbors
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import StandardScaler

# Loading the Diabetes dataset
db = datasets.load_diabetes()
X, y = db.data, db.target

# Binning the continuous target into three classes for classification:
#   0 -> more than one std below the mean
#   1 -> within one std of the mean
#   2 -> one std or more above the mean
y_mean, y_std = y.mean(), y.std()
y_binned = np.digitize(y, bins=[y_mean - y_std, y_mean + y_std])

# Splitting the dataset into training and testing sets BEFORE scaling, so
# that the scaler's statistics are learned from the training rows only and
# nothing about the test rows leaks into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_binned, train_size=0.8, random_state=42
)

# Standardizing the features (fit on train, apply to both splits)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full dataset in the same scaled space, kept for any later cell that
# refers to X_scaled.
X_scaled = scaler.transform(X)

# Build a 5-nearest-neighbours classifier and fit it on the training split
# (fit() returns the estimator itself, so construction and training chain).
classifier = neighbors.KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict the held-out rows and report the share of exact class matches
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Prediction on unseen data
# NOTE(review): the sample must list the 10 features in the order given by
# db.feature_names and in the same units as db.data. These placeholder values
# do not look like they come from that feature space -- confirm and replace.
input_data = np.array([[4, 130, 70, 20, 80, 30.5, 0.4, 45, 0, 0]])

# The classifier was trained on standardized features, so the new sample has
# to go through the same fitted scaler before prediction.
input_scaled = scaler.transform(input_data)
prediction = classifier.predict(input_scaled)

# y_binned has three classes (np.digitize with two bin edges), so a binary
# "Diabetic"/"Non-Diabetic" label does not describe the model output.
class_labels = {
    0: "Low progression (more than 1 std below mean)",
    1: "Moderate progression (within 1 std of mean)",
    2: "High progression (1 std or more above mean)",
}
print("Prediction:", class_labels[int(prediction[0])])

# Scatter plot of the test samples, coloured by predicted class
plt.figure(figsize=(10, 6))

# Columns shown on the axes; the axis names are taken from the dataset itself
# so the labels cannot drift away from what is actually plotted.
x_col, y_col = 0, 2
x_name, y_name = db.feature_names[x_col], db.feature_names[y_col]

# One colour and legend entry per target bin (0, 1, 2 from np.digitize)
class_styles = {
    0: ('green', 'Predicted Class 0 (low progression)'),
    1: ('orange', 'Predicted Class 1 (moderate progression)'),
    2: ('red', 'Predicted Class 2 (high progression)'),
}
for cls, (color, label) in class_styles.items():
    mask = y_pred == cls
    # An empty mask still registers the legend entry for that class.
    plt.scatter(X_test[mask, x_col], X_test[mask, y_col],
                color=color, alpha=0.7, edgecolors='k', label=label)

plt.title(f'KNN Predictions ({x_name} vs. {y_name})')
plt.xlabel(f'{x_name} (standardized)')
plt.ylabel(f'{y_name} (standardized)')
plt.grid()
plt.legend()
plt.show()
