# Diabetes Prediction - Algorithm Comparison

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Load data
# The CSV is read without a header row, so columns are labelled 0..n-1.
# Every column except the last is a feature; the last column is the target.
df = pd.read_csv('pima-indians-diabetes.data.csv', header=None)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
# Hold out 20% for testing. Stratifying on the target keeps the class ratio
# of the train and test partitions equal to that of the full dataset, so the
# per-class metrics computed on the test set are not skewed by an
# unrepresentative draw. random_state pins the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Train all models and collect metrics
# SVM and KNN are distance-based: a feature measured on a large numeric scale
# dominates the distance and drowns out the others. Wrapping them in a
# pipeline with StandardScaler puts every feature on a comparable scale, and
# because the scaler is fitted inside the pipeline it only ever sees the
# training split (no test-set leakage). Naive Bayes and the decision tree are
# insensitive to feature scale, so they are left unwrapped.
models = {
    'SVM': make_pipeline(StandardScaler(), SVC(probability=True, random_state=42)),
    'Naive Bayes': GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'KNN': make_pipeline(StandardScaler(), KNeighborsClassifier()),
}

# Scalar metrics shown in the summary table, in display order.
metric_names = ['Accuracy', 'Recall', 'Precision', 'F1-Score']

# results[name] holds the scalar metrics plus the raw test-set predictions
# ('y_pred') and positive-class scores ('y_prob') for the plotting cells.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Column 1 of predict_proba is the score for the second class label,
    # which is what the ROC curve treats as the positive class.
    y_prob = model.predict_proba(X_test)[:, 1]

    results[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'F1-Score': f1_score(y_test, y_pred),
        'y_pred': y_pred, 'y_prob': y_prob
    }

# Display metrics
# One row per model, one column per scalar metric; the array-valued entries
# are deliberately left out of the table.
metrics_df = pd.DataFrame(
    {name: {metric: result[metric] for metric in metric_names}
     for name, result in results.items()}
).T
print(metrics_df.round(4))

In [None]:
# Confusion Matrices
# Draw one annotated heatmap per model on a 2x2 grid, filled row by row in
# the order the models appear in `results`.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
for position, (model_name, outcome) in enumerate(results.items()):
    # Flat indexing walks the grid row-major: 0 -> [0, 0], 1 -> [0, 1], ...
    panel = axes.flat[position]
    cm = confusion_matrix(y_test, outcome['y_pred'])
    sns.heatmap(cm, annot=True, fmt='d', ax=panel, cmap='Blues')
    panel.set(title=model_name, xlabel='Predicted', ylabel='Actual')
plt.tight_layout()
plt.show()

In [None]:
# ROC Curves Comparison
# Overlay every model's ROC curve on a single figure, with the area under
# each curve reported in the legend.
plt.figure(figsize=(8, 6))
for model_name, outcome in results.items():
    fpr, tpr, _ = roc_curve(y_test, outcome['y_prob'])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"{model_name} (AUC={roc_auc:.3f})")

# Dashed diagonal: the curve a random classifier would trace.
plt.plot([0, 1], [0, 1], 'k--', label='Random')

plt.gca().set(
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='ROC Curves - Algorithm Comparison',
)
plt.legend()
plt.grid(alpha=0.3)
plt.show()