
# Diabetes Prediction Model

This notebook trains several machine learning models (Logistic Regression, SVM, Random Forest) to predict diabetes from medical diagnostic measurements. It then evaluates the models on a held-out test split and compares them using accuracy, precision, recall, and F1 score, and plots a confusion matrix for each model.


In [None]:

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:

# Read the diabetes measurements from the CSV file in the working directory.
diabetes_dataset = pd.read_csv(filepath_or_buffer='diabetes.csv')

# Preview the first five rows as the cell output.
diabetes_dataset.head(n=5)


In [None]:

# Target labels: the 'Outcome' column.
Y = diabetes_dataset['Outcome']

# Feature matrix: every column except 'Outcome'.
X = diabetes_dataset.drop(columns=['Outcome'])


In [None]:

# Learn the scaling parameters from X, then apply them to the same data.
# Note: the scaler is fit on all rows of X.
scaler = StandardScaler().fit(X)
X_standardized = scaler.transform(X)


In [None]:

# Split the raw features first so that the held-out rows never influence
# preprocessing. Stratifying on Y keeps the class proportions similar in
# both splits; random_state makes the split reproducible.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=42
)

# Fit the scaler on the training rows only, then reuse those training
# statistics to transform the test rows. Fitting on the full dataset would
# leak test-set mean/variance into the features the models are trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [None]:

# Candidate classifiers keyed by display name; the insertion order is the
# order in which they are trained and reported.
models = dict([
    ('Logistic Regression', LogisticRegression(max_iter=1000, random_state=42)),
    ('Support Vector Machine (SVM)', SVC(random_state=42, kernel='linear')),
    ('Random Forest', RandomForestClassifier(random_state=42)),
])


In [None]:

# Fit every candidate on the training split and score it on the test split,
# collecting one summary row per model.
results = []

for name, model in models.items():
    model.fit(X_train, Y_train)
    Y_pred = model.predict(X_test)

    # Each metric compares the true test labels with the predictions.
    acc = accuracy_score(Y_test, Y_pred)
    prec = precision_score(Y_test, Y_pred)
    rec = recall_score(Y_test, Y_pred)
    f1 = f1_score(Y_test, Y_pred)

    # Scores are stored rounded to two decimals, in table column order.
    scores = {'Accuracy': acc, 'Precision': prec, 'Recall': rec, 'F1 Score': f1}
    row = {'Model': name}
    row.update({metric: round(value, 2) for metric, value in scores.items()})
    results.append(row)

# Build the comparison table once from the collected rows and display it.
results_df = pd.DataFrame(results)
results_df


In [None]:

# Draw one confusion-matrix heatmap per fitted model.
for name, model in models.items():
    Y_pred = model.predict(X_test)
    cm = confusion_matrix(Y_test, Y_pred)

    # Explicit figure/axes pair instead of the implicit pyplot state.
    fig, ax = plt.subplots(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'Confusion Matrix: {name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()
