# **Diabetes Model**

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at the mount point below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import joblib

# Load dataset
df = pd.read_csv("diabetes.csv")

# Columns in which a recorded 0 is treated as a missing-value placeholder
# and replaced by a median.
zero_as_missing_cols = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']

# Split features and labels
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

# Train-test split. This happens BEFORE imputation so that test rows cannot
# influence the imputed values (computing the median on the full dataset
# leaks test-set information into preprocessing).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = X_train.copy()  # explicit copies: the imputation below assigns columns
X_test = X_test.copy()

# Medians are taken from the training rows only, with the placeholder zeros
# masked out so they do not drag the median down.
impute_medians = (
    X_train[zero_as_missing_cols]
    .mask(X_train[zero_as_missing_cols] == 0)
    .median()
)
for col in zero_as_missing_cols:
    X_train[col] = X_train[col].replace(0, impute_medians[col])
    X_test[col] = X_test[col].replace(0, impute_medians[col])

# Scale features (fit on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train models
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

# Seeded so that Restart & Run All reproduces the same forest.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Evaluate
print("Logistic Regression Report:")
print(classification_report(y_test, lr.predict(X_test_scaled)))
print("ROC-AUC:", roc_auc_score(y_test, lr.predict_proba(X_test_scaled)[:, 1]))
# The random forest is trained on unscaled features, so score it on unscaled test data.
print("Random Forest ROC-AUC:", roc_auc_score(y_test, rf.predict_proba(X_test)[:, 1]))

# Save the fitted scaler alongside the model: `lr` was trained on scaled
# features, so inference inputs must pass through the same scaler first.
joblib.dump(scaler, "diabetes_scaler.pkl")

# Save model
joblib.dump(lr, "diabetes_model.pkl")


Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.81      0.83      0.82        99
           1       0.68      0.65      0.67        55

    accuracy                           0.77       154
   macro avg       0.75      0.74      0.74       154
weighted avg       0.76      0.77      0.77       154

ROC-AUC: 0.8196510560146923


['diabetes_model.pkl']

# **Heart Disease**

In [None]:
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("heart.csv")

# Encode categorical columns.
# A separate encoder is kept per column: refitting one shared LabelEncoder
# overwrites its classes_ each iteration, so only the last column's mapping
# would survive and the others could not be reproduced at inference time.
# NOTE(review): integer codes impose an arbitrary order on nominal features
# such as ChestPainType; one-hot encoding may suit logistic regression better,
# but would change the saved model's input schema, so it is not done here.
categorical_cols = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Feature and target split
X = df.drop("HeartDisease", axis=1)
y = df["HeartDisease"]

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling (fit on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model training
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

# Seeded so that Restart & Run All reproduces the same forest.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Evaluation
print("Logistic Regression Report:")
print(classification_report(y_test, lr.predict(X_test_scaled)))
print("ROC-AUC:", roc_auc_score(y_test, lr.predict_proba(X_test_scaled)[:, 1]))
# The random forest is trained on unscaled features, so score it on unscaled test data.
print("Random Forest ROC-AUC:", roc_auc_score(y_test, rf.predict_proba(X_test)[:, 1]))

# Save the fitted preprocessing objects: the model expects inputs that were
# encoded with these encoders and scaled with this scaler.
joblib.dump(label_encoders, "heart_disease_encoders.pkl")
joblib.dump(scaler, "heart_disease_scaler.pkl")

# Save model
joblib.dump(lr, "heart_disease_model.pkl")


Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.78      0.88      0.83        77
           1       0.91      0.82      0.86       107

    accuracy                           0.85       184
   macro avg       0.84      0.85      0.85       184
weighted avg       0.85      0.85      0.85       184

ROC-AUC: 0.9008374802767326


['heart_disease_model.pkl']

# **Breast Cancer**

In [None]:
from sklearn.linear_model import LogisticRegression

# Load dataset
df = pd.read_csv("breast_cancer.csv")

# Drop ID column and the unnamed column with NaN values.
# errors="ignore" keeps the cell working when a CSV export lacks the
# "Unnamed: 32" column; reassigning instead of inplace=True keeps the
# statement explicit about which frame is being produced.
df = df.drop(columns=["id", "Unnamed: 32"], errors="ignore")

# Encode target: M = 1, B = 0
df["diagnosis"] = df["diagnosis"].map({'M': 1, 'B': 0})
# .map() turns any label other than 'M'/'B' into NaN; fail loudly here rather
# than with an obscure error inside model fitting.
if df["diagnosis"].isna().any():
    raise ValueError("Unexpected label(s) in 'diagnosis'; expected only 'M' or 'B'.")

# Feature and target split
X = df.drop("diagnosis", axis=1)
y = df["diagnosis"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling (fit on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train models
lr = LogisticRegression(max_iter=10000)
lr.fit(X_train_scaled, y_train)

# Seeded so that Restart & Run All reproduces the same forest.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Evaluate models
print("Logistic Regression Report:")
print(classification_report(y_test, lr.predict(X_test_scaled)))
print("ROC-AUC:", roc_auc_score(y_test, lr.predict_proba(X_test_scaled)[:, 1]))
# The random forest is trained on unscaled features, so score it on unscaled test data.
print("Random Forest ROC-AUC:", roc_auc_score(y_test, rf.predict_proba(X_test)[:, 1]))

# Save the fitted scaler alongside the model: `lr` was trained on scaled
# features, so inference inputs must pass through the same scaler first.
joblib.dump(scaler, "breast_cancer_scaler.pkl")

# Save the model
joblib.dump(lr, "breast_cancer_model.pkl")

Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98        71
           1       0.98      0.95      0.96        43

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

ROC-AUC: 0.99737962659679


['breast_cancer_model.pkl']