In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# --- Load data and make sure a binary "target" label column exists ---------
df = pd.read_csv("../data/heart_disease.csv")
if "target" not in df.columns:
    if "num" not in df.columns:
        # Fail early with an actionable message instead of the opaque KeyError
        # that df.drop(columns=["target"]) would raise a few lines below.
        raise KeyError(
            'Expected a "target" or "num" column in ../data/heart_disease.csv; '
            f"found columns: {list(df.columns)}"
        )
    # Derive the label by binarizing "num": any value above 0 becomes class 1.
    # NOTE(review): a missing "num" compares False and is therefore labelled 0;
    # confirm the column has no NaN values.
    df["target"] = (df["num"] > 0).astype(int)
    df = df.drop(columns=["num"])

# --- Split features / label and group feature columns by dtype -------------
X = df.drop(columns=["target"])
y = df["target"]
num = X.select_dtypes("number").columns.tolist()
cat = [c for c in X.columns if c not in num]  # every non-numeric column

# --- Preprocessing + model pipeline -----------------------------------------
# Numeric columns: median imputation, then standardization.
numeric_steps = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])
# Remaining columns: most-frequent imputation, then one-hot encoding.
# handle_unknown="ignore" keeps categories unseen during fit from raising
# at predict time.
categorical_steps = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ohe", OneHotEncoder(handle_unknown="ignore")),
])
pre = ColumnTransformer([
    ("num", numeric_steps, num),
    ("cat", categorical_steps, cat),
])
pipe = Pipeline([("pre", pre), ("clf", LogisticRegression(max_iter=600))])

# --- Train / evaluate --------------------------------------------------------
# Stratified 80/20 split with a fixed seed so the split is reproducible.
Xtr, Xte, ytr, yte = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
pipe.fit(Xtr, ytr)
print(classification_report(yte, pipe.predict(Xte)))

# --- ROC curve (best effort) -------------------------------------------------
# Only the metric computation is guarded: it is the part that can legitimately
# fail (e.g. a non-binary target). Plotting runs in the `else` branch so that a
# matplotlib problem is not misreported as a ROC/AUC failure.
try:
    # Column 1 of predict_proba is the probability of the second fitted class.
    y_prob = pipe.predict_proba(Xte)[:, 1]
    auc = roc_auc_score(yte, y_prob)
    fpr, tpr, _ = roc_curve(yte, y_prob)
except Exception as e:
    print("Could not compute ROC/AUC:", e)
else:
    plt.figure(figsize=(5, 4))
    plt.plot(fpr, tpr, label=f"ROC (AUC={auc:.3f})")
    plt.plot([0, 1], [0, 1], "--")  # chance-level diagonal for reference
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve — Logistic Regression")
    plt.legend(loc="lower right")
    plt.show()
