In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# Build a synthetic stand-in for a heart disease dataset: 1000 rows,
# 8 numeric features, and a binary target generated with 70/30 class weights.
X, y = make_classification(
    n_samples=1000,
    n_features=8,
    n_informative=5,
    n_redundant=1,
    n_clusters_per_class=2,
    weights=[0.7, 0.3],
    random_state=42,
)

# Wrap the feature matrix in a DataFrame and append the target as the
# last column, then show how many rows fall into each class.
feature_names = [f"feature_{i}" for i in range(8)]
df = pd.DataFrame(X, columns=feature_names).assign(heart_disease=y)
print(df["heart_disease"].value_counts())

# Prepare data: separate the feature columns from the target column.
X = df.drop("heart_disease", axis=1)
y = df["heart_disease"]

# Split BEFORE scaling so the held-out rows never contribute to the
# scaler's mean/variance (fitting the scaler on all rows leaks test-set
# information into preprocessing). stratify=y keeps the class proportions
# of y the same in the train and test partitions.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the scaling statistics from the training partition only, then
# apply that same fitted transform to the test partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-only statistics; kept so that
# any code still referring to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Fit a depth-limited random forest and report accuracy on the hold-out set.
model = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=5)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Test accuracy:", accuracy_score(y_test, y_pred))

# Plain (non-stratified) 5-fold cross-validation on the training split,
# shuffled with a fixed seed and scored by accuracy.
cv = KFold(n_splits=5, random_state=42, shuffle=True)
cv_scores = cross_val_score(
    model, X_train, y_train, scoring="accuracy", cv=cv
)

# Summarise the per-fold accuracies by their mean and standard deviation.
cv_mean_score, cv_std_score = np.mean(cv_scores), np.std(cv_scores)

print(f"5-Fold CV scores: {cv_scores}")
print(f"Mean: {cv_mean_score:.4f}, Std: {cv_std_score:.4f}")

# Stratified 5-fold cross-validation on the training split, shuffled with
# a fixed seed and scored by accuracy.
stratified_cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
stratified_cv_scores = cross_val_score(
    model, X_train, y_train, scoring="accuracy", cv=stratified_cv
)

# Summarise the per-fold accuracies by their mean and standard deviation.
stratified_cv_mean = np.mean(stratified_cv_scores)
stratified_cv_std = np.std(stratified_cv_scores)

print(f"Stratified 5-Fold CV scores: {stratified_cv_scores}")
print(f"Mean: {stratified_cv_mean:.4f}, Std: {stratified_cv_std:.4f}")

# Repeat the stratified cross-validation with two further metrics,
# F1 first and ROC-AUC second, reusing the same splitter.
cv_f1_scores, cv_roc_auc_scores = (
    cross_val_score(model, X_train, y_train, cv=stratified_cv, scoring=metric)
    for metric in ("f1", "roc_auc")
)

# Average each metric over the folds.
cv_f1_mean = np.mean(cv_f1_scores)
cv_roc_auc_mean = np.mean(cv_roc_auc_scores)

print(f"F1 CV scores: {cv_f1_scores}")
print(f"Mean F1: {cv_f1_mean:.4f}")
print(f"ROC-AUC CV scores: {cv_roc_auc_scores}")
print(f"Mean ROC-AUC: {cv_roc_auc_mean:.4f}")
