In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Read the preprocessed dataset from disk
df = pd.read_csv("../data/final_processed_dataset.csv")

# Separate the label column from the feature matrix
target_col = "Heart Disease Status"
y = df[target_col]
X = df.drop(columns=target_col)

# Hold out 20% of the rows for testing; stratify on y so both parts keep
# the same class proportions, and fix the seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Shuffled, seeded 5-fold stratified splitter used for cross-validation
cv = StratifiedKFold(
    n_splits=5,
    random_state=42,
    shuffle=True,
)


In [2]:
from sklearn.ensemble import RandomForestClassifier

# Single-step pipeline; the step name "clf" is what the "clf__" prefix in
# param_grid refers to.
pipe = Pipeline([("clf", RandomForestClassifier(random_state=42, n_jobs=-1))])
param_grid = {"clf__n_estimators": [50, 100], "clf__max_depth": [5, None]}

# Search every combination in param_grid, scoring each by F1 on the
# stratified folds defined by `cv`, using the training split only.
grid = GridSearchCV(pipe, param_grid, cv=cv, scoring="f1", n_jobs=-1)
grid.fit(X_train, y_train)

# Evaluate the selected model on the held-out test data.
# (The metric functions are already imported in the first cell, so they are
# not re-imported here.)
y_pred = grid.predict(X_test)
# Column 1 of predict_proba is used as the score for ROC AUC
# (assumed to be the positive class -- confirm against grid.classes_).
y_prob = grid.predict_proba(X_test)[:, 1]

print("Best Parameters:", grid.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_prob))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Best Parameters: {'clf__max_depth': None, 'clf__n_estimators': 100}
Accuracy: 0.963125
Precision: 0.9768339768339769
Recall: 0.94875
F1 Score: 0.9625871908687381
ROC AUC: 0.9765568359375002
Confusion Matrix:
 [[1564   36]
 [  82 1518]]


In [3]:
import os

import joblib

# joblib.dump does not create missing parent folders, so make sure the
# output directory exists first (no-op when it is already there).
os.makedirs("../model", exist_ok=True)

# Persist the best pipeline found by the grid search. The call is left as the
# cell's last expression so its return value is displayed as the cell output.
joblib.dump(grid.best_estimator_, "../model/heart_disease_model.joblib")


['../model/heart_disease_model.joblib']

In [5]:
# Save the column labels of X alongside the model artifact
feature_names_path = "../model/feature_names.joblib"
feature_names = X.columns
joblib.dump(feature_names, feature_names_path)


['../model/feature_names.joblib']