In [1]:

import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC

In [2]:
# Read the dataset from disk, then split it into the feature matrix X
# (every column except 'target') and the label vector y ('target').
df = pd.read_csv('data/heart_reduced.csv')
X, y = df.drop('target', axis=1), df['target']


In [3]:

# --- Random Forest Tuning ---
# Exhaustive grid search over 3 x 4 x 3 = 36 parameter combinations, each
# scored by 5-fold cross-validated ROC AUC. The search is fit on the full
# X / y, so best_score_ is a mean cross-validation score rather than a score
# on a separate held-out set.
rf = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 400],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# n_jobs=-1 runs the candidate fits in parallel on all available cores.
gs = GridSearchCV(rf, param_grid, cv=5, scoring='roc_auc', n_jobs=-1)
gs.fit(X, y)
print('Best RF params:', gs.best_params_)
print('Best RF AUC:', gs.best_score_)

# Save best RF model
# joblib.dump does not create missing parent directories, so make sure
# 'models/' exists first; otherwise a fresh checkout would fail here only
# after the whole grid search has already been run.
os.makedirs('models', exist_ok=True)
joblib.dump(gs.best_estimator_, 'models/best_random_forest.pkl')
print('Saved best Random Forest model.')


Best RF params: {'max_depth': 5, 'min_samples_split': 10, 'n_estimators': 200}
Best RF AUC: 0.9118451378868044
Saved best Random Forest model.


In [4]:
# --- SVM Tuning ---
# Randomized search: 6 of the 12 (C, kernel, gamma) combinations are sampled
# and each is scored by 4-fold cross-validated ROC AUC on the full X / y.
# NOTE(review): per the SVC docs 'gamma' only affects the rbf/poly/sigmoid
# kernels, so linear candidates that differ only in gamma are effectively
# duplicates - consider a per-kernel search space. Also confirm X is already
# scaled upstream, since SVC is sensitive to feature scale.
svc = SVC(random_state=42, probability=True)
param_dist = dict(
    C=[0.1, 1, 10],
    kernel=['rbf', 'linear'],
    gamma=['scale', 'auto'],
)

rs = RandomizedSearchCV(
    estimator=svc,
    param_distributions=param_dist,
    scoring='roc_auc',
    cv=4,
    n_iter=6,
    random_state=42,
    n_jobs=-1,
)
rs.fit(X, y)

print('Best SVM params:', rs.best_params_)
print('Best SVM AUC:', rs.best_score_)


Best SVM params: {'kernel': 'linear', 'gamma': 'scale', 'C': 1}
Best SVM AUC: 0.9127485140397622


In [5]:
# Persist the best SVM found by the randomized search.
# joblib.dump does not create missing parent directories, so make sure
# 'models/' exists first instead of failing with FileNotFoundError.
os.makedirs('models', exist_ok=True)
joblib.dump(rs.best_estimator_, 'models/best_svm.pkl')
print('Saved best SVM model.')

Saved best SVM model.
