In [None]:
# In notebook: 02b_model_hyperparameter_tuning.ipynb
import pandas as pd
import joblib
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

# --- Load Data ---
# Directory that holds the serialized training split.
PROCESSED_DATA_DIR = os.path.join(
    r"D:\Satvik\Projects\College\Minor\Code\backend", "data", "processed"
)

# Deserialize the training features first, then the matching labels.
X_train, y_train = (
    joblib.load(os.path.join(PROCESSED_DATA_DIR, artifact))
    for artifact in ("X_train.joblib", "y_train.joblib")
)

# --- Define Parameter Grid ---
# Search space handed to RandomizedSearchCV: one integer distribution per
# hyperparameter. scipy's randint(low, high) draws uniformly from the
# half-open range [low, high), so every upper bound below is exclusive
# (e.g. n_estimators is sampled from 100..499).
param_dist = dict(
    n_estimators=randint(100, 500),
    max_depth=randint(5, 20),
    min_samples_split=randint(2, 10),
    min_samples_leaf=randint(1, 4),
)

# --- Set up Randomized Search ---
# Base estimator whose hyperparameters are being tuned; the seed is fixed so
# repeated runs build the same forests.
rf = RandomForestClassifier(random_state=42)

# Sample 50 candidate settings from param_dist, score each one by ROC AUC
# under 5-fold cross-validation, and spread the fits over all CPU cores.
rand_search = RandomizedSearchCV(
    rf,
    param_dist,
    n_iter=50,
    scoring='roc_auc',
    cv=5,
    n_jobs=-1,
    random_state=42,
)

# --- Run the Search ---
print("Starting hyperparameter search...")
rand_search.fit(X_train, y_train)
print("Search complete.")

# --- Get Best Model ---
# Keep a handle on the winning estimator (refit is left at its default, so
# the search has already retrained it with the best parameters).
best_rf_tuned = rand_search.best_estimator_

# Report the best mean cross-validated AUC and the settings that produced it.
print(f"Best AUC Score from search: {rand_search.best_score_:.4f}")
print("Best parameters found:", rand_search.best_params_, sep="\n")

# --- Save the NEW Best Model ---
# Destination of the tuned classifier; an existing file at this path is
# overwritten.
MODEL_PATH = os.path.join(r"D:\Satvik\Projects\College\Minor\Code\backend", "models", "best_heart_disease_classifier.joblib")

# joblib.dump does not create missing parent directories, so make sure the
# "models" folder exists first; otherwise the result of the whole search
# would be lost to a FileNotFoundError. exist_ok=True keeps reruns harmless.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(best_rf_tuned, MODEL_PATH)
print(f"\nTUNED best model saved successfully to: {MODEL_PATH}")

Starting hyperparameter search...
Search complete.
Best AUC Score from search: 0.9282
Best parameters found:
{'max_depth': 14, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 369}

TUNED best model saved successfully to: D:\Satvik\Projects\College\Minor\Code\backend\models\best_heart_disease_classifier.joblib
