In [1]:
# Cell 1: Import libraries and load data
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Read the heart-disease feature CSV, separate the predictors from the
# 'target' label, and hold out a stratified 20% test split with a fixed seed.
df = pd.read_csv('../data/final_selected_features_heart_disease.csv')
X, y = df.drop(columns='target'), df['target']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,  # keep the class ratio the same in both splits
    random_state=42,
)

In [2]:
# Cell 2: Perform Grid Search for Random Forest
# Hyperparameter candidates: 3 values for each of 4 settings (3**4 = 81 combinations)
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Seeded base forest wrapped in an exhaustive, accuracy-scored 5-fold search
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    rf,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,  # -1 asks for all available processors
    verbose=2,
)

# Run the cross-validated search on the training split only
grid_search.fit(X_train, y_train)

# Report the winning parameter combination
print("\nBest Parameters found by GridSearchCV:", grid_search.best_params_, sep="\n")

# Keep the estimator configured with those parameters for the later cells
best_rf_model = grid_search.best_estimator_

Fitting 5 folds for each of 81 candidates, totalling 405 fits

Best Parameters found by GridSearchCV:
{'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}


In [3]:
# Cell 3: Evaluate the tuned model
# Predict on the held-out test split with the best estimator from the search
y_pred_tuned = best_rf_model.predict(X_test)

# Per-class precision / recall / F1 plus overall accuracy
print("\nClassification Report for the Tuned Random Forest Model:")
print(classification_report(y_true=y_test, y_pred=y_pred_tuned))


Classification Report for the Tuned Random Forest Model:
              precision    recall  f1-score   support

           0       0.87      0.79      0.83        33
           1       0.77      0.86      0.81        28

    accuracy                           0.82        61
   macro avg       0.82      0.82      0.82        61
weighted avg       0.82      0.82      0.82        61



In [4]:
# Cell 4: Save the final model
from pathlib import Path

import joblib

# The model is saved in the 'models' folder, one level above the working directory
model_path = '../models/final_model.pkl'

# joblib.dump does not create missing directories; make sure the target folder
# exists so a fresh checkout does not fail here after the expensive grid search.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

joblib.dump(best_rf_model, model_path)

print("\nFinal tuned model saved successfully to models/final_model.pkl")


Final tuned model saved successfully to models/final_model.pkl
