In [2]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

data = pd.read_csv("heart.csv")
data.head(7)

X = data.drop("target", axis=1)
y = data['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

model = RandomForestClassifier(
    n_estimators=100, 
    max_features="sqrt", 
    max_depth=6, 
    max_leaf_nodes=6
)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(classification_report(y_pred, y_test))

              precision    recall  f1-score   support

           0       0.72      0.83      0.77       114
           1       0.85      0.74      0.79       143

    accuracy                           0.78       257
   macro avg       0.78      0.79      0.78       257
weighted avg       0.79      0.78      0.78       257



In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

param_grid = {
    'n_estimators': [100, 200],      # number of trees
    'max_depth': [None, 10, 20],     # tree depth
    'min_samples_split': [2, 5],     # min samples to split
    'min_samples_leaf': [1, 2],      # min samples per leaf
    'bootstrap': [True, False]       # bootstrap sampling
}

# Speed up: use all cores, add verbosity to track progress
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=3,                # reduce from 5 to 3 to cut runtime
    n_jobs=-1,           # use all CPU cores
    verbose=2            # show progress
)

grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
print("Best Estimator:", grid_search.best_estimator_)


Fitting 3 folds for each of 48 candidates, totalling 144 fits
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.4s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.6s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.6s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total

In [5]:
model_grid = RandomForestClassifier(max_depth=3,
                                    max_features="log2",
                                    max_leaf_nodes=3,
                                    n_estimators=50)
model_grid.fit(X_train, y_train)
y_pred_grid = model.predict(X_test)
print(classification_report(y_pred_grid, y_test))

              precision    recall  f1-score   support

           0       0.72      0.83      0.77       114
           1       0.85      0.74      0.79       143

    accuracy                           0.78       257
   macro avg       0.78      0.79      0.78       257
weighted avg       0.79      0.78      0.78       257



In [6]:
random_search = RandomizedSearchCV(RandomForestClassifier(),
                                   param_grid)
random_search.fit(X_train, y_train)
print(random_search.best_estimator_)

RandomForestClassifier(bootstrap=False, max_depth=20, min_samples_split=5)


In [7]:
model_random = RandomForestClassifier(max_depth=3,
                                      max_features='log2',
                                      max_leaf_nodes=6,
                                      n_estimators=100)
model_random.fit(X_train, y_train)
y_pred_rand = model.predict(X_test)
print(classification_report(y_pred_rand, y_test))

              precision    recall  f1-score   support

           0       0.72      0.83      0.77       114
           1       0.85      0.74      0.79       143

    accuracy                           0.78       257
   macro avg       0.78      0.79      0.78       257
weighted avg       0.79      0.78      0.78       257

