In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from scipy.stats import randint

In [2]:
x, y = make_classification(n_samples=1000, n_classes=2, weights=[0.8, 0.2], random_state=42)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

model = RandomForestClassifier(n_estimators=101, random_state=42)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

print("\nClassification Report (imbalanced):")
print(classification_report(y_test, y_pred))


Classification Report (imbalanced):
              precision    recall  f1-score   support

           0       0.91      0.99      0.95       157
           1       0.93      0.63      0.75        43

    accuracy                           0.91       200
   macro avg       0.92      0.81      0.85       200
weighted avg       0.91      0.91      0.90       200



**Grid Search**

In [3]:
param_gird = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 20, None],
    'bootstrap': [True, False],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf = RandomForestClassifier(random_state=42)

grid_search = GridSearchCV(
    rf,
    param_gird,
    cv=5,
    scoring='f1',
    n_jobs=-1,
    verbose=1
)

grid_search.fit(x_train, y_train)
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_:.3f}")

best_model = grid_search.best_estimator_
y_pred_grid_best = best_model.predict(x_test)
print(classification_report(y_test, y_pred_grid_best))

Fitting 5 folds for each of 216 candidates, totalling 1080 fits
Best parameters: {'bootstrap': False, 'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
Best score: 0.862
              precision    recall  f1-score   support

           0       0.91      0.99      0.95       157
           1       0.96      0.63      0.76        43

    accuracy                           0.92       200
   macro avg       0.94      0.81      0.85       200
weighted avg       0.92      0.92      0.91       200



**Random Search**

In [6]:
param_dist = {
    'n_estimators': randint(50, 300),
    'max_depth': randint(5, 30),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 10)
}

random_search = RandomizedSearchCV(
    rf,
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    scoring='f1',
    n_jobs=-1,
    random_state=42
)

random_search.fit(x_train, y_train)
print(f"Best parameters: {random_search.best_params_}")

best_model_random = random_search.best_estimator_
y_pred_random_best = best_model_random.predict(x_test)
print(classification_report(y_test, y_pred_random_best))

Best parameters: {'max_depth': 19, 'min_samples_leaf': 1, 'min_samples_split': 8, 'n_estimators': 58}
              precision    recall  f1-score   support

           0       0.90      0.98      0.94       157
           1       0.90      0.60      0.72        43

    accuracy                           0.90       200
   macro avg       0.90      0.79      0.83       200
weighted avg       0.90      0.90      0.89       200



**Bayesian Optimization**

In [13]:
from skopt import gp_minimize
from skopt.space import Integer, Real
from skopt.utils import use_named_args

space = [
    Integer(50, 300, name='n_estimators'),
    Integer(5, 30, name='max_depth'),
    Integer(2, 20, name='min_samples_split'),
    Integer(1, 10, name='min_samples_leaf')
]

@use_named_args(space)
def objective(**params):
    rf = RandomForestClassifier(**params, random_state=42)
    scores = cross_val_score(rf, x_train, y_train, cv=5, scoring='f1')
    return -scores.mean()


result = gp_minimize(objective, space, n_calls=50, random_state=42)
print(f"Best parameters: {result.x}")

Best parameters: [np.int64(275), np.int64(27), np.int64(2), np.int64(1)]
