In [1]:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [2]:
# Import for the LHD method.
# The `hypercube` package is loaded from a local checkout rather than from an
# installed distribution. The directory that contains the checkout can be
# supplied through the HYPERCUBE_ROOT environment variable; when the variable
# is unset or empty, the original hard-coded location is used so the notebook
# keeps working unchanged on the machine it was written on.
import os
import sys

HYPERCUBE_ROOT = os.environ.get("HYPERCUBE_ROOT") or "/Users/lazayxc/Documents/GitHub/"
# Position 1 leaves the existing first sys.path entry in front of the checkout.
sys.path.insert(1, HYPERCUBE_ROOT)
from hypercube.core.base_LHDmaximin import LHSTuner

import numpy as np
import pandas as pd

In [3]:
# Synthetic classification data used by both tuning experiments below.
# A single seed is reused for the dataset and the classifier so that every
# run of the notebook works on the same data.
seed = 42
x, y = make_classification(
    n_samples=300,      # number of rows
    n_features=10,      # number of columns in x
    class_sep=0.5,
    random_state=seed,
)



In [7]:

# Base estimator handed to both GridSearchCV and LHSTuner further down;
# seeded with the same value as the dataset.
clf = RandomForestClassifier(
    random_state=seed,
)

Grid Search Design

In [15]:
# Inclusive (low, high) integer bounds for each tuned hyperparameter.
# The same dictionary is passed to LHSTuner later in the notebook.
param_space = {
    "n_estimators": (30, 100),
    "min_samples_leaf": (2, 6)
}

# Exhaustive baseline: every (low, high) pair is expanded into the full run of
# integers low..high (hence the `+ 1`), and GridSearchCV evaluates every
# combination with 5-fold cross-validation.
# NOTE(review): no `scoring` is passed here, whereas LHSTuner below is given
# "recall" -- confirm both searches are meant to optimise the same metric.
grid_search = GridSearchCV(
    clf,
    param_grid={
        name: range(low, high + 1)
        for name, (low, high) in param_space.items()
    },
    cv=5,
)

In [17]:
# Run the exhaustive search over the whole grid on the synthetic dataset.
grid_search.fit(X=x, y=y)

In [34]:
# Pull the winning configuration out of the fitted search: the estimator
# itself and the parameter dictionary that produced it.
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Show the selected estimator.
print(best_model)


RandomForestClassifier(min_samples_leaf=2, n_estimators=30, random_state=42)


In [28]:
# Row of cv_results_ that corresponds to the best parameter combination.
best_index = grid_search.best_index_

# Number of CV splits the fitted search actually used. Reading it from the
# fitted object (instead of hard-coding 5 and re-using the constructor's `cv`
# argument) keeps this cell correct when `cv` is changed, left as None, or
# given as a splitter object rather than an integer.
n_splits = grid_search.n_splits_

# Extracting the individual CV scores for the best parameter set:
# cv_results_ stores one 'split<i>_test_score' array per fold.
best_cv_scores_grid = [
    grid_search.cv_results_[f'split{i}_test_score'][best_index]
    for i in range(n_splits)
]

# Total number of runs: one model fit per (parameter combination, fold) pair.
total_runs_grid = len(grid_search.cv_results_['mean_test_score']) * n_splits

print("Best CV Score:", best_cv_scores_grid)
print("Total Number of Runs:", total_runs_grid)

Best CV Score: [0.8833333333333333, 0.6666666666666666, 0.8333333333333334, 0.9, 0.9]
Total Number of Runs: 1775


LHD Maximin Design

In [32]:
# Build the LHD tuner from the same estimator and the same `param_space`
# bounds that defined the grid search above.
# NOTE(review): the third positional argument "recall" is presumably the
# scoring metric -- confirm against LHSTuner's signature. GridSearchCV above
# was created without an explicit `scoring`, so the two searches may not be
# optimising the same metric.
tune = LHSTuner(clf, param_space, "recall", cv=5)
# Fit on the same data; method='lm_fit' is passed through to the tuner
# (meaning defined by LHSTuner -- TODO confirm).
tune.fit(x, y, method='lm_fit')

In [33]:
# Print the tuner's report for the fitted search.
tune.summary()

# Evaluation budget of the LHD approach, computed the same way as for the grid
# search: the tuner's `n_samples` attribute multiplied by its `cv` attribute.
lhd_points, lhd_folds = tune.n_samples, tune.cv
total_runs_lhd = lhd_points * lhd_folds
print("Total Number of Runs:", total_runs_lhd)

Location:
                      coef   p_val       R^2
intercept         0.845885  0.0000  0.189348
n_estimators      0.000168  0.2432          
min_samples_leaf -0.000103  0.9650          

Dispersion:
                      coef   p_val       R^2
intercept        -5.528390  0.0000  0.353707
n_estimators     -0.010396  0.0940          
min_samples_leaf  0.017948  0.8515          

Best parameter combination: {'n_estimators': 60, 'min_samples_leaf': 3}
Best CV scores: [0.8519 0.8438 0.9333 0.8214 0.8824]
Total Number of Runs: 50
