In [1]:
# Libraries

import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import make_scorer, fbeta_score, accuracy_score
from model_selection.functional_grid import FunctionalGridSearch
import multiprocessing
from model_selection.logs import logs_by_fold, logs_by_candidate
# Select the 'fork' start method for worker processes.
# 'fork' exists only on POSIX platforms; requesting it where it is unavailable
# (e.g. Windows) raises and would abort this cell before anything else runs.
# So: select it when the platform offers it, otherwise keep the platform
# default and warn. force=True allows this cell to be re-run after a start
# method has already been set in the current kernel.
if 'fork' in multiprocessing.get_all_start_methods():
    multiprocessing.set_start_method(method='fork', force=True)
else:
    import warnings
    warnings.warn(
        "multiprocessing start method 'fork' is not available on this "
        "platform; keeping the default start method.",
        RuntimeWarning,
    )

### Functional GridSearchCV Setup

In [2]:
# Breast-cancer dataset; as_frame=True returns the features as a DataFrame
# and the target as a Series.
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

# Base model to tune; random_state is fixed so repeated runs are comparable.
estimator = RandomForestClassifier(random_state=1)

# Hyperparameter values to explore: 2 * 3 * 2 * 2 * 1 = 24 value combinations.
param_grid = dict(
    n_estimators=[100, 300],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
    bootstrap=[True],
)

# Metrics monitored during the search: F-beta with beta=2, and accuracy.
f2_scorer = make_scorer(fbeta_score, beta=2)
acc_scorer = make_scorer(accuracy_score)
scoring = {'f2': f2_scorer, 'accuracy': acc_scorer}

# Configure the search and fit it on the full dataset in one expression;
# `search` is whatever .fit returns and is what the cells below inspect.
search = FunctionalGridSearch(
    estimator=estimator,
    param_grid=param_grid,
    scoring=scoring,
    refit='f2',  # the 'f2' metric is the one used for refitting
    cv=5,
    n_jobs=1,
    return_train_score=True,
    calibrate=True,
    verbose=-1,
    # NOTE(review): R and N are FunctionalGridSearch-specific settings whose
    # meaning is not visible in this notebook — TODO document them here.
    R=4,
    N=1_000_000,
).fit(X, y)

### Results

<ol>
    <li>GridSearchCV results. </li>
    <p></p>
    <li>Best estimator based on the refitted performance metric.</li>
    <p></p>
    <li>Best estimator based on the one-standard-error rule.</li>
    <p></p>
    <li>Best estimator based on the one-standard-error rule and calibration.</li>
    <p></p>
    <li>Calibration results.</li>
    <p></p>
    <li>Execution traces (general, grouped by candidate, grouped by fold).</li>
</ol>

In [3]:
# Per-candidate cross-validation table, ordered so that the highest mean test F2 comes first

cv_results = pd.DataFrame(search.cv_results_)
cv_results.sort_values(by='mean_test_f2', ascending=False)

Unnamed: 0,params,param_n_estimators,param_max_depth,param_min_samples_split,param_min_samples_leaf,param_bootstrap,split0_fit_time,split1_fit_time,split2_fit_time,split3_fit_time,...,split0_train_accuracy,split1_train_accuracy,split2_train_accuracy,split3_train_accuracy,split4_train_accuracy,mean_test_accuracy,std_test_accuracy,mean_train_accuracy,std_train_accuracy,rank_test_accuracy
12,"{'n_estimators': 300, 'max_depth': None, 'min_...",300,,2,1,True,0.181966,0.183794,0.189772,0.185258,...,1.0,1.0,1.0,1.0,1.0,0.963111,0.02103,1.0,0.0,3
20,"{'n_estimators': 300, 'max_depth': 20, 'min_sa...",300,20.0,2,1,True,0.22857,0.201852,0.201161,0.189953,...,1.0,1.0,1.0,1.0,1.0,0.963111,0.02103,1.0,0.0,1
16,"{'n_estimators': 300, 'max_depth': 10, 'min_sa...",300,10.0,2,1,True,0.180568,0.184812,0.188831,0.185529,...,1.0,1.0,1.0,1.0,1.0,0.963111,0.02103,1.0,0.0,2
9,"{'n_estimators': 100, 'max_depth': 20, 'min_sa...",100,20.0,2,2,True,0.059771,0.060401,0.061398,0.061551,...,0.995604,0.993407,0.993407,0.993407,0.993421,0.959587,0.01423,0.993849,0.000878,15
5,"{'n_estimators': 100, 'max_depth': 10, 'min_sa...",100,10.0,2,2,True,0.059376,0.060676,0.062121,0.060575,...,0.995604,0.993407,0.993407,0.993407,0.993421,0.959587,0.01423,0.993849,0.000878,12
1,"{'n_estimators': 100, 'max_depth': None, 'min_...",100,,2,2,True,0.059936,0.060613,0.06292,0.060506,...,0.995604,0.993407,0.993407,0.993407,0.993421,0.959587,0.01423,0.993849,0.000878,14
2,"{'n_estimators': 100, 'max_depth': None, 'min_...",100,,5,1,True,0.060874,0.061751,0.062458,0.061137,...,0.997802,0.995604,0.997802,1.0,0.997807,0.957833,0.020287,0.997803,0.00139,23
6,"{'n_estimators': 100, 'max_depth': 10, 'min_sa...",100,10.0,5,1,True,0.060391,0.060981,0.062753,0.061595,...,0.997802,0.995604,0.997802,1.0,0.997807,0.957833,0.020287,0.997803,0.00139,21
10,"{'n_estimators': 100, 'max_depth': 20, 'min_sa...",100,20.0,5,1,True,0.060983,0.062626,0.063749,0.061311,...,0.997802,0.995604,0.997802,1.0,0.997807,0.957833,0.020287,0.997803,0.00139,19
0,"{'n_estimators': 100, 'max_depth': None, 'min_...",100,,2,1,True,0.069402,0.061372,0.063943,0.062611,...,1.0,1.0,1.0,1.0,1.0,0.959587,0.018878,1.0,0.0,7


In [4]:
# Show the estimator exposed as best_estimator_ (the search above was configured with refit='f2')

search.best_estimator_

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


### Maybe Monad

In [7]:
# Show the calibrated counterpart of the best estimator.
# best_calibrated_estimator_ is a wrapper object; its payload is read through .value.
# NOTE(review): the calibration-results cell tests .is_just before reading .value,
# but this access is unguarded — confirm what .value yields when the wrapper is empty.

search.best_calibrated_estimator_.value

0,1,2
,estimator,RandomForestC...andom_state=1)
,method,'isotonic'
,cv,5
,n_jobs,
,ensemble,'auto'

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [8]:
# Show the estimator exposed as one_se_estimator_ (per the Results list: the best
# estimator according to the one-standard-error rule). The payload is read through .value.
# NOTE(review): .value is read without an .is_just check here — confirm this is safe.

search.one_se_estimator_.value

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [9]:
# Show the estimator exposed as one_se_calibrated_estimator_ (per the Results list: the
# best estimator according to the one-standard-error rule and calibration).
# NOTE(review): .value is read without an .is_just check here — confirm this is safe.

search.one_se_calibrated_estimator_.value

0,1,2
,estimator,RandomForestC...andom_state=1)
,method,'isotonic'
,cv,5
,n_jobs,
,ensemble,'auto'

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [5]:
# Print the calibration results recorded for the one-standard-error model.
# The wrapper's .is_just flag is checked first, so nothing is printed when it holds no value.

one_se_calibration = search.one_se_calibration_results_
if one_se_calibration.is_just:
    print(one_se_calibration.value)

{'best_model': CalibratedClassifierCV(cv=5, estimator=RandomForestClassifier(random_state=1),
                       method='isotonic'), 'best_method': 'isotonic', 'best_ece': 0.015441967240078257, 'isotonic_ece': 0.015441967240078257, 'sigmoid_ece': 0.036700749736940444}


### Writer Monad

In [6]:
# Full execution trace returned by the search. The output below is a dict whose
# visible top-level key is 'by_fold'; a 'by_candidate' key is read in a later cell.
search.get_execution_logs()


{'by_fold': {0: [{'stage': 'fit_and_score',
    'event': 'estimator_cloned',
    'candidate_index': 0,
    'split_index': 0,
    'params': {'n_estimators': 100,
     'max_depth': None,
     'min_samples_split': 2,
     'min_samples_leaf': 1,
     'bootstrap': True}},
   {'stage': 'timing',
    'event': 'timing_recorded',
    'candidate_index': 0,
    'split_index': 0,
    'fit_time': 0.071807861328125,
    'score_time': 0.007704973220825195},
   {'stage': 'fit_and_score',
    'event': 'estimator_cloned',
    'candidate_index': 1,
    'split_index': 0,
    'params': {'n_estimators': 100,
     'max_depth': None,
     'min_samples_split': 2,
     'min_samples_leaf': 2,
     'bootstrap': True}},
   {'stage': 'timing',
    'event': 'timing_recorded',
    'candidate_index': 1,
    'split_index': 0,
    'fit_time': 0.06125473976135254,
    'score_time': 0.007403850555419922},
   {'stage': 'fit_and_score',
    'event': 'estimator_cloned',
    'candidate_index': 2,
    'split_index': 0,
    'pa

In [4]:
# Execution trace grouped by candidate: in the output below each outer key is a
# candidate index and its list holds that candidate's events across splits.
search.get_execution_logs()["by_candidate"]

{0: [{'stage': 'fit_and_score',
   'event': 'estimator_cloned',
   'candidate_index': 0,
   'split_index': 0,
   'params': {'n_estimators': 100,
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'bootstrap': True}},
  {'stage': 'predict',
   'event': 'predict_timed',
   'candidate_index': 0,
   'split_index': 0,
   'predict_kind': 'predict_proba',
   'predict_time': 0.0012888908386230469},
  {'stage': 'timing',
   'event': 'timing_recorded',
   'candidate_index': 0,
   'split_index': 0,
   'fit_time': 0.06940221786499023,
   'predict_time': 0.0012888908386230469,
   'score_time': 0.00710296630859375},
  {'stage': 'fit_and_score',
   'event': 'estimator_cloned',
   'candidate_index': 0,
   'split_index': 1,
   'params': {'n_estimators': 100,
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'bootstrap': True}},
  {'stage': 'predict',
   'event': 'predict_timed',
   'candidate_index': 0,
   'split_index': 1,
   'predict_kind

In [8]:
# Execution trace grouped by fold: in the output below each outer key is a
# split index and its list holds the events of every candidate on that split.
search.get_execution_logs()["by_fold"]

{0: [{'stage': 'fit_and_score',
   'event': 'estimator_cloned',
   'candidate_index': 0,
   'split_index': 0,
   'params': {'n_estimators': 100,
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'bootstrap': True}},
  {'stage': 'timing',
   'event': 'timing_recorded',
   'candidate_index': 0,
   'split_index': 0,
   'fit_time': 0.071807861328125,
   'score_time': 0.007704973220825195},
  {'stage': 'fit_and_score',
   'event': 'estimator_cloned',
   'candidate_index': 1,
   'split_index': 0,
   'params': {'n_estimators': 100,
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 2,
    'bootstrap': True}},
  {'stage': 'timing',
   'event': 'timing_recorded',
   'candidate_index': 1,
   'split_index': 0,
   'fit_time': 0.06125473976135254,
   'score_time': 0.007403850555419922},
  {'stage': 'fit_and_score',
   'event': 'estimator_cloned',
   'candidate_index': 2,
   'split_index': 0,
   'params': {'n_estimators': 100,
    'max_depth': 