In [1]:
# Libraries

import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import make_scorer, fbeta_score, accuracy_score
from model_selection.functional_grid import FunctionalGridSearch
import multiprocessing
# Force the 'fork' start method for multiprocessing, but only where the
# platform offers it: set_start_method raises ValueError for an unsupported
# method (e.g. 'fork' on Windows), which would abort the notebook in its
# very first cell. Elsewhere the platform default is left in place.
if 'fork' in multiprocessing.get_all_start_methods():
    multiprocessing.set_start_method(method='fork', force=True)

### Functional GridSearchCV Setup

In [None]:
# Data: the breast-cancer dataset as a (features, target) pair of pandas objects
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

# Base model: a random forest with a fixed seed so reruns build the same trees
estimator = RandomForestClassifier(random_state=1)

# Hyperparameter grid: 2 * 3 * 2 * 2 * 1 = 24 candidate combinations
param_grid = dict(
    n_estimators=[100, 300],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
    bootstrap=[True],
)

# Metrics tracked during the search: F-beta with beta=2 (recall weighted
# more heavily than precision) and plain accuracy
f2_scorer = make_scorer(fbeta_score, beta=2)
acc_scorer = make_scorer(accuracy_score)

# Run the functional grid search: 5-fold CV, both metrics recorded, with
# 'f2' named as the refit metric.
# NOTE(review): the meaning of calibrate=True and verbose=-1 is defined by
# FunctionalGridSearch, which is not visible in this notebook - confirm there.
search = FunctionalGridSearch(
    estimator=estimator,
    param_grid=param_grid,
    cv=5,
    scoring=dict(f2=f2_scorer, accuracy=acc_scorer),
    n_jobs=1,
    return_train_score=True,
    refit='f2',
    calibrate=True,
    verbose=-1,
).fit(X, y)

### Results

<ol>
    <li>GridSearchCV results.</li>
    <li>Best estimator based on the refit performance metric (F2).</li>
    <li>Calibrated version of the best estimator.</li>
    <li>Best estimator based on the one-standard-error rule.</li>
    <li>Best estimator based on the one-standard-error rule and calibration.</li>
    <li>Calibration results for the one-standard-error estimator.</li>
</ol>

In [7]:
# Tabulate the search's cv_results_ with the highest mean test F2 first
(
    pd.DataFrame(search.cv_results_)
    .sort_values(by='mean_test_f2', ascending=False)
)

Unnamed: 0,params,param_n_estimators,param_max_depth,param_min_samples_split,param_min_samples_leaf,param_bootstrap,split0_fit_time,split1_fit_time,split2_fit_time,split3_fit_time,...,split0_train_accuracy,split1_train_accuracy,split2_train_accuracy,split3_train_accuracy,split4_train_accuracy,mean_test_accuracy,std_test_accuracy,mean_train_accuracy,std_train_accuracy,rank_test_accuracy
12,"{'n_estimators': 300, 'max_depth': None, 'min_...",300,,2,1,True,0.194592,0.194892,0.217629,0.21787,...,1.0,1.0,1.0,1.0,1.0,0.963111,0.02103,1.0,0.0,3
20,"{'n_estimators': 300, 'max_depth': 20, 'min_sa...",300,20.0,2,1,True,0.187801,0.18802,0.193936,0.188049,...,1.0,1.0,1.0,1.0,1.0,0.963111,0.02103,1.0,0.0,1
16,"{'n_estimators': 300, 'max_depth': 10, 'min_sa...",300,10.0,2,1,True,0.186739,0.18889,0.193303,0.189359,...,1.0,1.0,1.0,1.0,1.0,0.963111,0.02103,1.0,0.0,2
9,"{'n_estimators': 100, 'max_depth': 20, 'min_sa...",100,20.0,2,2,True,0.061554,0.0621,0.063885,0.062953,...,0.995604,0.993407,0.993407,0.993407,0.993421,0.959587,0.01423,0.993849,0.000878,15
5,"{'n_estimators': 100, 'max_depth': 10, 'min_sa...",100,10.0,2,2,True,0.06166,0.062129,0.063612,0.062487,...,0.995604,0.993407,0.993407,0.993407,0.993421,0.959587,0.01423,0.993849,0.000878,12
1,"{'n_estimators': 100, 'max_depth': None, 'min_...",100,,2,2,True,0.061252,0.061804,0.063581,0.062373,...,0.995604,0.993407,0.993407,0.993407,0.993421,0.959587,0.01423,0.993849,0.000878,14
2,"{'n_estimators': 100, 'max_depth': None, 'min_...",100,,5,1,True,0.062444,0.0629,0.065664,0.063723,...,0.997802,0.995604,0.997802,1.0,0.997807,0.957833,0.020287,0.997803,0.00139,23
6,"{'n_estimators': 100, 'max_depth': 10, 'min_sa...",100,10.0,5,1,True,0.062184,0.062702,0.065454,0.063488,...,0.997802,0.995604,0.997802,1.0,0.997807,0.957833,0.020287,0.997803,0.00139,21
10,"{'n_estimators': 100, 'max_depth': 20, 'min_sa...",100,20.0,5,1,True,0.061969,0.063567,0.064804,0.063592,...,0.997802,0.995604,0.997802,1.0,0.997807,0.957833,0.020287,0.997803,0.00139,19
0,"{'n_estimators': 100, 'max_depth': None, 'min_...",100,,2,1,True,0.072305,0.06368,0.066244,0.064324,...,1.0,1.0,1.0,1.0,1.0,0.959587,0.018878,1.0,0.0,7


In [10]:
# Display the best estimator, i.e. the one selected on the refit metric
# (the search was configured with refit='f2').

search.best_estimator_

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [11]:
# Display the calibrated counterpart of the best estimator.
# NOTE(review): presumably only populated because the search was run with
# calibrate=True - confirm against FunctionalGridSearch.

search.best_calibrated_estimator_

0,1,2
,estimator,RandomForestC...andom_state=1)
,method,'isotonic'
,cv,5
,n_jobs,
,ensemble,'auto'

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [12]:
# Display the estimator chosen by the one-standard-error rule
# (the "best least complex" model).
# NOTE(review): how model complexity is ranked is decided inside
# FunctionalGridSearch and is not visible in this notebook - confirm there.

search.one_se_estimator_

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [13]:
# Display the calibrated counterpart of the one-standard-error estimator
# (the "best least complex" model after calibration).
# NOTE(review): presumably only populated because the search was run with
# calibrate=True - confirm against FunctionalGridSearch.

search.one_se_calibrated_estimator_

0,1,2
,estimator,RandomForestC...andom_state=1)
,method,'isotonic'
,cv,5
,n_jobs,
,ensemble,'auto'

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [14]:
# Display the calibration results recorded for the one-standard-error
# calibrated estimator. In the saved output this is a dict with the keys
# best_model, best_method, best_ece, isotonic_ece and sigmoid_ece.
# NOTE(review): "ece" presumably stands for expected calibration error - confirm.

search.one_se_calibration_results_

{'best_model': CalibratedClassifierCV(cv=5, estimator=RandomForestClassifier(random_state=1),
                        method='isotonic'),
 'best_method': 'isotonic',
 'best_ece': 0.015441967240078257,
 'isotonic_ece': 0.015441967240078257,
 'sigmoid_ece': 0.036700749736940444}