In [1]:
import os
import sys

# Make the parent of the current working directory importable so that the
# project-local package imported below can be resolved.
# NOTE(review): this relies on the kernel's working directory being the
# notebook's own folder — confirm for your launch setup.
current_dir = os.getcwd()
path = os.path.dirname(current_dir)

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if path not in sys.path:
    sys.path.append(path)

from ml_workflow.calibrated_pipeline_hyperopt_cv import CalibratedPipelineHyperOptCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import numpy as np
import pandas as pd 

In [2]:
# Generate a synthetic classification dataset of 10,000 samples with a fixed
# seed, then wrap the feature matrix in a DataFrame (default integer column
# labels) before handing it to the pipeline.
X, y = make_classification(
    n_samples=10000,
    class_sep=0.7,
    random_state=42,
)
X = pd.DataFrame(data=X)

In [3]:
# Hyperparameter grid to search over. The keys are random-forest
# hyperparameters and each list holds the candidate values for that key.
param_grid = {
    'n_estimators': [100, 150, 300, 400, 500],
    'max_depth': [6, 8, 10, 15, 20],
    'max_features': [5, 6, 8, 10],
    'min_samples_split': [4, 5, 8, 10, 15, 20, 25, 50],
    'min_samples_leaf': [4, 5, 8, 10, 15, 20, 25, 50],
}

# Base estimator whose hyperparameters will be tuned.
estimator = RandomForestClassifier(
    criterion='entropy',
    random_state=30,
    n_jobs=12,
)

# Wrap the estimator in the project's tuning pipeline. No imputer or scaler is
# configured, and the mean of y (the fraction of positive labels for 0/1
# targets) is forwarded to the scorer as `known_skew`.
# NOTE(review): 'under' presumably selects under-sampling and 'atpe' the
# hyperopt search algorithm — confirm against CalibratedPipelineHyperOptCV.
clf = CalibratedPipelineHyperOptCV(
    base_estimator=estimator,
    param_grid=param_grid,
    imputer=None,
    scaler=None,
    resample='under',
    n_jobs=1,
    max_iter=15,
    hyperopt='atpe',
    scorer_kwargs={'known_skew': np.mean(y)},
)

In [4]:
# Run the hyperparameter search and fit the pipeline on the full dataset.
clf.fit(X, y)

 14%|█▍        | 14/100 [02:05<12:50,  8.96s/trial, best loss: 0.39305483733579577]


In [5]:
# Load the saved search results from the working directory.
# NOTE(review): presumably this file is written by clf.fit above — confirm.
# Unpickling executes arbitrary code, so only load files you trust.
df = pd.read_pickle("hyperopt_results.pkl")

In [8]:
# Show the recorded trials ordered from lowest to highest loss.
df.sort_values(by='loss')

Unnamed: 0,loss,loss_variance,iteration,train_time,max_depth,max_features,min_samples_leaf,min_samples_split,n_estimators
7,0.393055,0.000127,8,5.662017,10,10,8,4,150
6,0.3935,0.000147,7,5.807931,10,10,4,10,150
5,0.393869,0.000123,6,10.572412,10,10,10,25,300
11,0.396222,0.000113,12,11.012651,20,10,15,5,300
3,0.396672,0.000115,4,17.095741,8,10,8,5,500
9,0.396704,0.000189,10,11.035501,15,6,4,8,400
12,0.39755,0.000132,13,5.934397,20,10,15,25,150
8,0.39969,0.000152,9,11.092741,8,8,5,8,400
2,0.411619,9.1e-05,3,11.418071,6,10,20,10,400
0,0.418903,0.000165,1,10.312242,8,6,20,25,400
