In [1]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

import pandas as pd

In [2]:
# Boston housing regression data as plain (features, target) arrays.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell
# needs an older release -- confirm the pinned environment.
X, y = load_boston(return_X_y=True)

# Hold out 25% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=3
)

In [3]:
# Pipeline template. Every search space below overrides both the 'preprocess'
# and the 'model' step, so the estimators given here are only placeholders.
pipe = Pipeline([('preprocess', StandardScaler()),
                 ('model', Ridge())])


def _linear_search_space(model):
    """Build the search space shared by the penalised linear models.

    Parameters
    ----------
    model : estimator
        Linear model exposing an ``alpha`` parameter (Ridge or Lasso here).

    Returns
    -------
    dict
        Search space that standardises the inputs, plugs ``model`` into the
        'model' step and tunes ``alpha`` log-uniformly over [1e-6, 1e6].
    """
    return {'preprocess': Categorical([StandardScaler()]),
            'model': Categorical([model]),
            'model__alpha': Real(1e-6, 1e6, prior='log-uniform')}


# Ridge and Lasso differ only in the estimator, so both use the same helper.
ridge_search = _linear_search_space(Ridge(random_state=23))
lasso_search = _linear_search_space(Lasso(random_state=23))

# An integer max_features cannot exceed the number of input columns, so the
# upper bound is read from the training data instead of hard-coding 13.
n_features = X_train.shape[1]

# The random forest runs on unscaled inputs (the 'preprocess' step is None).
rf_search = {'preprocess': Categorical([None]),
             'model': Categorical([RandomForestRegressor(n_estimators=1000,
                                                         random_state=23)]),
             'model__max_features': Integer(1, n_features),
             'model__min_samples_leaf': Integer(1, 150)}

In [4]:
# One Bayesian optimisation budget per model family: 50 evaluations for the
# random forest and 25 each for ridge and lasso, scored by negative MSE.
# random_state seeds the optimiser so that a fresh Restart & Run All can
# revisit the same candidates instead of producing a different search each time.
opt = BayesSearchCV(
    pipe,
    search_spaces=[(rf_search, 50),
                   (ridge_search, 25),
                   (lasso_search, 25)],
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    random_state=23,
)

In [5]:
# Run the search on the training split only; X_test / y_test are not used here.
opt.fit(X_train, y_train)



In [6]:
# Show the best pipeline found by the search.
opt.best_estimator_

Pipeline(steps=[('preprocess', None), ('model', RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features=7, max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=1,
           oob_score=False, random_state=23, verbose=0, warm_start=False))])

In [7]:
# Position of the best candidate within the recorded search results.
opt.best_index_

40

In [8]:
# Score of the best candidate; scoring is negative MSE, so closer to 0 is better.
opt.best_score_

-11.386471411292892

In [9]:
# Demonstrates that cv_results_ cannot be turned into a DataFrame directly:
# its arrays have different lengths. The error is caught and printed so the
# notebook still runs top to bottom; the next cell builds the frame differently.
try:
    cv_results = pd.DataFrame(opt.cv_results_)
except ValueError as err:
    print('{}: {}'.format(type(err).__name__, err))

ValueError: arrays must all be same length

In [10]:
# cv_results_ holds arrays of different lengths, so build one row per key
# (orient='index' pads the shorter ones with missing values) and then flip the
# frame so that every key becomes a column again.
# NOTE(review): the padding is positional, so the shorter param_* columns may
# not line up with the candidate stored in the same row -- verify before
# relying on those columns.
cv_results = (
    pd.DataFrame
    .from_dict(opt.cv_results_, orient='index')
    .T
)

In [11]:
# Column overview; the non-null counts show which columns were padded.
cv_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 21 columns):
split0_test_score                120 non-null object
split1_test_score                120 non-null object
split2_test_score                120 non-null object
mean_test_score                  120 non-null object
std_test_score                   120 non-null object
rank_test_score                  120 non-null object
split0_train_score               120 non-null object
split1_train_score               120 non-null object
split2_train_score               120 non-null object
mean_train_score                 120 non-null object
std_train_score                  120 non-null object
mean_fit_time                    120 non-null object
std_fit_time                     120 non-null object
mean_score_time                  120 non-null object
std_score_time                   120 non-null object
param_model                      120 non-null object
param_model__max_features        56 non-null ob

In [12]:
# Lift the column display limit so wide result tables are shown in full.
pd.set_option('display.max_columns', None)

In [13]:
# Preview the first rows only; dumping the whole results table bloats the
# notebook and hides the narrative.
cv_results.head(10)

Unnamed: 0,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model,param_model__max_features,param_model__min_samples_leaf,param_preprocess,params,param_model__alpha
0,-50.7133,-52.2483,-48.9553,-50.6392,1.34361,2,-48.7957,-47.6045,-47.9312,-48.1105,0.502571,1.57915,0.0345849,0.0925241,0.00968129,"(DecisionTreeRegressor(criterion='mse', max_de...",1,29,,{'model': (DecisionTreeRegressor(criterion='ms...,1.28773e-06
1,-88.776,-86.0324,-82.3769,-85.7365,2.62245,4,-83.9689,-85.3269,-87.2102,-85.502,1.32904,1.58134,0.0271465,0.0852067,0.00218268,"(DecisionTreeRegressor(criterion='mse', max_de...",7,125,,{'model': (DecisionTreeRegressor(criterion='ms...,1.89212e-06
2,-86.8775,-83.7639,-80.1385,-83.602,2.75535,3,-82.0867,-83.1711,-84.8108,-83.3562,1.11976,1.53316,0.171285,0.08258,0.000302061,"(DecisionTreeRegressor(criterion='mse', max_de...",9,85,,{'model': (DecisionTreeRegressor(criterion='ms...,98873.3
3,-88.776,-86.0324,-82.3769,-85.7365,2.62245,4,-83.9689,-85.3269,-87.2102,-85.502,1.32904,1.42394,0.0405954,0.0790826,0.00242244,"(DecisionTreeRegressor(criterion='mse', max_de...",13,89,,{'model': (DecisionTreeRegressor(criterion='ms...,0.00764325
4,-88.776,-86.0324,-82.3769,-85.7365,2.62245,4,-83.9689,-85.3269,-87.2102,-85.502,1.32904,1.54105,0.00371309,0.0738611,0.0120205,"(DecisionTreeRegressor(criterion='mse', max_de...",9,125,,{'model': (DecisionTreeRegressor(criterion='ms...,72843.5
5,-88.776,-86.0324,-82.3769,-85.7365,2.62245,4,-83.9689,-85.3269,-87.2102,-85.502,1.32904,1.50389,0.050936,0.0817646,0.00035772,"(DecisionTreeRegressor(criterion='mse', max_de...",12,127,,{'model': (DecisionTreeRegressor(criterion='ms...,0.00574369
6,-88.776,-86.0324,-82.3769,-85.7365,2.62245,4,-83.9689,-85.3269,-87.2102,-85.502,1.32904,1.53631,0.0575112,0.0744499,0.0146188,"(DecisionTreeRegressor(criterion='mse', max_de...",13,110,,{'model': (DecisionTreeRegressor(criterion='ms...,0.001493
7,-47.3266,-43.3979,-43.1161,-44.6207,1.92438,1,-42.331,-45.6707,-44.1698,-44.0572,1.36575,1.44699,0.0972327,0.0468646,0.000619456,"(DecisionTreeRegressor(criterion='mse', max_de...",10,63,,{'model': (DecisionTreeRegressor(criterion='ms...,199.264
8,-51.9851,-45.8006,-44.4385,-47.4202,3.28793,7,-45.8268,-49.0497,-46.3873,-47.0879,1.40591,1.56994,0.0416692,0.0764669,0.00891182,"(DecisionTreeRegressor(criterion='mse', max_de...",12,77,,{'model': (DecisionTreeRegressor(criterion='ms...,0.000215067
9,-88.776,-86.0324,-82.3769,-85.7365,2.62245,8,-83.9689,-85.3269,-87.2102,-85.502,1.32904,1.46925,0.0207066,0.0843807,0.00103969,"(DecisionTreeRegressor(criterion='mse', max_de...",5,149,,{'model': (DecisionTreeRegressor(criterion='ms...,0.00220952


In [14]:
# Top rows ordered by rank_test_score, limited to a preview.
# NOTE(review): in the captured output the same rank appears on rows with very
# different mean_test_score values, so this column does not look like a single
# global ranking -- prefer mean_test_score for comparing candidates.
cv_results.sort_values('rank_test_score').head(10)

Unnamed: 0,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model,param_model__max_features,param_model__min_samples_leaf,param_preprocess,params,param_model__alpha
59,-29.5969,-24.5318,-25.6881,-26.6135,2.16976,1,-21.3829,-24.0624,-22.3994,-22.6149,1.10446,0.00966001,0.00772464,0.000333865,3.20253e-05,"Ridge(alpha=0.0076432549944926006, copy_X=True...",,,"StandardScaler(copy=True, with_mean=True, with...","{'model': Ridge(alpha=0.0076432549944926006, c...",0.000808689
40,-12.4429,-11.4336,-10.2745,-11.3865,0.886431,1,-1.65453,-1.87034,-1.66251,-1.72913,0.0999039,2.20338,0.08885,0.0927446,0.01409,"(DecisionTreeRegressor(criterion='mse', max_de...",7,1,,{'model': (DecisionTreeRegressor(criterion='ms...,0.135904
53,-12.3149,-11.8926,-10.925,-11.7124,0.581883,1,-1.6696,-1.79899,-1.68386,-1.71748,0.0579269,2.17859,0.115964,0.117067,0.018813,"(DecisionTreeRegressor(criterion='mse', max_de...",5,1,,{'model': (DecisionTreeRegressor(criterion='ms...,0.00138687
24,-21.5723,-23.8063,-19.761,-21.7128,1.6523,1,-8.57359,-9.41425,-8.86135,-8.94973,0.348843,1.6413,0.0128847,0.0990169,0.00481188,"(DecisionTreeRegressor(criterion='mse', max_de...",1,2,,{'model': (DecisionTreeRegressor(criterion='ms...,6.59285e-06
22,-46.1334,-38.319,-40.3905,-41.6262,3.30925,1,-38.4882,-40.0192,-41.3991,-39.9688,1.18887,1.85166,0.022054,0.0728689,0.0176717,"(DecisionTreeRegressor(criterion='mse', max_de...",12,51,,{'model': (DecisionTreeRegressor(criterion='ms...,24.5436
21,-46.1334,-38.319,-40.3905,-41.6262,3.30925,1,-38.4882,-40.0192,-41.3991,-39.9688,1.18887,1.84367,0.0560546,0.0907603,0.00862781,"(DecisionTreeRegressor(criterion='mse', max_de...",12,51,,{'model': (DecisionTreeRegressor(criterion='ms...,3.15774
20,-46.1334,-38.319,-40.3905,-41.6262,3.30925,1,-38.4882,-40.0192,-41.3991,-39.9688,1.18887,1.81216,0.116782,0.0875514,0.00142838,"(DecisionTreeRegressor(criterion='mse', max_de...",12,51,,{'model': (DecisionTreeRegressor(criterion='ms...,8.00955e-05
19,-46.1334,-38.319,-40.3905,-41.6262,3.30925,1,-38.4882,-40.0192,-41.3991,-39.9688,1.18887,1.62806,0.0554325,0.0858192,0.00240366,"(DecisionTreeRegressor(criterion='mse', max_de...",12,51,,{'model': (DecisionTreeRegressor(criterion='ms...,9.32562
18,-46.1334,-38.319,-40.3905,-41.6262,3.30925,1,-38.4882,-40.0192,-41.3991,-39.9688,1.18887,1.77812,0.131621,0.0762237,0.0119958,"(DecisionTreeRegressor(criterion='mse', max_de...",12,51,,{'model': (DecisionTreeRegressor(criterion='ms...,20.6002
17,-46.1334,-38.319,-40.3905,-41.6262,3.30925,1,-38.4882,-40.0192,-41.3991,-39.9688,1.18887,1.69107,0.0645907,0.0897492,0.00312601,"(DecisionTreeRegressor(criterion='mse', max_de...",12,51,,{'model': (DecisionTreeRegressor(criterion='ms...,6.68367e-05


In [15]:
# Ten best candidates by mean cross-validated score (negative MSE, so the
# largest value is the best); limited to a preview instead of the full table.
cv_results.sort_values('mean_test_score', ascending=False).head(10)

Unnamed: 0,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model,param_model__max_features,param_model__min_samples_leaf,param_preprocess,params,param_model__alpha
40,-12.4429,-11.4336,-10.2745,-11.3865,0.886431,1,-1.65453,-1.87034,-1.66251,-1.72913,0.0999039,2.20338,0.08885,0.0927446,0.01409,"(DecisionTreeRegressor(criterion='mse', max_de...",7,1,,{'model': (DecisionTreeRegressor(criterion='ms...,0.135904
53,-12.3149,-11.8926,-10.925,-11.7124,0.581883,1,-1.6696,-1.79899,-1.68386,-1.71748,0.0579269,2.17859,0.115964,0.117067,0.018813,"(DecisionTreeRegressor(criterion='mse', max_de...",5,1,,{'model': (DecisionTreeRegressor(criterion='ms...,0.00138687
49,-12.8554,-11.815,-10.8368,-11.8384,0.8248,2,-1.65979,-1.79869,-1.77098,-1.74315,0.0600234,2.45964,0.024541,0.101671,0.00823824,"(DecisionTreeRegressor(criterion='mse', max_de...",10,1,,{'model': (DecisionTreeRegressor(criterion='ms...,0.000107876
48,-12.7753,-11.9803,-11.1325,-11.9648,0.671189,3,-1.73675,-1.84431,-1.70293,-1.76133,0.0602781,1.9789,0.0368014,0.0911065,0.0151309,"(DecisionTreeRegressor(criterion='mse', max_de...",4,1,,{'model': (DecisionTreeRegressor(criterion='ms...,0.00392893
32,-13.3226,-11.9039,-10.9569,-12.0644,0.973032,1,-1.60766,-1.77553,-1.76551,-1.71624,0.0768844,2.82172,0.0957889,0.100315,0.00538975,"(DecisionTreeRegressor(criterion='mse', max_de...",13,1,,{'model': (DecisionTreeRegressor(criterion='ms...,14612.5
36,-14.3887,-12.7346,-12.1667,-13.1001,0.943634,2,-2.7899,-3.45191,-3.25236,-3.16472,0.277277,2.74763,0.0860603,0.100538,0.00717235,"(DecisionTreeRegressor(criterion='mse', max_de...",13,2,,{'model': (DecisionTreeRegressor(criterion='ms...,0.194424
54,-14.2992,-13.7439,-13.8794,-13.975,0.236662,4,-4.79747,-5.9097,-5.25825,-5.3218,0.456285,2.06447,0.0336294,0.0807774,0.0182533,"(DecisionTreeRegressor(criterion='mse', max_de...",8,3,,{'model': (DecisionTreeRegressor(criterion='ms...,5.25771e-05
55,-14.8769,-14.5495,-14.8878,-14.7717,0.156864,5,-6.85418,-7.96408,-7.12675,-7.315,0.472262,1.84472,0.151951,0.0644566,0.0182445,"(DecisionTreeRegressor(criterion='mse', max_de...",6,4,,{'model': (DecisionTreeRegressor(criterion='ms...,1.49515e-06
50,-15.6897,-16.0094,-16.0533,-15.9169,0.162256,6,-8.91475,-9.83262,-9.11352,-9.28696,0.394277,1.8346,0.0291861,0.0915587,0.00100692,"(DecisionTreeRegressor(criterion='mse', max_de...",5,5,,{'model': (DecisionTreeRegressor(criterion='ms...,0.049303
51,-16.283,-15.4416,-16.7759,-16.1671,0.550176,7,-9.5148,-10.8245,-9.63801,-9.99242,0.590489,1.86902,0.0674135,0.0921139,0.000723305,"(DecisionTreeRegressor(criterion='mse', max_de...",7,6,,{'model': (DecisionTreeRegressor(criterion='ms...,8.03624e-06


In [16]:
# Number of search spaces registered on the optimiser (one per model family).
len(opt.search_spaces_)

3

In [17]:
# Inspect the stored search spaces together with their evaluation budgets.
opt.search_spaces_

{0: ({'model': Categorical(categories=[RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
              max_features='auto', max_leaf_nodes=None,
              min_impurity_split=1e-07, min_samples_leaf=1,
              min_samples_split=2, min_weight_fraction_leaf=0.0,
              n_estimators=1000, n_jobs=1, oob_score=False, random_state=23,
              verbose=0, warm_start=False)], prior=None),
   'model__max_features': Integer(low=1, high=13),
   'model__min_samples_leaf': Integer(low=1, high=150),
   'preprocess': Categorical(categories=[None], prior=None)},
  50),
 1: ({'model': Categorical(categories=[Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=23, solver='auto', tol=0.001)], prior=None),
   'model__alpha': Real(low=1e-06, high=1000000.0, prior=log-uniform, transform=identity),
   'preprocess': Categorical(categories=[StandardScaler(copy=True, with_mean=True, with_std=True)], prior=None)},
  25),
 