In [1]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

from sklearn.datasets import load_iris, load_digits
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

import pandas as pd

X, y = load_digits(10, True)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=0)

# used to try different model classes
pipe = Pipeline([
    ('model', SVC())
])

# single categorical value of 'model' parameter is used  to set the model class
lin_search = {
    'model': Categorical([LinearSVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
}

dtc_search = {
    'model': Categorical([DecisionTreeClassifier()]),
    'model__max_depth': Integer(1,32),
    'model__min_samples_split': Real(1e-3, 1.0, prior='log-uniform'),
}

svc_search = {
    'model': Categorical([SVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'model__degree': Integer(1,8),
    'model__kernel': Categorical(['linear', 'poly', 'rbf']),
}

In [3]:
opt = BayesSearchCV(
    pipe,
    [(lin_search, 16), (dtc_search, 24), (svc_search, 32)], # (parameter space, # of evaluations)
    n_jobs=-1
)

opt.fit(X_train, y_train)

In [4]:
opt.best_estimator_

Pipeline(steps=[('model', SVC(C=0.0012587583006411721, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=7, gamma=9.9999999999999995e-07,
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

In [5]:
type(opt.cv_results_)

collections.defaultdict

In [6]:
opt.cv_results_.keys()

dict_keys(['split0_test_score', 'split1_test_score', 'split2_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score', 'split0_train_score', 'split1_train_score', 'split2_train_score', 'mean_train_score', 'std_train_score', 'mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_model', 'param_model__C', 'params', 'param_model__max_depth', 'param_model__min_samples_split', 'param_model__degree', 'param_model__gamma', 'param_model__kernel'])

In [7]:
len(opt.cv_results_.keys())

23

In [8]:
len(opt.cv_results_.items())

23

In [9]:
cv_results = pd.DataFrame(opt.cv_results_)

ValueError: arrays must all be same length

In [10]:
cv_results = pd.DataFrame.from_dict(opt.cv_results_)

ValueError: arrays must all be same length

In [11]:
for k, v in opt.cv_results_.items():
    print(k, len(v))

split0_test_score 72
split1_test_score 72
split2_test_score 72
mean_test_score 72
std_test_score 72
rank_test_score 72
split0_train_score 72
split1_train_score 72
split2_train_score 72
mean_train_score 72
std_train_score 72
mean_fit_time 72
std_fit_time 72
mean_score_time 72
std_score_time 72
param_model 72
param_model__C 48
params 72
param_model__max_depth 24
param_model__min_samples_split 24
param_model__degree 32
param_model__gamma 32
param_model__kernel 32


In [12]:
cv_results = pd.DataFrame(opt.cv_results_)

ValueError: arrays must all be same length

### Hacky solutions from [here](https://stackoverflow.com/questions/19736080/creating-dataframe-from-a-dictionary-where-entries-have-different-lengths)

In [13]:
cv_results = pd.DataFrame(dict([(k, pd.Series(v)) for k,v in opt.cv_results_.items()]))

In [14]:
cv_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 23 columns):
mean_fit_time                     72 non-null float64
mean_score_time                   72 non-null float64
mean_test_score                   72 non-null float64
mean_train_score                  72 non-null float64
param_model                       72 non-null object
param_model__C                    48 non-null float64
param_model__degree               32 non-null float64
param_model__gamma                32 non-null float64
param_model__kernel               32 non-null object
param_model__max_depth            24 non-null float64
param_model__min_samples_split    24 non-null float64
params                            72 non-null object
rank_test_score                   72 non-null int64
split0_test_score                 72 non-null float64
split0_train_score                72 non-null float64
split1_test_score                 72 non-null float64
split1_train_score                72 n

In [15]:
cv_results2 = pd.DataFrame.from_dict(opt.cv_results_, orient='index').transpose()

In [16]:
cv_results2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 23 columns):
split0_test_score                 72 non-null object
split1_test_score                 72 non-null object
split2_test_score                 72 non-null object
mean_test_score                   72 non-null object
std_test_score                    72 non-null object
rank_test_score                   72 non-null object
split0_train_score                72 non-null object
split1_train_score                72 non-null object
split2_train_score                72 non-null object
mean_train_score                  72 non-null object
std_train_score                   72 non-null object
mean_fit_time                     72 non-null object
std_fit_time                      72 non-null object
mean_score_time                   72 non-null object
std_score_time                    72 non-null object
param_model                       72 non-null object
param_model__C                    48 non-null obj

In [17]:
pd.options.display.max_columns = None

In [18]:
cv_results.sort_values('rank_test_score')

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_model,param_model__C,param_model__degree,param_model__gamma,param_model__kernel,param_model__max_depth,param_model__min_samples_split,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
22,0.010681,0.000298,0.847810,0.958412,"DecisionTreeClassifier(class_weight=None, crit...",905.688142,1.0,0.000001,poly,1.0,0.053418,{'model': DecisionTreeClassifier(class_weight=...,1,0.858407,0.949721,0.855234,0.963252,0.829596,0.962264,0.000232,3.394152e-06,0.012880,0.006159
1,0.032809,0.021864,0.962138,0.986274,"LinearSVC(C=0.0010880211554764913, class_weigh...",0.001088,2.0,0.227838,rbf,7.0,0.212374,"{'model': LinearSVC(C=0.0010880211554764913, c...",1,0.951327,0.991061,0.964365,0.984410,0.970852,0.983352,0.000738,8.747562e-03,0.008125,0.003412
28,0.012170,0.000442,0.838901,0.963613,"DecisionTreeClassifier(class_weight=None, crit...",9.400559,1.0,0.000001,poly,,,{'model': DecisionTreeClassifier(class_weight=...,1,0.842920,0.956425,0.837416,0.968820,0.836323,0.965594,0.000588,2.546011e-05,0.002891,0.005251
15,0.189243,0.009092,0.959911,0.992950,"LinearSVC(C=0.0046305439933453842, class_weigh...",0.004631,1.0,0.000001,linear,1.0,0.872294,"{'model': LinearSVC(C=0.0046305439933453842, c...",1,0.953540,0.993296,0.959911,0.994432,0.966368,0.991121,0.025526,2.544139e-03,0.005237,0.001374
37,0.011029,0.000352,0.851522,0.985888,"DecisionTreeClassifier(class_weight=None, crit...",144091.513974,,,,,,{'model': DecisionTreeClassifier(class_weight=...,1,0.860619,0.983240,0.850780,0.985523,0.843049,0.988901,0.000286,2.745646e-05,0.007192,0.002325
64,0.033273,0.018737,0.977728,0.990719,"SVC(C=0.0011295614767239265, cache_size=200, c...",,,,,,,"{'model': SVC(C=0.0011295614767239265, cache_s...",1,0.975664,0.988827,0.975501,0.993318,0.982063,0.990011,0.001847,2.498938e-04,0.003050,0.001901
58,0.032862,0.021346,0.980698,0.992204,"SVC(C=0.0012587583006411721, cache_size=200, c...",,,,,,,"{'model': SVC(C=0.0012587583006411721, cache_s...",1,0.980088,0.991061,0.975501,0.993318,0.986547,0.992231,0.001393,3.926784e-03,0.004523,0.000922
45,0.030359,0.015567,0.976986,1.000000,"SVC(C=73.05931532523995, cache_size=200, class...",6421.165637,,,,,,"{'model': SVC(C=73.05931532523995, cache_size=...",1,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.000318,2.859607e-04,0.005436,0.000000
46,0.029971,0.015279,0.976986,1.000000,"SVC(C=905.68814243472389, cache_size=200, clas...",522520.878477,,,,,,"{'model': SVC(C=905.68814243472389, cache_size...",1,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.000202,2.742490e-04,0.005436,0.000000
54,0.035202,0.019511,0.978471,1.000000,"SVC(C=480807.09788999299, cache_size=200, clas...",,,,,,,"{'model': SVC(C=480807.09788999299, cache_size...",1,0.973451,1.000000,0.975501,1.000000,0.986547,1.000000,0.001026,2.902562e-04,0.005744,0.000000


In [19]:
cv_results.sort_values('mean_test_score', ascending=False)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_model,param_model__C,param_model__degree,param_model__gamma,param_model__kernel,param_model__max_depth,param_model__min_samples_split,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
58,0.032862,0.021346,0.980698,0.992204,"SVC(C=0.0012587583006411721, cache_size=200, c...",,,,,,,"{'model': SVC(C=0.0012587583006411721, cache_s...",1,0.980088,0.991061,0.975501,0.993318,0.986547,0.992231,0.001393,3.926784e-03,0.004523,0.000922
56,0.030674,0.015986,0.979213,0.998888,"SVC(C=0.0046427585738361386, cache_size=200, c...",,,,,,,"{'model': SVC(C=0.0046427585738361386, cache_s...",2,0.977876,0.998883,0.975501,1.000000,0.984305,0.997780,0.001385,2.899555e-04,0.003712,0.000906
57,0.034258,0.017469,0.979213,0.992946,"SVC(C=0.0015236853341282384, cache_size=200, c...",,,,,,,"{'model': SVC(C=0.0015236853341282384, cache_s...",2,0.977876,0.992179,0.973274,0.993318,0.986547,0.993341,0.001185,2.639188e-04,0.005492,0.000543
49,0.038111,0.022238,0.978471,1.000000,"SVC(C=66207.89251739216, cache_size=200, class...",,,,,,,"{'model': SVC(C=66207.89251739216, cache_size=...",1,0.973451,1.000000,0.975501,1.000000,0.986547,1.000000,0.002191,2.094236e-03,0.005744,0.000000
54,0.035202,0.019511,0.978471,1.000000,"SVC(C=480807.09788999299, cache_size=200, clas...",,,,,,,"{'model': SVC(C=480807.09788999299, cache_size...",1,0.973451,1.000000,0.975501,1.000000,0.986547,1.000000,0.001026,2.902562e-04,0.005744,0.000000
64,0.033273,0.018737,0.977728,0.990719,"SVC(C=0.0011295614767239265, cache_size=200, c...",,,,,,,"{'model': SVC(C=0.0011295614767239265, cache_s...",1,0.975664,0.988827,0.975501,0.993318,0.982063,0.990011,0.001847,2.498938e-04,0.003050,0.001901
71,0.030762,0.015543,0.976986,1.000000,"SVC(C=1000000.0, cache_size=200, class_weight=...",,,,,,,"{'model': SVC(C=1000000.0, cache_size=200, cla...",2,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.000250,2.435308e-04,0.005436,0.000000
45,0.030359,0.015567,0.976986,1.000000,"SVC(C=73.05931532523995, cache_size=200, class...",6421.165637,,,,,,"{'model': SVC(C=73.05931532523995, cache_size=...",1,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.000318,2.859607e-04,0.005436,0.000000
46,0.029971,0.015279,0.976986,1.000000,"SVC(C=905.68814243472389, cache_size=200, clas...",522520.878477,,,,,,"{'model': SVC(C=905.68814243472389, cache_size...",1,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.000202,2.742490e-04,0.005436,0.000000
50,0.037802,0.020124,0.976986,1.000000,"SVC(C=177132.14711581811, cache_size=200, clas...",,,,,,,"{'model': SVC(C=177132.14711581811, cache_size...",3,0.966814,1.000000,0.977728,1.000000,0.986547,1.000000,0.002799,3.503511e-04,0.008073,0.000000
