In [1]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

from sklearn.datasets import load_iris, load_digits
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

import pandas as pd

X, y = load_digits(10, True)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=0)

# used to try different model classes
pipe = Pipeline([
    ('model', SVC())
])

# single categorical value of 'model' parameter is used  to set the model class
lin_search = {
    'model': Categorical([LinearSVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
}

dtc_search = {
    'model': Categorical([DecisionTreeClassifier()]),
    'model__max_depth': Integer(1,32),
    'model__min_samples_split': Real(1e-3, 1.0, prior='log-uniform'),
}

svc_search = {
    'model': Categorical([SVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'model__degree': Integer(1,8),
    'model__kernel': Categorical(['linear', 'poly', 'rbf']),
}

In [2]:
opt = BayesSearchCV(
    pipe,
    [(lin_search, 16), (dtc_search, 24), (svc_search, 32)], # (parameter space, # of evaluations)
)

opt.fit(X_train, y_train)



In [4]:
opt.best_estimator_

Pipeline(steps=[('model', SVC(C=685.79254083046715, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=1, gamma=0.00067292562584484828,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

In [9]:
type(opt.cv_results_)

collections.defaultdict

In [10]:
opt.cv_results_.keys()

dict_keys(['split0_test_score', 'split1_test_score', 'split2_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score', 'split0_train_score', 'split1_train_score', 'split2_train_score', 'mean_train_score', 'std_train_score', 'mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_model', 'param_model__C', 'params', 'param_model__max_depth', 'param_model__min_samples_split', 'param_model__degree', 'param_model__gamma', 'param_model__kernel', 0])

In [11]:
len(opt.cv_results_.keys())

24

In [12]:
len(opt.cv_results_.items())

24

In [13]:
cv_results = pd.DataFrame(opt.cv_results_)

ValueError: arrays must all be same length

In [14]:
cv_results = pd.DataFrame.from_dict(opt.cv_results_)

ValueError: arrays must all be same length

In [16]:
for k, v in opt.cv_results_.items():
    print(k, len(v))

split0_test_score 72
split1_test_score 72
split2_test_score 72
mean_test_score 72
std_test_score 72
rank_test_score 72
split0_train_score 72
split1_train_score 72
split2_train_score 72
mean_train_score 72
std_train_score 72
mean_fit_time 72
std_fit_time 72
mean_score_time 72
std_score_time 72
param_model 72
param_model__C 48
params 72
param_model__max_depth 24
param_model__min_samples_split 24
param_model__degree 32
param_model__gamma 32
param_model__kernel 32
0 0


In [19]:
cv_results = pd.DataFrame(opt.cv_results_)

ValueError: arrays must all be same length

### Hacky solutions from [here](https://stackoverflow.com/questions/19736080/creating-dataframe-from-a-dictionary-where-entries-have-different-lengths)

In [21]:
cv_results = pd.DataFrame(dict([(k, pd.Series(v)) for k,v in opt.cv_results_.items()]))

In [22]:
cv_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 23 columns):
mean_fit_time                     72 non-null float64
mean_score_time                   72 non-null float64
mean_test_score                   72 non-null float64
mean_train_score                  72 non-null float64
param_model                       72 non-null object
param_model__C                    48 non-null float64
param_model__degree               32 non-null float64
param_model__gamma                32 non-null float64
param_model__kernel               32 non-null object
param_model__max_depth            24 non-null float64
param_model__min_samples_split    24 non-null float64
params                            72 non-null object
rank_test_score                   72 non-null int64
split0_test_score                 72 non-null float64
split0_train_score                72 non-null float64
split1_test_score                 72 non-null float64
split1_train_score                72 n

In [23]:
cv_results2 = pd.DataFrame.from_dict(opt.cv_results_, orient='index').transpose()

In [24]:
cv_results2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 23 columns):
split0_test_score                 72 non-null object
split1_test_score                 72 non-null object
split2_test_score                 72 non-null object
mean_test_score                   72 non-null object
std_test_score                    72 non-null object
rank_test_score                   72 non-null object
split0_train_score                72 non-null object
split1_train_score                72 non-null object
split2_train_score                72 non-null object
mean_train_score                  72 non-null object
std_train_score                   72 non-null object
mean_fit_time                     72 non-null object
std_fit_time                      72 non-null object
mean_score_time                   72 non-null object
std_score_time                    72 non-null object
param_model                       72 non-null object
param_model__C                    48 non-null obj

In [26]:
pd.options.display.max_columns = None

In [27]:
cv_results.sort_values('rank_test_score')

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_model,param_model__C,param_model__degree,param_model__gamma,param_model__kernel,param_model__max_depth,param_model__min_samples_split,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,0.014464,0.006983,0.953229,0.967710,"LinearSVC(C=0.0014842761922023431, class_weigh...",0.000167,3.0,0.059852,rbf,15.0,0.030931,"{'model': LinearSVC(C=0.0014842761922023431, c...",1,0.942478,0.970950,0.966592,0.964365,0.950673,0.967814,0.001514,0.009443,0.010024,0.002689
51,0.025666,0.010797,0.985152,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.984513,1.000000,0.984410,1.000000,0.986547,1.000000,0.003670,0.000927,0.000982,0.000000
50,0.023060,0.012570,0.983667,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.982301,1.000000,0.984410,1.000000,0.984305,1.000000,0.000090,0.000158,0.000972,0.000000
49,0.102624,0.028297,0.521901,0.532648,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.517699,0.521788,0.514477,0.538976,0.533632,0.537181,0.004534,0.001010,0.008359,0.007714
48,0.061855,0.025199,0.919079,0.928363,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.900442,0.931844,0.939866,0.924276,0.917040,0.928968,0.000842,0.000238,0.016185,0.003119
47,0.017992,0.009748,0.976986,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",1223.964396,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.000210,0.000220,0.005436,0.000000
46,0.025827,0.015129,0.978471,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",12.644752,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.971239,1.000000,0.977728,1.000000,0.986547,1.000000,0.000077,0.000237,0.006271,0.000000
52,0.107106,0.029879,0.106162,0.106161,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.106195,0.106145,0.106904,0.105791,0.105381,0.106548,0.005243,0.001752,0.000621,0.000310
45,0.056607,0.024344,0.925019,0.936893,"SVC(C=685.79254083046715, cache_size=200, clas...",1000000.000000,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.902655,0.936313,0.946548,0.934298,0.926009,0.940067,0.000117,0.000260,0.017963,0.002390
43,0.023503,0.010355,0.985152,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",1000000.000000,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.984513,1.000000,0.984410,1.000000,0.986547,1.000000,0.002219,0.000530,0.000982,0.000000


In [28]:
cv_results.sort_values('mean_test_score', ascending=False)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_model,param_model__C,param_model__degree,param_model__gamma,param_model__kernel,param_model__max_depth,param_model__min_samples_split,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
60,0.047380,0.022491,0.991834,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.991150,1.000000,0.991091,1.000000,0.993274,1.000000,0.001683,0.001192,0.001013,0.000000
68,0.044779,0.021810,0.991091,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.988938,1.000000,0.991091,1.000000,0.993274,1.000000,0.001501,0.001315,0.001770,0.000000
65,0.037464,0.020555,0.989607,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.988938,1.000000,0.988864,1.000000,0.991031,1.000000,0.002114,0.001579,0.001003,0.000000
44,0.142204,0.029439,0.988864,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",1715.765785,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.986726,1.000000,0.993318,1.000000,0.986547,1.000000,0.003976,0.000153,0.003151,0.000000
43,0.023503,0.010355,0.985152,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",1000000.000000,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.984513,1.000000,0.984410,1.000000,0.986547,1.000000,0.002219,0.000530,0.000982,0.000000
51,0.025666,0.010797,0.985152,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.984513,1.000000,0.984410,1.000000,0.986547,1.000000,0.003670,0.000927,0.000982,0.000000
50,0.023060,0.012570,0.983667,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.982301,1.000000,0.984410,1.000000,0.984305,1.000000,0.000090,0.000158,0.000972,0.000000
56,0.027890,0.015917,0.979955,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.975664,1.000000,0.977728,1.000000,0.986547,1.000000,0.001455,0.001000,0.004714,0.000000
59,0.021134,0.012440,0.979213,0.992574,"SVC(C=685.79254083046715, cache_size=200, clas...",,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.977876,0.991061,0.973274,0.993318,0.986547,0.993341,0.001273,0.000976,0.005492,0.001069
46,0.025827,0.015129,0.978471,1.000000,"SVC(C=685.79254083046715, cache_size=200, clas...",12.644752,,,,,,"{'model': SVC(C=685.79254083046715, cache_size...",1,0.971239,1.000000,0.977728,1.000000,0.986547,1.000000,0.000077,0.000237,0.006271,0.000000
