In [1]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

from sklearn.datasets import load_iris, load_digits
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

import pandas as pd

X, y = load_digits(10, True)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=0)

# used to try different model classes
pipe = Pipeline([
    ('model', SVC())
])

# single categorical value of 'model' parameter is used  to set the model class
lin_search = {
    'model': Categorical([LinearSVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
}

dtc_search = {
    'model': Categorical([DecisionTreeClassifier()]),
    'model__max_depth': Integer(1,32),
    'model__min_samples_split': Real(1e-3, 1.0, prior='log-uniform'),
}

svc_search = {
    'model': Categorical([SVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'model__degree': Integer(1,8),
    'model__kernel': Categorical(['linear', 'poly', 'rbf']),
}



In [2]:
opt = BayesSearchCV(
    pipe,
    [(lin_search, 16), (dtc_search, 24), (svc_search, 32)], # (parameter space, # of evaluations)
    n_jobs=-1
)

opt.fit(X_train, y_train)



In [3]:
opt.best_estimator_

Pipeline(memory=None,
     steps=[('model', SVC(C=1000000.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=10.0, kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [4]:
type(opt.cv_results_)

collections.defaultdict

In [5]:
opt.cv_results_.keys()

dict_keys(['split0_test_score', 'split1_test_score', 'split2_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score', 'split0_train_score', 'split1_train_score', 'split2_train_score', 'mean_train_score', 'std_train_score', 'mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_model', 'param_model__C', 'params', 'param_model__max_depth', 'param_model__min_samples_split', 'param_model__degree', 'param_model__gamma', 'param_model__kernel'])

In [6]:
len(opt.cv_results_.keys())

23

In [7]:
len(opt.cv_results_.items())

23

In [8]:
cv_results = pd.DataFrame(opt.cv_results_)

ValueError: arrays must all be same length

In [10]:
cv_results = pd.DataFrame.from_dict(opt.cv_results_)

ValueError: arrays must all be same length

In [9]:
for k, v in opt.cv_results_.items():
    print(k, len(v))

split0_test_score 72
split1_test_score 72
split2_test_score 72
mean_test_score 72
std_test_score 72
rank_test_score 72
split0_train_score 72
split1_train_score 72
split2_train_score 72
mean_train_score 72
std_train_score 72
mean_fit_time 72
std_fit_time 72
mean_score_time 72
std_score_time 72
param_model 72
param_model__C 48
params 72
param_model__max_depth 24
param_model__min_samples_split 24
param_model__degree 32
param_model__gamma 32
param_model__kernel 32


In [10]:
cv_results = pd.DataFrame(opt.cv_results_)

ValueError: arrays must all be same length

### Hacky solutions from [here](https://stackoverflow.com/questions/19736080/creating-dataframe-from-a-dictionary-where-entries-have-different-lengths)

In [11]:
cv_results = pd.DataFrame(dict([(k, pd.Series(v)) for k,v in opt.cv_results_.items()]))

In [12]:
cv_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 23 columns):
mean_fit_time                     72 non-null float64
mean_score_time                   72 non-null float64
mean_test_score                   72 non-null float64
mean_train_score                  72 non-null float64
param_model                       72 non-null object
param_model__C                    48 non-null float64
param_model__degree               32 non-null float64
param_model__gamma                32 non-null float64
param_model__kernel               32 non-null object
param_model__max_depth            24 non-null float64
param_model__min_samples_split    24 non-null float64
params                            72 non-null object
rank_test_score                   72 non-null int64
split0_test_score                 72 non-null float64
split0_train_score                72 non-null float64
split1_test_score                 72 non-null float64
split1_train_score                72 n

In [13]:
cv_results2 = pd.DataFrame.from_dict(opt.cv_results_, orient='index').transpose()

In [14]:
cv_results2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 23 columns):
split0_test_score                 72 non-null object
split1_test_score                 72 non-null object
split2_test_score                 72 non-null object
mean_test_score                   72 non-null object
std_test_score                    72 non-null object
rank_test_score                   72 non-null object
split0_train_score                72 non-null object
split1_train_score                72 non-null object
split2_train_score                72 non-null object
mean_train_score                  72 non-null object
std_train_score                   72 non-null object
mean_fit_time                     72 non-null object
std_fit_time                      72 non-null object
mean_score_time                   72 non-null object
std_score_time                    72 non-null object
param_model                       72 non-null object
param_model__C                    48 non-null obj

In [15]:
pd.options.display.max_columns = None

In [18]:
cv_results.sort_values('mean_test_score', ascending=False)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_model,param_model__C,param_model__degree,param_model__gamma,param_model__kernel,param_model__max_depth,param_model__min_samples_split,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
55,0.023439,0.011707,0.985895,1.000000,"SVC(C=1000000.0, cache_size=200, class_weight=...",,,,,,,"{'model': SVC(C=1000000.0, cache_size=200, cla...",1,0.984513,1.000000,0.986637,1.000000,0.986547,1.000000,0.000297,0.000411,0.000982,0.000000
54,0.025944,0.010655,0.985152,1.000000,"SVC(C=1000000.0, cache_size=200, class_weight=...",,,,,,,"{'model': SVC(C=1000000.0, cache_size=200, cla...",2,0.984513,1.000000,0.984410,1.000000,0.986547,1.000000,0.005261,0.000479,0.000982,0.000000
44,0.034963,0.017613,0.983667,1.000000,"SVC(C=42169.999883669596, cache_size=200, clas...",85959.400448,,,,,,"{'model': SVC(C=42169.999883669596, cache_size...",1,0.982301,1.000000,0.984410,1.000000,0.984305,1.000000,0.000245,0.000342,0.000972,0.000000
51,0.035153,0.017490,0.983667,1.000000,"SVC(C=255660.11341789714, cache_size=200, clas...",,,,,,,"{'model': SVC(C=255660.11341789714, cache_size...",3,0.982301,1.000000,0.984410,1.000000,0.984305,1.000000,0.000100,0.000405,0.000972,0.000000
42,0.042900,0.014715,0.980698,1.000000,"SVC(C=9.2108570814451851e-05, cache_size=200, ...",0.000081,,,,,,"{'model': SVC(C=9.2108570814451851e-05, cache_...",2,0.982301,1.000000,0.982183,1.000000,0.977578,1.000000,0.005894,0.000366,0.002195,0.000000
66,0.038623,0.014320,0.980698,1.000000,"SVC(C=8.0608317513907512e-05, cache_size=200, ...",,,,,,,"{'model': SVC(C=8.0608317513907512e-05, cache_...",1,0.982301,1.000000,0.982183,1.000000,0.977578,1.000000,0.001709,0.000531,0.002195,0.000000
71,0.020013,0.010315,0.976986,1.000000,"SVC(C=1000000.0, cache_size=200, class_weight=...",,,,,,,"{'model': SVC(C=1000000.0, cache_size=200, cla...",2,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.001516,0.000273,0.005436,0.000000
68,0.031476,0.015723,0.976986,1.000000,"SVC(C=85959.400448089582, cache_size=200, clas...",,,,,,,"{'model': SVC(C=85959.400448089582, cache_size...",2,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.000215,0.000314,0.005436,0.000000
43,0.031154,0.015744,0.976986,1.000000,"SVC(C=0.06131604018000858, cache_size=200, cla...",359333.065585,,,,,,"{'model': SVC(C=0.06131604018000858, cache_siz...",3,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.000182,0.000255,0.005436,0.000000
49,0.030342,0.015278,0.976986,1.000000,"SVC(C=2172.2110315770406, cache_size=200, clas...",,,,,,,"{'model': SVC(C=2172.2110315770406, cache_size...",4,0.971239,1.000000,0.975501,1.000000,0.984305,1.000000,0.002076,0.000755,0.005436,0.000000


In [19]:
opt.cv_results_

defaultdict(list,
            {'mean_fit_time': [0.021556854248046875,
              0.10156695048014323,
              0.19345100720723471,
              0.23961043357849121,
              0.30873219172159833,
              0.022463719050089519,
              0.077284177144368485,
              0.13144437472025552,
              0.1402271588643392,
              0.14405028025309244,
              0.27042738596598309,
              0.30649256706237793,
              0.25422294934590656,
              0.20630764961242676,
              0.18938128153483072,
              0.096496661504109696,
              0.011680364608764648,
              0.010586102803548178,
              0.015336910883585611,
              0.010360956192016602,
              0.0057263374328613281,
              0.002620061238606771,
              0.0077137947082519531,
              0.0032260417938232422,
              0.0092620849609375,
              0.0051643848419189453,
              0.0096547603607177734,
   