In [1]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

from sklearn.datasets import load_iris, load_digits
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

import pandas as pd

X, y = load_digits(10, True)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=0)

# used to try different model classes
pipe = Pipeline([
    ('model', SVC())
])

# single categorical value of 'model' parameter is used  to set the model class
lin_search = {
    'model': Categorical([LinearSVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
}

dtc_search = {
    'model': Categorical([DecisionTreeClassifier()]),
    'model__max_depth': Integer(1,32),
    'model__min_samples_split': Real(1e-3, 1.0, prior='log-uniform'),
}

svc_search = {
    'model': Categorical([SVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'model__degree': Integer(1,8),
    'model__kernel': Categorical(['linear', 'poly', 'rbf']),
}

In [2]:
opt = BayesSearchCV(
    pipe,
    [(lin_search, 16), (dtc_search, 24), (svc_search, 32)], # (parameter space, # of evaluations)
)

opt.fit(X_train, y_train)



In [4]:
opt.best_estimator_

Pipeline(steps=[('model', SVC(C=685.79254083046715, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=1, gamma=0.00067292562584484828,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

In [9]:
type(opt.cv_results_)

collections.defaultdict

In [10]:
opt.cv_results_.keys()

dict_keys(['split0_test_score', 'split1_test_score', 'split2_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score', 'split0_train_score', 'split1_train_score', 'split2_train_score', 'mean_train_score', 'std_train_score', 'mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_model', 'param_model__C', 'params', 'param_model__max_depth', 'param_model__min_samples_split', 'param_model__degree', 'param_model__gamma', 'param_model__kernel', 0])

In [11]:
len(opt.cv_results_.keys())

24

In [12]:
len(opt.cv_results_.items())

24

In [13]:
cv_results = pd.DataFrame(opt.cv_results_)

ValueError: arrays must all be same length

In [14]:
cv_results = pd.DataFrame.from_dict(opt.cv_results_)

ValueError: arrays must all be same length

In [16]:
for k, v in opt.cv_results_.items():
    print(k, len(v))

split0_test_score 72
split1_test_score 72
split2_test_score 72
mean_test_score 72
std_test_score 72
rank_test_score 72
split0_train_score 72
split1_train_score 72
split2_train_score 72
mean_train_score 72
std_train_score 72
mean_fit_time 72
std_fit_time 72
mean_score_time 72
std_score_time 72
param_model 72
param_model__C 48
params 72
param_model__max_depth 24
param_model__min_samples_split 24
param_model__degree 32
param_model__gamma 32
param_model__kernel 32
0 0


In [17]:
opt.cv_results_.pop(0)

[]

In [18]:
for k, v in opt.cv_results_.items():
    print(k, len(v))

split0_test_score 72
split1_test_score 72
split2_test_score 72
mean_test_score 72
std_test_score 72
rank_test_score 72
split0_train_score 72
split1_train_score 72
split2_train_score 72
mean_train_score 72
std_train_score 72
mean_fit_time 72
std_fit_time 72
mean_score_time 72
std_score_time 72
param_model 72
param_model__C 48
params 72
param_model__max_depth 24
param_model__min_samples_split 24
param_model__degree 32
param_model__gamma 32
param_model__kernel 32


In [19]:
cv_results = pd.DataFrame(opt.cv_results_)

ValueError: arrays must all be same length

### Hacky solutions from [here](https://stackoverflow.com/questions/19736080/creating-dataframe-from-a-dictionary-where-entries-have-different-lengths)

In [21]:
cv_results = pd.DataFrame(dict([(k, pd.Series(v)) for k,v in opt.cv_results_.items()]))

In [22]:
cv_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 23 columns):
mean_fit_time                     72 non-null float64
mean_score_time                   72 non-null float64
mean_test_score                   72 non-null float64
mean_train_score                  72 non-null float64
param_model                       72 non-null object
param_model__C                    48 non-null float64
param_model__degree               32 non-null float64
param_model__gamma                32 non-null float64
param_model__kernel               32 non-null object
param_model__max_depth            24 non-null float64
param_model__min_samples_split    24 non-null float64
params                            72 non-null object
rank_test_score                   72 non-null int64
split0_test_score                 72 non-null float64
split0_train_score                72 non-null float64
split1_test_score                 72 non-null float64
split1_train_score                72 n

In [23]:
cv_results2 = pd.DataFrame.from_dict(opt.cv_results_, orient='index').transpose()

In [24]:
cv_results2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 23 columns):
split0_test_score                 72 non-null object
split1_test_score                 72 non-null object
split2_test_score                 72 non-null object
mean_test_score                   72 non-null object
std_test_score                    72 non-null object
rank_test_score                   72 non-null object
split0_train_score                72 non-null object
split1_train_score                72 non-null object
split2_train_score                72 non-null object
mean_train_score                  72 non-null object
std_train_score                   72 non-null object
mean_fit_time                     72 non-null object
std_fit_time                      72 non-null object
mean_score_time                   72 non-null object
std_score_time                    72 non-null object
param_model                       72 non-null object
param_model__C                    48 non-null obj