# Hyperparameter Tuning Methods Comparison
The cross-validation approach is based on the following [repo](https://github.com/roamanalytics/roamresearch/tree/master/BlogPosts/Hyperparameter_tuning_comparison).

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from hpt_cmp import *

In [3]:
# Enable IPython's autoreload extension so that edits to the helper module are
# picked up without restarting the kernel.
%load_ext autoreload
# Register hpt_cmp as a module to be auto-reloaded.
%aimport hpt_cmp
# Mode 1: before executing code, reload only the modules registered via %aimport.
%autoreload 1

In [4]:
from __future__ import print_function
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [5]:
# Load the Digits dataset.
digits = datasets.load_digits()

# A classifier needs a (samples, features) matrix rather than 2-D images.
# No reshape is performed here: the search cell further down passes
# `digits.data` directly, which scikit-learn documents as the already-flattened
# form of the images -- TODO confirm for the installed scikit-learn version.


In [6]:
# Search space handed to the list-based search methods below.
param_grid = {
    'C': [1, 10, 100, 1000],
    'gamma': [0.0001, 0.001],
    'kernel': ['linear', 'rbf']
}

# hyperopt expects every dimension wrapped in an `hp.choice` expression whose
# label matches the parameter name. Deriving it from `param_grid` (instead of
# repeating the values by hand) keeps the two search spaces from drifting apart.
hyperopt_grid = {
    param: hp.choice(param, candidates)
    for param, candidates in param_grid.items()
}

In [13]:
# One spec per hyperparameter-tuning method to compare. Each spec holds:
#   name       - label of the method (shown as 'HPT method' in the results)
#   cv         - the search function to run
#   param_grid - search space for that function (fixed values for the baseline)
#   args       - extra arguments for the method; empty for every entry here
# Add more specs once more search functions are implemented.
hpt_specs = [
    dict(name='BaysianSearch', cv=baysian_search, param_grid=param_grid, args={}),
    dict(name='Tree of Parzen Estimator', cv=tpe_search, param_grid=hyperopt_grid, args={}),
    dict(name='RandomSearch', cv=random_search, param_grid=param_grid, args={}),
    # GridSearch is currently disabled. To include it, add:
    #   dict(name='GridSearch', cv=grid_search, param_grid=param_grid, args={})
    dict(
        name='Baseline',
        cv=run_baseline,
        # Fixed parameters for the baseline run; per the original note, passing
        # {} instead would use the default config.
        param_grid={'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'},
        args={},
    ),
]

# Wrap every spec in a non-dynamic DotMap (`_dynamic=False`) and display the list.
hpt_objs = [DotMap(spec, _dynamic=False) for spec in hpt_specs]
hpt_objs

[DotMap(name='BaysianSearch', cv=<function baysian_search at 0x7f9597139840>, param_grid=DotMap(C=[1, 10, 100, 1000], gamma=[0.0001, 0.001], kernel=['linear', 'rbf']), args=DotMap()),
 DotMap(name='Tree of Parzen Estimator', cv=<function tpe_search at 0x7f95971398c8>, param_grid=DotMap(C=<hyperopt.pyll.base.Apply object at 0x7f9596e13fd0>, gamma=<hyperopt.pyll.base.Apply object at 0x7f95970e1208>, kernel=<hyperopt.pyll.base.Apply object at 0x7f95970e1390>), args=DotMap()),
 DotMap(name='RandomSearch', cv=<function random_search at 0x7f95971397b8>, param_grid=DotMap(C=[1, 10, 100, 1000], gamma=[0.0001, 0.001], kernel=['linear', 'rbf']), args=DotMap()),
 DotMap(name='Baseline', cv=<function run_baseline at 0x7f9597139620>, param_grid=DotMap(C=1, gamma=0.0001, kernel='rbf'), args=DotMap())]

#### cmp_hpt_methods
Parameter description
> `htp_objs`: list of hyperparameter-tuning objects <br>
> `model`: sklearn model to optimize (needs to have fit/predict functions)<br>
> `dataset`: tuple of (X, y), e.g. (Data, Target) <br>
> `loss`: sklearn loss function to use <br>
> `metric`: sklearn metric to optimize for <br>
> `datset_split`: random_state for the dataset split <br>
> `name`: currently not used *(optional)* <br>

In [14]:
# Run search & print result
# Run every configured tuning method on the digits data, then display the results.
digits_dataset = (digits.data, digits.target)  # (X, y) tuple

results = cmp_hpt_methods(
    hpt_objs,        # tuning methods to compare
    SVC,             # model class to optimize
    digits_dataset,
    'log_loss',      # loss
    accuracy_score,  # metric
)
results

Start
HTP using BaysianSearch


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

BaysianSearch
[{'loss': 0.9207589285714286, 'params': {'C': 100, 'gamma': 0.000555754404321924, 'kernel': 'linear'}}, {'loss': 0.9207589285714286, 'params': {'C': 100, 'gamma': 0.00038157301069910714, 'kernel': 'linear'}}, {'loss': 0.9363839285714286, 'params': {'C': 100, 'gamma': 0.00047137331629548647, 'kernel': 'rbf'}}, {'loss': 0.9263392857142857, 'params': {'C': 10, 'gamma': 0.00012082241725340934, 'kernel': 'rbf'}}, {'loss': 0.9263392857142857, 'params': {'C': 10, 'gamma': 0.00010625589856594347, 'kernel': 'rbf'}}, {'loss': 0.9207589285714286, 'params': {'C': 100, 'gamma': 0.0001829316361884109, 'kernel': 'linear'}}, {'loss': 0.9386160714285714, 'params': {'C': 100, 'gamma': 0.0008998018771257247, 'kernel': 'rbf'}}, {'loss': 0.9207589285714286, 'params': {'C': 1, 'gamma': 0.0003279186814924124, 'kernel': 'linear'}}, {'loss': 0.9319196428571429, 'params': {'C': 100, 'gamma': 0.0003328456787724938, 'kernel': 'rbf'}}, {'loss': 0.9375, 'params': {'C': 100, 'gamma': 0.0005166849808795

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

Tree of Parzen Estimator
[{'loss': 0.9401330376940134, 'params': {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}, 'status': 'ok'}, {'loss': 0.926829268292683, 'params': {'C': 1000, 'gamma': 0.0001, 'kernel': 'linear'}, 'status': 'ok'}, {'loss': 0.9423503325942351, 'params': {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}, 'status': 'ok'}, {'loss': 0.9401330376940134, 'params': {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}, 'status': 'ok'}, {'loss': 0.926829268292683, 'params': {'C': 100, 'gamma': 0.001, 'kernel': 'linear'}, 'status': 'ok'}, {'loss': 0.9423503325942351, 'params': {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}, 'status': 'ok'}, {'loss': 0.926829268292683, 'params': {'C': 100, 'gamma': 0.0001, 'kernel': 'linear'}, 'status': 'ok'}, {'loss': 0.9423503325942351, 'params': {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}, 'status': 'ok'}, {'loss': 0.926829268292683, 'params': {'C': 1000, 'gamma': 0.0001, 'kernel': 'linear'}, 'status': 'ok'}, {'loss': 0.926829268292683, 'params': {'C': 1, 'gamma'

Tree of Parzen Estimator
[{'loss': 0.8898678414096917, 'params': {'C': 100, 'gamma': 0.0001, 'kernel': 'linear'}, 'status': 'ok'}, {'loss': 0.8898678414096917, 'params': {'C': 10, 'gamma': 0.0001, 'kernel': 'linear'}, 'status': 'ok'}, {'loss': 0.8986784140969163, 'params': {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}, 'status': 'ok'}, {'loss': 0.8898678414096917, 'params': {'C': 1000, 'gamma': 0.0001, 'kernel': 'linear'}, 'status': 'ok'}, {'loss': 0.8986784140969163, 'params': {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}, 'status': 'ok'}, {'loss': 0.8898678414096917, 'params': {'C': 1000, 'gamma': 0.001, 'kernel': 'linear'}, 'status': 'ok'}, {'loss': 0.8898678414096917, 'params': {'C': 1000, 'gamma': 0.0001, 'kernel': 'linear'}, 'status': 'ok'}, {'loss': 0.8986784140969163, 'params': {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}, 'status': 'ok'}, {'loss': 0.9008810572687225, 'params': {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}, 'status': 'ok'}, {'loss': 0.9008810572687225, 'params': {'C':

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

RandomSearch
[{'loss': 0.9207589285714286, 'params': {'kernel': 'linear', 'gamma': 0.001, 'C': 1}}, {'loss': 0.9207589285714286, 'params': {'kernel': 'linear', 'gamma': 0.001, 'C': 100}}, {'loss': 0.9207589285714286, 'params': {'kernel': 'linear', 'gamma': 0.001, 'C': 1000}}, {'loss': 0.9207589285714286, 'params': {'kernel': 'linear', 'gamma': 0.0001, 'C': 1000}}, {'loss': 0.9375, 'params': {'kernel': 'rbf', 'gamma': 0.001, 'C': 100}}, {'loss': 0.9241071428571429, 'params': {'kernel': 'rbf', 'gamma': 0.0001, 'C': 1}}, {'loss': 0.9375, 'params': {'kernel': 'rbf', 'gamma': 0.001, 'C': 10}}, {'loss': 0.9229910714285714, 'params': {'kernel': 'rbf', 'gamma': 0.0001, 'C': 1000}}, {'loss': 0.9319196428571429, 'params': {'kernel': 'rbf', 'gamma': 0.001, 'C': 1}}, {'loss': 0.9375, 'params': {'kernel': 'rbf', 'gamma': 0.001, 'C': 1000}}]
{'kernel': 'linear', 'gamma': 0.001, 'C': 1}
RandomSearch
[{'loss': 0.902330743618202, 'params': {'kernel': 'linear', 'gamma': 0.0001, 'C': 1000}}, {'loss': 0.9

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

Baseline
[{'loss': 0.9467849223946785, 'params': {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}, 'status': 'ok'}]
{'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
Baseline
[{'loss': 0.8722466960352423, 'params': {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}, 'status': 'ok'}]
{'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}

[0.9334073251942286, 0.9386160714285714]
[0.10172510147094727, 0.08437204360961914]
[1, 1]


[{'Model': 'SVC',
  'HPT method': 'BaysianSearch',
  'Test accuracy': [0.9322974472807991, 0.9464285714285714],
  'Best Parameters': [{'C': 100,
    'gamma': 0.000555754404321924,
    'kernel': 'linear'},
   {'C': 1, 'gamma': 0.0009926809854252654, 'kernel': 'linear'}],
  'Parameters sampled': [10, 10],
  'Cross-val. time (in s)': [1.3805298805236816, 0.8359415531158447],
  'Mean Test accuracy': 0.9393630093546852,
  'Mean Cross-val. time (in s)': 1.1082357168197632,
  'Mean Parameters sampled': 10.0},
 {'Model': 'SVC',
  'HPT method': 'Tree of Parzen Estimator',
  'Test accuracy': [0.9322974472807991, 0.9464285714285714],
  'Best Parameters': [{'C': 1000, 'gamma': 0.0001, 'kernel': 'linear'},
   {'C': 100, 'gamma': 0.0001, 'kernel': 'linear'}],
  'Parameters sampled': [100, 100],
  'Cross-val. time (in s)': [4.153706312179565, 4.482623815536499],
  'Mean Test accuracy': 0.9393630093546852,
  'Mean Cross-val. time (in s)': 4.318165063858032,
  'Mean Parameters sampled': 100.0},
 {'Mode

In [15]:
# Show the per-method summary: mean test accuracy, mean cross-validation time
# and mean number of parameter sets sampled.
table(results)

Unnamed: 0,HPT method,Mean Test accuracy,Mean Cross-val. time (in s),Mean Parameters sampled
0,BaysianSearch,0.939363,1.108236,10.0
1,Tree of Parzen Estimator,0.939363,4.318165,100.0
2,RandomSearch,0.939363,0.832849,10.0
3,Baseline,0.936012,0.093049,1.0
