# Hyperparameter Tuning Methods Comparison
Cross validation approach based on the following [repo](https://github.com/roamanalytics/roamresearch/tree/master/BlogPosts/Hyperparameter_tuning_comparison)

In [23]:
%matplotlib inline

In [24]:
from hpt_cmp import *

In [25]:
%load_ext autoreload
%aimport hpt_cmp
%autoreload 1

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
from __future__ import print_function
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [35]:
# Load the scikit-learn handwritten-digits dataset.
# NOTE(review): `datasets` is not imported explicitly in this notebook;
# presumably it arrives via `from hpt_cmp import *` -- confirm.
digits = datasets.load_digits()

# A classifier needs a (n_samples, n_features) matrix. No reshape is done in
# this notebook: `digits.data` is handed to the search as-is, and per the
# scikit-learn documentation it already holds the flattened images
# (`digits.images` keeps the 2-D form).


In [36]:
# Discrete candidate values for each SVC hyperparameter; used directly by the
# grid and random searches.
param_grid = {
    'C': [1, 10, 100, 1000],
    'gamma': [0.001, 0.0001],
    'kernel': ['linear', 'rbf']
}

# Hyperopt search space over exactly the same candidates. It is derived from
# param_grid so the two spaces cannot drift apart: the previous hand-written
# version listed gamma as [0.001, 0.001], so the TPE search could never try
# gamma=0.0001 and was compared on a smaller space than the other methods.
# Each hp.choice label is the parameter name, matching the original labels.
hyperopt_grid = {
    name: hp.choice(name, values)
    for name, values in param_grid.items()
}

In [37]:
# Hyperparameter-tuning strategies to compare. Every spec carries:
#   name       - label of the method
#   cv         - search function to run (presumably provided by hpt_cmp)
#   param_grid - search space / parameters handed to that function
#   args       - extra arguments for the search (none at the moment)
# Add further specs here once more search functions are implemented.
#
# Each spec is wrapped in a DotMap for attribute-style access; _dynamic=False
# is forwarded to DotMap unchanged.
#
# NOTE(review): the saved output for this cell shows gamma=0.01 for the
# Baseline entry while the code below uses 0.001 -- the output looks stale;
# re-run the notebook before relying on the recorded numbers.
hpt_objs = [
    DotMap(spec, _dynamic=False)
    for spec in (
        dict(
            name='Tree of Parzen Estimator',
            cv=tpe_search,
            param_grid=hyperopt_grid,
            args={},
        ),
        dict(
            name='RandomSearch',
            cv=random_search,
            param_grid=param_grid,
            args={},
        ),
        dict(
            name='GridSearch',
            cv=grid_search,
            param_grid=param_grid,
            args={},
        ),
        dict(
            name='Baseline',
            cv=run_baseline,
            # Fixed configuration rather than a grid; per the original note,
            # an empty dict here would mean "use default config".
            param_grid={'C': 1, 'gamma': 0.001, 'kernel': 'rbf'},
            args={},
        ),
    )
]
hpt_objs

[DotMap(name='Tree of Parzen Estimator', cv=<function tpe_search at 0x7f03f4e3f9d8>, param_grid=DotMap(C=<hyperopt.pyll.base.Apply object at 0x7f03f4ddea58>, gamma=<hyperopt.pyll.base.Apply object at 0x7f03f4ddeda0>, kernel=<hyperopt.pyll.base.Apply object at 0x7f03f4ddef28>), args=DotMap()),
 DotMap(name='RandomSearch', cv=<function random_search at 0x7f03f4e3f950>, param_grid=DotMap(C=[1, 10, 100, 1000], gamma=[0.001, 0.0001], kernel=['linear', 'rbf']), args=DotMap()),
 DotMap(name='GridSearch', cv=<function grid_search at 0x7f03f4e3f8c8>, param_grid=DotMap(C=[1, 10, 100, 1000], gamma=[0.001, 0.0001], kernel=['linear', 'rbf']), args=DotMap()),
 DotMap(name='Baseline', cv=<function run_baseline at 0x7f03f4e3fa60>, param_grid=DotMap(C=1, gamma=0.01, kernel='rbf'), args=DotMap())]

#### cmp_hpt_methods
Parameter description
> `htp_objs`: list of hyperparameter-tuning objects <br>
> `model`: sklearn model to optimize (needs to have fit/predict functions)<br>
> `dataset`: tuple of (X, y), e.g. (Data, Target) <br>
> `loss`: sklearn loss function to use <br>
> `metric`: sklearn metric to optimize for <br>
> `datset_split`: random_state for the dataset split <br>
> `name`: currently not used *optional* <br>

In [43]:
# Run every tuning method on the digits data and display the collected results.
# Arguments are positional, in the order given by the parameter description
# in the markdown section of this notebook.
# NOTE(review): scikit-learn's SVC only exposes predict_proba when built with
# probability=True -- confirm how cmp_hpt_methods uses the 'log_loss' string.
results = cmp_hpt_methods(
    hpt_objs,                      # tuning methods to compare
    SVC,                           # model to optimize (passed as the class)
    (digits.data, digits.target),  # dataset as an (X, y) tuple
    'log_loss',                    # loss, given by name
    accuracy_score)                # metric to optimize for
results

Start
HTP using Tree of Parzen Estimator


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

Tree of Parzen Estimator
{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
Tree of Parzen Estimator
{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}

[0.9567147613762487, 0.9665178571428571]
[7.604509592056274, 7.9029436111450195]
[100, 100]
HTP using RandomSearch


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

RandomSearch
{'kernel': 'rbf', 'gamma': 0.001, 'C': 1000}
RandomSearch
{'kernel': 'rbf', 'gamma': 0.001, 'C': 10}

[0.9589345172031076, 0.9665178571428571]
[0.9106543064117432, 0.7189202308654785]
[10, 10]
HTP using GridSearch


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

GridSearch
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
GridSearch
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}

[0.9589345172031076, 0.9665178571428571]
[1.2330317497253418, 1.2664835453033447]
[16, 16]
HTP using Baseline


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

Baseline
{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}
Baseline
{'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}

[0.586015538290788, 0.6785714285714286]
[0.154191255569458, 0.14970946311950684]
[1, 1]


[{'Model': 'SVC',
  'Hyper optimization method': 'Tree of Parzen Estimator',
  'Test accuracy': [0.9567147613762487, 0.9665178571428571],
  'Best Parameters': [{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'},
   {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}],
  'Parameters sampled': [100, 100],
  'Cross validation time (in s)': [7.604509592056274, 7.9029436111450195],
  'Mean Test accuracy': 0.9616163092595529,
  'Mean Cross validation time (in s)': 7.753726601600647,
  'Mean Parameters sampled': 100.0},
 {'Model': 'SVC',
  'Hyper optimization method': 'RandomSearch',
  'Test accuracy': [0.9589345172031076, 0.9665178571428571],
  'Best Parameters': [{'kernel': 'rbf', 'gamma': 0.001, 'C': 1000},
   {'kernel': 'rbf', 'gamma': 0.001, 'C': 10}],
  'Parameters sampled': [10, 10],
  'Cross validation time (in s)': [0.9106543064117432, 0.7189202308654785],
  'Mean Test accuracy': 0.9627261871729824,
  'Mean Cross validation time (in s)': 0.8147872686386108,
  'Mean Parameters sampled': 10.0},
 {'Mode