## Scikit-optimize hyperparameter optimisation notebook
[Source: scikit-optimize hyperparameter optimization example](https://scikit-optimize.github.io/stable/auto_examples/hyperparameter-optimization.html)


In [1]:
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline 
%config InlineBackend.figure_format = 'retina'

# Load the dataset once, then pull out the feature matrix and the target vector.
# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases — confirm the environment's version still provides it.
boston = load_boston()
X = boston.data
y = boston.target
n_features = X.shape[1]  # column count of X; reused as an upper bound when tuning

# Estimator to tune: gradient boosted trees tend to do well on problems like this.
reg = GradientBoostingRegressor(random_state=0, n_estimators=50)

In [2]:
def report(results, n_top=3):
    """Print the best-ranked candidates from a search's ``cv_results_`` mapping.

    Parameters
    ----------
    results : mapping
        Must provide the keys ``'rank_test_score'``, ``'mean_test_score'``,
        ``'std_test_score'`` and ``'params'``, each a sequence indexed by
        candidate position. Plain lists and NumPy arrays are both accepted.
    n_top : int, default=3
        Highest rank to report; ranks ``1`` through ``n_top`` are printed in
        ascending order.

    Returns
    -------
    None
        The summary is written to stdout. Every candidate tied on a rank is
        printed; a rank with no candidate prints nothing.
    """
    # Coerce once to an array so the elementwise ``== rank`` comparison also
    # works when the ranks arrive as a plain list. A list compared with an int
    # is just ``False``, which would make the report silently print nothing.
    ranks = np.asarray(results['rank_test_score'])
    for i in range(1, n_top + 1):
        for candidate in np.flatnonzero(ranks == i):
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})"
                  .format(results['mean_test_score'][candidate],
                          results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

In [5]:
#Same thing but with BayesSearchCV
from skopt import BayesSearchCV
import time as time
import scipy.stats as stats
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from dask.distributed import Client
import joblib

# Search space: one skopt dimension per GradientBoostingRegressor hyperparameter.
params = {
    'max_depth': Integer(1, 5),
    # log-uniform prior, since the learning rate spans several orders of magnitude
    'learning_rate': Real(10**-5, 10**0, prior="log-uniform"),
    'max_features': Integer(1, n_features),
    'min_samples_split': Integer(2, 100),
    'min_samples_leaf': Integer(1, 100),
}

search = BayesSearchCV(
    reg,
    params,
    n_iter=50,
    cv=5,
    n_jobs=1,
    random_state=0)

# Open the Dask client as a context manager so it is closed when the fit is
# done. Leaving it open means every re-run of this cell starts another client
# while the previous one still holds its resources (the recorded output warns
# that port 8787 is already in use).
with Client(processes=False) as client, joblib.parallel_backend('dask'):
    # Start the clock only once the client is up, so the reported duration
    # covers the search itself rather than client start-up.
    start = time.time()
    search.fit(X, y)
    elapsed = time.time() - start

# The label names the searcher that was actually run (BayesSearchCV).
print("BayesSearchCV took %.2f seconds for %d candidate parameter settings." % (elapsed, len(search.cv_results_['params'])))
report(search.cv_results_)
best_model = search.best_params_  # note: this holds the best parameter dict, not a fitted estimator
print('Best model')
print(best_model)

Port 8787 is already in use. 
Perhaps you already have a cluster running?
Hosting the diagnostics dashboard on a random port instead.
distributed.client - ERROR - Error in callback <function DaskDistributedBackend.apply_async.<locals>.callback_wrapper at 0x1204200d0> of <Future: cancelled, key: _fit_and_score-batch-e116f6119a4e4538a5f308715f578dc0>:
Traceback (most recent call last):
  File "/Users/pghaneka/miniconda3/envs/py37/lib/python3.6/site-packages/distributed/client.py", line 283, in execute_callback
    fn(fut)
  File "/Users/pghaneka/miniconda3/envs/py37/lib/python3.6/site-packages/joblib/_dask.py", line 260, in callback_wrapper
    result = future.result()
  File "/Users/pghaneka/miniconda3/envs/py37/lib/python3.6/site-packages/distributed/client.py", line 220, in result
    raise result
concurrent.futures._base.CancelledError: _fit_and_score-batch-e116f6119a4e4538a5f308715f578dc0
distributed.client - ERROR - Error in callback <function DaskDistributedBackend.apply_async.<lo

CancelledError: _fit_and_score-batch-fdcc0451d9dc48dcad8de931346f8cf5

distributed.client - ERROR - Error in callback <function DaskDistributedBackend.apply_async.<locals>.callback_wrapper at 0x120725c80> of <Future: cancelled, key: _fit_and_score-batch-1a5b61bd855d40e19d761b2c4afb156e>:
Traceback (most recent call last):
  File "/Users/pghaneka/miniconda3/envs/py37/lib/python3.6/site-packages/distributed/client.py", line 283, in execute_callback
    fn(fut)
  File "/Users/pghaneka/miniconda3/envs/py37/lib/python3.6/site-packages/joblib/_dask.py", line 260, in callback_wrapper
    result = future.result()
  File "/Users/pghaneka/miniconda3/envs/py37/lib/python3.6/site-packages/distributed/client.py", line 220, in result
    raise result
concurrent.futures._base.CancelledError: _fit_and_score-batch-1a5b61bd855d40e19d761b2c4afb156e
distributed.client - ERROR - Error in callback <function DaskDistributedBackend.apply_async.<locals>.callback_wrapper at 0x120814268> of <Future: cancelled, key: _fit_and_score-batch-fdcc0451d9dc48dcad8de931346f8cf5>:
Traceback (

In [None]:
from skopt.plots import plot_convergence
# plot_convergence works on skopt OptimizeResult objects, not on the fitted
# searcher itself. BayesSearchCV exposes one result per search space in
# ``optimizer_results_``; this notebook defines a single space, hence index 0.
# NOTE(review): ``optimizer_results_`` requires a reasonably recent
# scikit-optimize release — confirm against the installed version.
plot_convergence(search.optimizer_results_[0])
plt.show()

In [None]:
# NOTE(review): no figure is created in this cell, so this call probably
# renders nothing — confirm and drop the cell if it is a leftover.
plt.show()

In [None]:
# Number of entries recorded under 'mean_test_score' in the search results;
# presumably one per evaluated parameter setting — verify against n_iter.
len(search.cv_results_['mean_test_score'])