In [28]:
import numpy as np

from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize, Optimizer

from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score

In [205]:
def hyper_parameter_tuning(model, space, train_data, train_label,
                           obj,
                           training_vars=None,
                           valid_data=None,
                           valid_label=None,
                           train_weight=None,
                           valid_weight=None,
                           early_stopping_rounds=1000,
                           num_boost_round = 50000,
                           ncalls = 25,
                           n_rand_starts = 5,
                           rf = False):

#     train = lgb.Dataset(np.array([train_data[var] for var in training_vars]).T,
#                     label = train_label,
#                     weight= train_weight,
#                     feature_name=training_vars)#,
#                     #init_sore = 0)
#     valid = train.create_valid(np.array([valid_data[var] for var in training_vars]).T,
#                    label= valid_label, weight = valid_weight)#, init_score = 0)

    @use_named_args(space)
    def objective(**params):
        results = {}

        params['n_jobs'] = -1 #cpu_n_jobs
        params['first_metric_only'] = True
        
        if callable(obj):
            fobj = obj
        else:
            params['objective'] = obj
            fobj = None
        
        params['metric'] = 'rmse'
        
        if(rf):
            params['boosting_type'] = 'rf'
        else:
            params['boosting_type'] = 'gbdt'
        lgb.train(params = params, train_set = train, num_boost_round = num_boost_round,
                 valid_sets=[valid], valid_names=['valid'], evals_result = results, fobj = fobj,
                 verbose_eval=50, early_stopping_rounds = early_stopping_rounds)
#         print('________________',results)
        best_result = min(results['valid'][[*results['valid']][0]])
        
        return best_result
    results = gp_minimize(objective, space, n_calls=ncalls, n_random_starts = n_rand_starts, random_state=0, verbose = True) #callback=[checkpoint_saver],
    print(f'Best score: {results.fun}')
    print(f'Best Parameters: {results.x}')

    return results

In [216]:
def objetive(model, X, y):
    
    return np.mean(cross_val_score(model, X, y, cv = 5))

In [217]:
boston = load_boston()
X, y = boston.data, boston.target

In [218]:
X.shape[1]

13

In [219]:
reg = GradientBoostingRegressor()

In [220]:
space = [Integer(1, 20, name='max_depth'),
          Real(10**-3, 10**-1, "log-uniform", name='learning_rate'),
          Integer(2, X.shape[1], name='max_features'),
          Integer(3, 50, name='n_estimators')]
order = ['max_depth', 'learning_rate', 'max_features', 'n_estimators']
opt = Optimizer(space, reg)


In [221]:
reg

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=100,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=None, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [224]:
score = []
params = []
for i in range(10):
    param = opt.ask()
    param = dict(zip(order, param))
    reg.set_params(**param)
    score.append(objetive(reg, X, y))
    params.append(param)

In [226]:
score

[0.2865351251746823,
 -0.2952905058495331,
 -0.5590393642259391,
 -0.30167762601405684,
 0.3978541663442163,
 -0.3737869929704413,
 -0.5699018928534031,
 0.3492274230046408,
 -0.40817558569533663,
 -0.2213012322848053]

In [227]:
params

[{'max_depth': 14,
  'learning_rate': 0.04739207120482394,
  'max_features': 10,
  'n_estimators': 16},
 {'max_depth': 7,
  'learning_rate': 0.023297409651241836,
  'max_features': 6,
  'n_estimators': 9},
 {'max_depth': 17,
  'learning_rate': 0.001803797747651532,
  'max_features': 13,
  'n_estimators': 37},
 {'max_depth': 1,
  'learning_rate': 0.017440413798120565,
  'max_features': 6,
  'n_estimators': 25},
 {'max_depth': 13,
  'learning_rate': 0.020957816722777974,
  'max_features': 6,
  'n_estimators': 48},
 {'max_depth': 19,
  'learning_rate': 0.009449337925782963,
  'max_features': 7,
  'n_estimators': 17},
 {'max_depth': 18,
  'learning_rate': 0.001761485283366378,
  'max_features': 5,
  'n_estimators': 38},
 {'max_depth': 13,
  'learning_rate': 0.02714536047894435,
  'max_features': 2,
  'n_estimators': 49},
 {'max_depth': 7,
  'learning_rate': 0.006005244278482685,
  'max_features': 13,
  'n_estimators': 23},
 {'max_depth': 8,
  'learning_rate': 0.009638950784788037,
  'max_f