In [1]:
import time

import numpy as np

from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize, Optimizer

from sklearn.datasets import load_boston, load_iris, load_breast_cancer
from sklearn.model_selection import cross_val_score

import concurrent.futures

# Estimators (regressors and classifiers)
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier

In [2]:
def hyper_parameter_tuning(model, space, train_data, train_label,
                           obj,
                           training_vars=None,
                           valid_data=None,
                           valid_label=None,
                           train_weight=None,
                           valid_weight=None,
                           early_stopping_rounds=1000,
                           num_boost_round = 50000,
                           ncalls = 25,
                           n_rand_starts = 5,
                           rf = False):
    """Minimise a boosted-model validation metric over ``space`` with ``gp_minimize``.

    NOTE(review): the live code references ``lgb``, ``train`` and ``valid``,
    none of which is defined in this function or imported in the visible
    cells (``lgb`` is presumably LightGBM -- confirm).  The only code that
    would build ``train``/``valid`` is the commented-out block below, so the
    objective fails with ``NameError`` unless those names already exist in the
    notebook namespace.
    NOTE(review): a later cell defines another function with this same name,
    which replaces this one when the cells are run in order.

    Parameters
    ----------
    model, train_data, train_label, training_vars, valid_data, valid_label,
    train_weight, valid_weight
        Not used by the live code; the data-related ones are only referenced
        by the commented-out dataset construction.
    space : list
        Search dimensions passed to ``use_named_args`` and ``gp_minimize``.
    obj : callable or other
        If callable it is passed to ``lgb.train`` as ``fobj``; otherwise it is
        stored under ``params['objective']`` and ``fobj`` is ``None``.
    early_stopping_rounds, num_boost_round
        Forwarded unchanged to ``lgb.train``.
    ncalls : int
        Passed to ``gp_minimize`` as ``n_calls``.
    n_rand_starts : int
        Passed to ``gp_minimize`` as ``n_random_starts``.
    rf : bool
        Selects ``params['boosting_type']``: ``'rf'`` when true, else ``'gbdt'``.

    Returns
    -------
    The object returned by ``gp_minimize``; its ``fun`` and ``x`` attributes
    are printed before returning.
    """

# Disabled dataset construction -- this is the only place `train` and `valid`
# would be created (see the review note in the docstring).
#     train = lgb.Dataset(np.array([train_data[var] for var in training_vars]).T,
#                     label = train_label,
#                     weight= train_weight,
#                     feature_name=training_vars)#,
#                     #init_sore = 0)
#     valid = train.create_valid(np.array([valid_data[var] for var in training_vars]).T,
#                    label= valid_label, weight = valid_weight)#, init_score = 0)

    @use_named_args(space)
    def objective(**params):
        """Train once with ``params`` and return the value that ``gp_minimize`` minimises."""
        # Filled in by lgb.train through its `evals_result` argument.
        results = {}

        # Fixed settings added on top of the sampled hyper-parameters.
        params['n_jobs'] = -1 #cpu_n_jobs
        params['first_metric_only'] = True
        
        # A callable objective is handed over as `fobj`; anything else goes
        # into the parameter dict instead.
        if callable(obj):
            fobj = obj
        else:
            params['objective'] = obj
            fobj = None
        
        params['metric'] = 'rmse'
        
        if(rf):
            params['boosting_type'] = 'rf'
        else:
            params['boosting_type'] = 'gbdt'
        # NOTE(review): `lgb`, `train` and `valid` are free names here.
        lgb.train(params = params, train_set = train, num_boost_round = num_boost_round,
                 valid_sets=[valid], valid_names=['valid'], evals_result = results, fobj = fobj,
                 verbose_eval=50, early_stopping_rounds = early_stopping_rounds)
#         print('________________',results)
        # Minimum of the series stored under the first key of results['valid'].
        best_result = min(results['valid'][[*results['valid']][0]])
        
        return best_result
    # Fixed random_state=0; progress is printed because verbose=True.
    results = gp_minimize(objective, space, n_calls=ncalls, n_random_starts = n_rand_starts, random_state=0, verbose = True) #callback=[checkpoint_saver],
    print(f'Best score: {results.fun}')
    print(f'Best Parameters: {results.x}')

    return results

In [3]:
def objetive(model, X, y):
    """Return the mean of the 5-fold ``cross_val_score`` results for ``model`` on ``(X, y)``."""
    fold_scores = cross_val_score(model, X, y, cv=5)
    return np.mean(fold_scores)

def gridsearch(param):
    """Score one candidate parameter set with ``objetive``.

    Parameters
    ----------
    param : tuple
        Either ``(model, order, values)`` or ``(model, order, values, X, y)``.
        ``order`` holds the parameter names and ``values`` the matching values.
        With the three-element form the notebook-level ``X`` and ``y`` are
        used, which is the original behaviour.

    Returns
    -------
    The value returned by ``objetive`` for the configured model.

    Raises
    ------
    ValueError
        If ``param`` has fewer than three elements, or has extra elements
        that are not exactly ``(X, y)``.
    """
    model, order, values, *data = param
    if data:
        features, target = data
    else:
        # Legacy three-element form: fall back to the module-level data.
        features, target = X, y
    model.set_params(**dict(zip(order, values)))
    return objetive(model, features, target)


def hyper_parameter_tuning(model, space, order, X, y, ncalls = 10, mtype = 'res',
                           random_state = None):
    """Evaluate ``ncalls`` candidates from ``space`` in parallel and return the best.

    All candidates are requested from the ``Optimizer`` before any of them is
    scored, and the scores are never reported back to it, so no candidate is
    informed by an earlier evaluation.  Each candidate is scored by
    ``gridsearch`` in a worker process.

    Parameters
    ----------
    model : estimator
        Object exposing ``set_params``; it is sent to the worker processes.
    space : list
        Search dimensions handed to ``skopt.Optimizer``.
    order : list of str
        Parameter names, in the same order as the dimensions in ``space``.
    X, y
        Data forwarded to every evaluation.  (Previously these arguments were
        ignored and the notebook globals were used instead.)
    ncalls : int
        Number of candidates to evaluate; must be at least 1.
    mtype : {'res', 'cls'}
        Validated for backward compatibility only.  ``objetive`` uses the
        default ``cross_val_score`` scoring, where a higher score is better, so
        the highest score is selected for both values.
    random_state : int, RandomState or None
        Forwarded to ``skopt.Optimizer`` so the candidates can be reproduced.

    Returns
    -------
    dict
        Parameter name -> value for the best-scoring candidate.

    Raises
    ------
    ValueError
        If ``mtype`` is not recognised, if ``ncalls`` is below 1, or if every
        score is NaN.
    """
    mtypes = ['res', 'cls']
    if mtype not in mtypes:
        raise ValueError("Invalid model type. Expected one of: %s" % mtypes)
    if ncalls < 1:
        raise ValueError("ncalls must be at least 1, got %r" % (ncalls,))

    start = time.time()
    opt = Optimizer(space, random_state=random_state)
    candidates = [opt.ask() for _ in range(ncalls)]
    # X and y travel with every task so the workers score the data that was
    # passed in rather than whatever X/y happen to exist at notebook level.
    tasks = [(model, order, values, X, y) for values in candidates]
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # executor.map yields results in submission order, so scores[i]
        # belongs to candidates[i].
        scores = list(executor.map(gridsearch, tasks))

    # nanargmax skips NaN scores so that a NaN can never be picked as the winner.
    index = int(np.nanargmax(scores))
    parameter = dict(zip(order, candidates[index]))
    print(f'Best score: {scores[index]}')
    print(f'Parameters: {parameter}')
    print(f'Time it took: {time.time()-start}s')
    return parameter

In [4]:
# Load both bundled datasets; only the breast-cancer data is bound to X and y.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the scikit-learn version this notebook is pinned to.
boston = load_boston()
breast_cancer = load_breast_cancer()
# Regression alternative, currently disabled:
# X, y = boston.data, boston.target
X = breast_cancer.data
y = breast_cancer.target

In [5]:
# Estimators: two regressors and one classifier.
reg_gpdt = GradientBoostingRegressor(loss='lad')
reg_knn = KNeighborsRegressor()
reg_cls_gpdt = GradientBoostingClassifier()

# Search spaces, one dimension per tuned hyper-parameter.
space_gpdt = [
    Integer(1, 20, name='max_depth'),
    Real(10**-3, 10**-1, prior="log-uniform", name='learning_rate'),
    Integer(2, X.shape[1], name='max_features'),
    Integer(3, 50, name='n_estimators'),
]

space_knn = [
    Integer(3, 400, name='n_neighbors'),
]

# Parameter names, listed in the same order as the dimensions above.
order_gpdt = [
    'max_depth',
    'learning_rate',
    'max_features',
    'n_estimators',
]

order_knn = ['n_neighbors']

# Tune the gradient-boosting classifier on the data loaded above.
params = hyper_parameter_tuning(
    model=reg_cls_gpdt,
    space=space_gpdt,
    order=order_gpdt,
    X=X,
    y=y,
    ncalls=100,
    mtype='cls',
)

Minimum score: 0.9648967551622418
Parameters: {'max_depth': 15, 'learning_rate': 0.05129630847950463, 'max_features': 12, 'n_estimators': 40}
Time it took: 26.17779278755188s


In [72]:
# Sequential baseline: ask for 20 candidates one at a time and score each in
# this process, then report the elapsed wall-clock time.
# NOTE(review): `opt`, `order` and `reg` are not defined in any cell visible
# here, so this cell relies on leftover kernel state and will not survive
# Restart & Run All -- define them explicitly before relying on this timing.
# NOTE(review): this rebinds `params` (the dict returned by the tuning call in
# the earlier cell) to a list of dicts.
score = []
params = []
start = time.time()  # single timer start; the earlier duplicate was a dead store
for i in range(20):
    param = dict(zip(order, opt.ask()))
    reg.set_params(**param)
    score.append(objetive(reg, X, y))
    params.append(param)
print(f'Time it took: {time.time()-start}s')

Time it took: 14.987464427947998s


In [74]:
# Show the `verbose` attribute of a freshly constructed classifier.
# The original line ended in a dangling "." (a syntax error); the recorded
# output and the regressor check in the next cell indicate `.verbose` was meant.
GradientBoostingClassifier().verbose

0

In [75]:
# Show the `verbose` attribute of a freshly constructed regressor.
GradientBoostingRegressor().verbose

0