In [None]:
# hyperparameter search space

import time

from hyperopt import STATUS_OK, Trials, fmin, hp, space_eval, tpe
from hyperopt.pyll import scope

# Search space for LGBMRegressor. Each dict key is the estimator keyword
# argument; the first argument of every hp.* call is hyperopt's own label.
# hyperopt reports its results keyed by *label*, so the label must be
# identical to the dict key, otherwise the tuned value is handed back under a
# name the estimator does not know.
lgb_space = {
    # Log-uniform so small learning rates are sampled as often as large ones.
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.5)),
    # quniform yields floats; scope.int casts them for the integer parameters.
    'max_depth': scope.int(hp.quniform('max_depth', 3, 20, 1)),
    'n_estimators': scope.int(hp.quniform('n_estimators', 100, 500, 1)),
    'num_leaves': scope.int(hp.quniform('num_leaves', 2, 100, 1)),
    # Label fixed: it used to be 'colsample_by_tree', which did not match the
    # parameter name and caused the tuned value to be lost on refit.
    'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0),
    # NOTE(review): LightGBM documents that bagging_fraction only takes effect
    # when bagging_freq is non-zero; no bagging_freq is set here - confirm
    # whether this dimension actually influences the model.
    'bagging_fraction': hp.uniform('bagging_fraction', 0.6, 1.0),
    'reg_alpha': hp.uniform('reg_alpha', 0.0, 100.0),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 100.0),
    # Step of 5: sampled values are multiples of 5 (quniform rounds to the
    # nearest multiple of q, so the effective range is 0..100).
    'min_child_samples': scope.int(hp.quniform('min_child_samples', 2, 100, 5)),
}

In [None]:
def hyperopt(estimator, param_space, X_train, y_train, X_test, y_test, num_eval, eval_metric=None):
    """Tune ``estimator`` with hyperopt's TPE search and report a hold-out MAE.

    Every candidate is scored with 3-fold cross-validation on the training
    data. The best candidate is then refit on the full training data and
    evaluated on ``X_test`` / ``y_test`` with mean absolute error.

    Parameters
    ----------
    estimator : class
        Estimator class (not an instance); it is called as
        ``estimator(**params)``.
    param_space : dict
        hyperopt search space whose keys are the estimator's keyword
        arguments.
    X_train, y_train
        Data used for cross-validation and for the final refit.
    X_test, y_test
        Hold-out data used only for the printed "Test Score".
    num_eval : int
        Number of hyperopt evaluations (``max_evals``).
    eval_metric : str or callable, optional
        Passed to ``cross_val_score`` as ``scoring``. The score is negated to
        form the loss, so it must follow the "higher is better" convention.

    Returns
    -------
    hyperopt.Trials
        The trials object holding every evaluated configuration.
    """
    start = time.time()

    def objective_function(params):
        # hyperopt minimises the loss while cross_val_score returns a
        # "higher is better" score, hence the sign flip.
        model = estimator(**params)
        score = cross_val_score(model, X_train, y_train, cv=3, scoring=eval_metric).mean()
        return {'loss': -score, 'status': STATUS_OK}

    trials = Trials()
    # NOTE(review): recent hyperopt releases expect a numpy Generator
    # (np.random.default_rng) for rstate; RandomState is kept to match the
    # environment this notebook was written for - verify against the
    # installed version.
    best_assignment = fmin(objective_function,
                           param_space,
                           algo=tpe.suggest,
                           max_evals=num_eval,
                           trials=trials,
                           rstate=np.random.RandomState(1))
    loss = [x['result']['loss'] for x in trials.trials]

    # fmin returns raw values keyed by hyperopt *label* (floats for quniform,
    # indices for hp.choice). space_eval maps that assignment back through the
    # search space, giving a dict keyed by the estimator's real parameter
    # names with scope.int / hp.choice already applied.
    best_param = dict(space_eval(param_space, best_assignment))

    # Parameters that must be integers, per supported estimator. The cast is
    # kept for spaces that use a bare quniform without scope.int.
    integer_params = {
        'LGBMRegressor': ('num_leaves', 'max_depth', 'n_estimators', 'min_child_samples'),
        'XGBRegressor': ('n_estimators', 'max_depth', 'min_child_weight'),
        'CatBoostRegressor': ('iterations', 'depth'),
    }
    # Dispatch on the class name rather than on the exact repr string.
    estimator_name = getattr(estimator, '__name__', type(estimator).__name__)
    for each in integer_params.get(estimator_name, ()):
        if each in best_param:
            best_param[each] = int(best_param[each])

    # Refit the winning configuration on all training data. This now runs for
    # any estimator, so y_pred is always defined before it is reported.
    model_best = estimator(**best_param)
    model_best.fit(X_train, y_train)
    if estimator_name == 'CatBoostRegressor':
        # CatBoost predictions are made on a Pool, as in the original code.
        test_pool = Pool(X_test)
        y_pred = model_best.predict(test_pool)
    else:
        y_pred = model_best.predict(X_test)

    print("")
    print("##### Results #####")
    print("Score best parameters: ", min(loss)*-1)
    print("Best parameters: ", best_param)
    print("Test Score: ", mean_absolute_error(y_test, y_pred))
    print("Time elapsed: ", time.time() - start)
    print("Parameter combinations evaluated: ", num_eval)

    return trials

In [None]:
# Search budget and the scorer used inside cross-validation.
num_eval = 50
eval_metric = 'neg_mean_absolute_error'

# Run the search for LightGBM; the validation split serves as the hold-out
# set for the reported score. The returned Trials object is kept for later.
lgb_hyperopt = hyperopt(
    estimator=LGBMRegressor,
    param_space=lgb_space,
    X_train=x_train_new,
    y_train=y_train,
    X_test=x_val_new,
    y_test=y_val,
    num_eval=num_eval,
    eval_metric=eval_metric,
)

In [None]:
# unpack the selected parameters

# Flattens one level of list nesting; kept defined in case other cells use it.
unpack_function = lambda l: [item for sublist in l for item in sublist]

# The best trial stores its assignment as {hyperopt label: [value]}.
unpack_all = lgb_hyperopt.best_trial['misc']['vals']

# Pair every label with its own value. Flattening all value lists and then
# zipping them with the keys by position misaligns the two as soon as one
# label has an empty list (inactive branch of a conditional space).
best_assignment = {label: vals[0] for label, vals in unpack_all.items() if len(vals) > 0}

# Map the label-keyed assignment back through the search space so the result
# is keyed by the estimator's parameter names, not by hyperopt labels.
best_param = dict(space_eval(lgb_space, best_assignment))

# LightGBM needs these as integers; the cast is harmless if they already are.
for each in ['num_leaves','max_depth','n_estimators','min_child_samples']:
    best_param[each] = int(best_param[each])

In [None]:
# final model performance assessment

# Both the training and the validation split are monitored during fitting.
final_eval_sets = [(x_train_new, y_train), (x_val_new, y_val)]

# NOTE(review): LightGBM 4.0 removed `early_stopping_rounds` and `verbose`
# from fit() in favour of callbacks; this call targets the older API.
lgb = LGBMRegressor(**best_param)
lgb.fit(
    x_train_new,
    y_train,
    eval_set=final_eval_sets,
    eval_metric='mae',
    early_stopping_rounds=30,
    verbose=20,
)

# Score the fitted model on the held-out test split.
y_pred_final = lgb.predict(x_test_new)
final_mae = mean_absolute_error(y_test, y_pred_final)
print('Final MAE from using LightGBM Regressor is: %.3f' % final_mae)