# LightGBM

In [1]:
import itertools
import lightgbm
import json
import pickle
import numpy as np
import pandas as pd
from bayes_opt import BayesianOptimization
from bayes_opt.observer import JSONLogger
from bayes_opt.event import Events
from bayes_opt.util import load_logs
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK


In [2]:
# @ unused hyperparameters
#               'drop_rate' | used only in dart
#               'max_drop' | used only in dart
#               'skip_drop' | used only in dart
#               'xgboost_dart_mode' | used only in dart
#               'uniform_drop' | used only in dart
#               'drop_seed' | used only in dart
#               'top_rate' | for safety not used
#               'other_rate' | for safety not used
#               'top_k' | used only in voting parallel
#               'monotone_constraints' | default
#               'feature_contri' | default
#               'forcedsplits_filename' | default
#               'forcedbins_filename' | default
#               'refit_decay_rate' | used only in refit task
#               'cegb_penalty_feature_lazy' | default
#               'cegb_penalty_feature_coupled' | default
#               'cegb_tradeoff': (1, 10), \
#               'cegb_penalty_split': (0 ,10), \

## Bayes optimizer

In [4]:
# Fixed LightGBM parameters for the Bayesian-optimisation run. These values are
# held constant; only the entries of `num_params` are searched.
base_params = {
    'task': 'train',
    'objective': 'binary',
    'tree_learner': 'serial',
    'num_threads': 4,
    'device_type': 'cpu',
    'seed': 1213,
    'num_leaves': 500,
    # learning control parameters
    'bagging_seed': 42,
    'feature_fraction_seed': 3,
    'first_metric_only': False,
    'max_delta_step': 0,
    'min_sum_hessian_in_leaf': 0.05,
    'bagging_fraction': 1,
    'pos_bagging_fraction': 1,
    'neg_bagging_fraction': 1,
    'bagging_freq': 0,
    'feature_fraction': 1,
    'feature_fraction_bynode': 1,
    'min_data_in_leaf': 250,
    'lambda_l1': 250,
    # IO parameters
    'bin_construct_sample_cnt': 200000,
    'histogram_pool_size': -1,
    # objective, metric and binning parameters
    'is_unbalance': True,
    'metric': 'auc,binary_logloss,binary_error',
    'metric_freq': 1,
    'max_bin': 511,
    'min_data_in_bin': 3,
    'min_gain_to_split': 0,
    # parameters for categorical features
    'min_data_per_group': 100,
    'max_cat_threshold': 32,
    'cat_l2': 500,
    # The LightGBM parameter is spelled `cat_smooth`; the previous key
    # 'cat_smoth' is not a LightGBM parameter name, so the value 500 was
    # not applied.
    'cat_smooth': 500,
}

# Categorical search space: each key maps to the list of candidate values.
cat_params = dict(boosting=['gbdt'])

# Numeric search space: each key maps to a (lower, upper) bound pair.
#
# Ranges that are currently disabled (the parameter is fixed in `base_params`
# instead), kept here for reference:
#   num_leaves (2, 1024), min_data_in_leaf (2, 500),
#   min_sum_hessian_in_leaf (0, 0.1), bagging_fraction (0.1, 1),
#   pos_bagging_fraction (0.1, 1), neg_bagging_fraction (0.1, 1),
#   bagging_freq (0, 100), feature_fraction (0.1, 1),
#   feature_fraction_bynode (0.1, 1), lambda_l1 (0, 500),
#   cat_l2 (10, 1000), cat_smooth (10, 1000)
num_params = dict(
    # learning control parameters
    max_depth=(1, 200),
    lambda_l2=(1000, 4000),
    # objective parameters
    sigmoid=(0.1, 500),
    # parameters for categorical features
    max_cat_to_onehot=(1, 100),
)

# Names of the LightGBM parameters that take integer values.
# 'max_cat_to_onehot' used to appear twice in this list; each name is now
# listed exactly once, in the original first-occurrence order.
int_params = [
    'num_leaves',
    'max_depth',
    'min_data_in_leaf',
    'bagging_freq',
    'min_data_per_group',
    'max_cat_threshold',
    'max_cat_to_onehot',
    'max_bin',
    'min_data_in_bin',
]

In [None]:
# Build the Bayesian-optimisation driver from the train / eval / test splits
# and the parameter specifications, then start the search.
# NOTE(review): LightGBM_binary_bayes_opt and the X_*/y_* data are not defined
# in this part of the file; the meaning of `load_log` and `num_opts` should be
# confirmed against the class definition.
lgb_bayes = LightGBM_binary_bayes_opt(
    X_train, y_train,
    X_eval, y_eval,
    X_test, y_test,
    base_params, cat_params, num_params, int_params,
    load_log=False,
    num_opts=500,
)
lgb_bayes.optimize_lgb()

## Hyperopt optimizer

In [6]:
# Fixed LightGBM parameters for the Hyperopt run; everything else is supplied
# through the int / float / categorical search-space dictionaries.
base_params = dict(
    task='train',
    objective='binary',
    tree_learner='serial',
    num_threads=4,
    device_type='cpu',
    seed=1213,
    bagging_seed=42,
    feature_fraction_seed=3,
    first_metric_only=False,
    max_delta_step=0,
    bin_construct_sample_cnt=200000,
    histogram_pool_size=-1,
    is_unbalance=True,
    metric='auc,binary_logloss,binary_error',
    metric_freq=1,
)

# Categorical search space: each key maps to the list of candidate values.
cat_params = dict(boosting=['gbdt'])

# Integer search space: each key maps to a 3-tuple.
# NOTE(review): the tuples look like (lower, upper, step) — confirm against
# the optimiser class that consumes them.
int_params = dict(
    num_leaves=(2, 1024, 8),
    max_depth=(1, 100, 1),
    min_data_in_leaf=(2, 500, 4),
    bagging_freq=(0, 100, 1),
    min_data_per_group=(100, 500, 10),
    max_cat_threshold=(16, 256, 2),
    max_cat_to_onehot=(1, 100, 1),
    max_bin=(127, 511, 2),
    min_data_in_bin=(3, 128, 8),
)

# Continuous search space: each key maps to a (lower, upper) bound pair.
float_params = {
    'min_sum_hessian_in_leaf': (0, 0.1),
    'bagging_fraction': (0.1, 1),
    'pos_bagging_fraction': (0.1, 1),
    'neg_bagging_fraction': (0.1, 1),
    'feature_fraction': (0.1, 1),
    'feature_fraction_bynode': (0.1, 1),
    'lambda_l1': (0, 500),
    'lambda_l2': (1000, 4000),
    'sigmoid': (0.1, 500),
    'cat_l2': (10, 1000),
    # The LightGBM parameter is spelled `cat_smooth`; the previous key
    # 'cat_smoth' is not a LightGBM parameter name, so the optimiser was
    # searching a dimension that had no effect on the model.
    # NOTE(review): trials saved before this rename were recorded under the
    # old key; consider starting from a fresh trials file when resuming.
    'cat_smooth': (10, 1000),
    'min_gain_to_split': (0, 100),
}

In [None]:
# Build the Hyperopt driver from the train / eval / test splits and the
# parameter specifications, then run the search and keep its return value.
# NOTE(review): lightgbm_hyperopt_binary and the X_*/y_* data are not defined
# in this part of the file; with load_trials=True it presumably expects
# './trials.pkl' to exist already — confirm against the class definition.
lgb_hyperopt = lightgbm_hyperopt_binary(
    X_train, y_train,
    X_eval, y_eval,
    X_test, y_test,
    base_params, cat_params, int_params, float_params,
    num_opts=50,
    trials_path='./trials.pkl',
    load_trials=True,
)
best_params = lgb_hyperopt.optimize_lgb()