In [1]:
import numpy as np
from lightgbm import LGBMClassifier, LGBMRegressor
import lightgbm as lgb
import sklearn
import json
import os

In [2]:
# Seed NumPy's global RNG before the first draw so that a fresh
# "Restart Kernel -> Run All" regenerates exactly the same synthetic data.
SEED = 42
np.random.seed(SEED)

# Size of the synthetic problem: rows per split, feature columns, group ids.
n_rows, n_cols, n_grps = 1000, 10, 5

# Features are uniform noise in [0, 1).
x_train = np.random.rand(n_rows, n_cols)
x_test = np.random.rand(n_rows, n_cols)

# Binary labels are drawn independently of the features, so there is no
# real signal for a model to learn from this data.
y_train = np.random.randint(2, size=n_rows)
y_test = np.random.randint(2, size=n_rows)

# One integer group id in [0, n_grps) per row; passed to the tuner as `groups`.
groups_train = np.random.randint(n_grps, size=n_rows)
groups_test = np.random.randint(n_grps, size=n_rows)

# Sanity check of the generated shapes.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(1000, 10) (1000, 10) (1000,) (1000,)


In [3]:
import warnings

# Suppress every warning from this point on (including any raised while the
# project modules below are imported).
warnings.filterwarnings("ignore")

# Hyper-parameter tuning: the project-local LGBMTuner is configured from the
# `tuner_config` module and searches on the training split only.
import tuner_config
from auto_lgbm import LGBMTuner

# Arguments forwarded to the tuner, collected in one place for readability.
tune_inputs = dict(
    X=x_train,
    y=y_train,
    groups=groups_train,
    categorical_feature='auto',
    feature_name=None,
)

lgbm_tr = LGBMTuner(configs=tuner_config)
# `params_opt` is the dict of selected hyper-parameters returned by the tuner.
params_opt = lgbm_tr.tune(**tune_inputs)

[I 211102 21:07:49 auto_lgbm:49] hyper-parameter tunning in progress ...




[I 211102 21:07:55 auto_lgbm:64] parameters are stored in auto_lgbm.json


In [4]:
# Display the hyper-parameter dict returned by `lgbm_tr.tune(...)`.
params_opt

{'learning_rate': 0.1,
 'n_estimators': 100,
 'subsample_freq': 1,
 'max_depth': 12,
 'min_data_in_leaf': 679,
 'num_leaves': 44,
 'subsample': 0.9036402882362617,
 'feature_fraction': 0.816089466689486,
 'lambda_l1': 94,
 'lambda_l2': 90,
 'min_gain_to_split': 7.346768424642352,
 'min_child_weight': 1.543389098464961e-05}

In [5]:
# Build a classifier from the tuned hyper-parameters and show its full
# parameter set.
# NOTE(review): `params_opt` uses LightGBM-native names (min_data_in_leaf,
# lambda_l1, lambda_l2, feature_fraction, min_gain_to_split), so they appear in
# get_params() next to the scikit-learn-style defaults (min_child_samples,
# reg_alpha, reg_lambda, colsample_bytree, min_split_gain). LightGBM documents
# these as aliases of the same settings -- confirm which value takes effect.
estmtr_opt = LGBMClassifier(**params_opt)
estmtr_opt.get_params()

{'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 1.0,
 'importance_type': 'split',
 'learning_rate': 0.1,
 'max_depth': 12,
 'min_child_samples': 20,
 'min_child_weight': 1.543389098464961e-05,
 'min_split_gain': 0.0,
 'n_estimators': 100,
 'n_jobs': -1,
 'num_leaves': 44,
 'objective': None,
 'random_state': None,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'silent': 'warn',
 'subsample': 0.9036402882362617,
 'subsample_for_bin': 200000,
 'subsample_freq': 1,
 'min_data_in_leaf': 679,
 'feature_fraction': 0.816089466689486,
 'lambda_l1': 94,
 'lambda_l2': 90,
 'min_gain_to_split': 7.346768424642352}

In [6]:
# Baseline for comparison: a classifier built with no arguments, i.e. the
# library's default hyper-parameters, followed by a display of those defaults.
estmtr_default = LGBMClassifier()
estmtr_default.get_params()

{'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 1.0,
 'importance_type': 'split',
 'learning_rate': 0.1,
 'max_depth': -1,
 'min_child_samples': 20,
 'min_child_weight': 0.001,
 'min_split_gain': 0.0,
 'n_estimators': 100,
 'n_jobs': -1,
 'num_leaves': 31,
 'objective': None,
 'random_state': None,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'silent': 'warn',
 'subsample': 1.0,
 'subsample_for_bin': 200000,
 'subsample_freq': 0}

In [7]:
# Keyword arguments shared by every `.fit(...)` call in this notebook.
# NOTE(review): newer LightGBM releases expose early stopping through the
# `lgb.early_stopping` callback rather than a fit keyword -- confirm against
# the installed version before upgrading.
fit_params = {
    # Log the evaluation metrics every 50 boosting rounds.
    'callbacks': [lgb.log_evaluation(period=50)],
    # Evaluate on the training split and on the held-out split.
    'eval_set': [(x_train, y_train), (x_test, y_test)],
    # Labels for the two evaluation sets, in the same order as `eval_set`.
    'eval_names': ['train', 'val'],
    'eval_metric': ['binary_logloss', 'auc'],
    'early_stopping_rounds': 10,
    'feature_name': 'auto',
}

# Fit the tuned estimator; being the last expression of the cell, the call's
# return value is displayed.
estmtr_opt.fit(x_train, y_train, **fit_params)

LGBMClassifier(feature_fraction=0.816089466689486, lambda_l1=94, lambda_l2=90,
               max_depth=12, min_child_weight=1.543389098464961e-05,
               min_data_in_leaf=679, min_gain_to_split=7.346768424642352,
               num_leaves=44, subsample=0.9036402882362617, subsample_freq=1)

In [8]:
# Best evaluation scores of the tuned estimator, keyed by eval-set name.
# In the recorded run both sets show binary_logloss 0.6931 (= ln 2) and
# AUC 0.5, which is the score of a model that predicts 0.5 for every row.
estmtr_opt.best_score_

defaultdict(collections.OrderedDict,
            {'train': OrderedDict([('binary_logloss', 0.6931471805599466),
                          ('auc', 0.5)]),
             'val': OrderedDict([('binary_logloss', 0.6931471805599466),
                          ('auc', 0.5)])})

In [9]:
# Fit the default-parameter baseline with the same fit arguments as the tuned
# model, then display its best evaluation scores (`fit` returns the estimator,
# so the attribute can be read straight off the call).
estmtr_default.fit(x_train, y_train, **fit_params).best_score_

defaultdict(collections.OrderedDict,
            {'train': OrderedDict([('binary_logloss', 0.6746990463697193),
                          ('auc', 0.751508)]),
             'val': OrderedDict([('binary_logloss', 0.6947586138837399),
                          ('auc', 0.49226625686832426)])})