In [2]:
import os
import sys

# Init project path
PROJECT_DIR = os.getcwd() + "/../"
sys.path.insert(0, PROJECT_DIR)
%load_ext autoreload
%autoreload 2

In [4]:
import numpy as np
import pandas as pd

from db import table_load, table_write
from models import LightGBM
from utils import timer

In [13]:
if len(sys.argv) == 2:
    config_file_name = sys.argv[1]
else:
    config_file_name = "lightgbm_0"

print("Config file Name: ", config_file_name)

config: dict = json.load(open("./../configs/{}.json".format(config_file_name)))


Config file Name:  lightgbm_0


In [11]:
import optuna
import lightgbm as lgb
import numpy as np
import sklearn.metrics
from sklearn.model_selection import train_test_split
from db import table_load

In [20]:
def objective(trial):
    train = table_load(table_name=config["dataset"]["train_table"])[
        config["features"]["train"] + config["features"]["target"]
    ]
    train_x, test_x, train_y, test_y = train_test_split(
        train[config["features"]["train"]],
        train[config["features"]["target"]].iloc[:, 0],
        test_size=0.25,
    )

    dtrain = lgb.Dataset(train_x, label=train_y)

    param = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
        "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    gbm = lgb.train(param, dtrain)
    preds = gbm.predict(test_x)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
    return accuracy

In [21]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

print('Number of finished trials: {}'.format(len(study.trials)))

print('Best trial:')
trial = study.best_trial

print('  Value: {}'.format(trial.value))

print('  Params: ')
for key, value in trial.params.items():
    print('    {}: {}'.format(key, value))

[I 2019-11-12 11:03:03,087] Finished trial#0 resulted in value: 0.7937219730941704. Current best value is 0.7937219730941704 with parameters: {'lambda_l1': 0.00010649155432682113, 'lambda_l2': 1.1975420865465626e-06, 'num_leaves': 117, 'feature_fraction': 0.9061307389093418, 'bagging_fraction': 0.5715283842087624, 'bagging_freq': 3, 'min_child_samples': 95}.
[I 2019-11-12 11:03:03,544] Finished trial#1 resulted in value: 0.7533632286995515. Current best value is 0.7937219730941704 with parameters: {'lambda_l1': 0.00010649155432682113, 'lambda_l2': 1.1975420865465626e-06, 'num_leaves': 117, 'feature_fraction': 0.9061307389093418, 'bagging_fraction': 0.5715283842087624, 'bagging_freq': 3, 'min_child_samples': 95}.
[I 2019-11-12 11:03:03,735] Finished trial#2 resulted in value: 0.7623318385650224. Current best value is 0.7937219730941704 with parameters: {'lambda_l1': 0.00010649155432682113, 'lambda_l2': 1.1975420865465626e-06, 'num_leaves': 117, 'feature_fraction': 0.9061307389093418, 'b

[I 2019-11-12 11:03:09,947] Finished trial#22 resulted in value: 0.8071748878923767. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'bagging_fraction': 0.49612790326753337, 'bagging_freq': 3, 'min_child_samples': 14}.
[I 2019-11-12 11:03:10,215] Finished trial#23 resulted in value: 0.7982062780269058. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'bagging_fraction': 0.49612790326753337, 'bagging_freq': 3, 'min_child_samples': 14}.
[I 2019-11-12 11:03:10,734] Finished trial#24 resulted in value: 0.820627802690583. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'baggin

[I 2019-11-12 11:03:17,226] Finished trial#44 resulted in value: 0.7757847533632287. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'bagging_fraction': 0.49612790326753337, 'bagging_freq': 3, 'min_child_samples': 14}.
[I 2019-11-12 11:03:17,641] Finished trial#45 resulted in value: 0.8295964125560538. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'bagging_fraction': 0.49612790326753337, 'bagging_freq': 3, 'min_child_samples': 14}.
[I 2019-11-12 11:03:17,985] Finished trial#46 resulted in value: 0.7802690582959642. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'baggi

[I 2019-11-12 11:03:26,610] Finished trial#66 resulted in value: 0.757847533632287. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'bagging_fraction': 0.49612790326753337, 'bagging_freq': 3, 'min_child_samples': 14}.
[I 2019-11-12 11:03:26,923] Finished trial#67 resulted in value: 0.7892376681614349. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'bagging_fraction': 0.49612790326753337, 'bagging_freq': 3, 'min_child_samples': 14}.
[I 2019-11-12 11:03:27,270] Finished trial#68 resulted in value: 0.8116591928251121. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'baggin

[I 2019-11-12 11:03:37,227] Finished trial#88 resulted in value: 0.7757847533632287. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'bagging_fraction': 0.49612790326753337, 'bagging_freq': 3, 'min_child_samples': 14}.
[I 2019-11-12 11:03:37,666] Finished trial#89 resulted in value: 0.7757847533632287. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'bagging_fraction': 0.49612790326753337, 'bagging_freq': 3, 'min_child_samples': 14}.
[I 2019-11-12 11:03:38,106] Finished trial#90 resulted in value: 0.7668161434977578. Current best value is 0.8609865470852018 with parameters: {'lambda_l1': 2.460993008963375e-06, 'lambda_l2': 0.002217296722509097, 'num_leaves': 101, 'feature_fraction': 0.6158405405198908, 'baggi

Number of finished trials: 100
Best trial:
  Value: 0.8609865470852018
  Params: 
    lambda_l1: 2.460993008963375e-06
    lambda_l2: 0.002217296722509097
    num_leaves: 101
    feature_fraction: 0.6158405405198908
    bagging_fraction: 0.49612790326753337
    bagging_freq: 3
    min_child_samples: 14


In [26]:
import pickle
with open("./../output/hp_tuning_results/{}_study.pickle".format(config_file_name), "wb") as f:
    pickle.dump(study, f)
