In [1]:
import logging
import os
import sys
import time
from datetime import datetime
from pprint import pformat

import numpy as np
import pandas as pd

sys.path.append('../models/blend/')
from utils import customized_eval, HyperParameterTuning, KFoldValidation

In [2]:
#---------------------
# setting
#---------------------
# Stays None unless the loaded labels turn out to have exactly two distinct values.
BINARY_SCENARIO = None

#---------------------
# load features
#---------------------
feature_dir = '../features/lazada_and_amazon/all_features.h5'
df = pd.read_hdf(feature_dir)

#---------------------
# label post-processing
#---------------------
def _binarize_label(raw_label):
    """Return 1 when the raw label equals 2, otherwise 0."""
    if raw_label == 2:
        return 1
    return 0


if df['label'].nunique() == 2:
    # binary class: collapse the labels to {0, 1}
    # (for customized f1 score inference of lgb)
    BINARY_SCENARIO = True
    df['label'] = df['label'].apply(_binarize_label)
# otherwise: multi-class (B, I or O) -- labels are left untouched

In [3]:
# Name of the column the model is trained to predict.
target = 'label'

# Every column from position 7 onward is used as a model input.
# NOTE(review): presumably the first 7 columns hold ids/metadata and the label --
# confirm that 'label' never sits at position >= 7, otherwise it leaks into the features.
features = list(df.columns[7:])

In [4]:
#-----------------------
# parameter tuning for session-level
# NOTE(review): the eval metric in the run log is named "sentence-level";
# "session-level" may be a typo -- confirm.
#-----------------------
target = 'label'
n_splits = 5
# Search range handed to the tuner for each parameter, as a pair of bounds.
pbounds = {
    'num_leaves': (25, 50),
    'lambda_l2': (0.0, 0.05),
    'lambda_l1': (0.0, 0.05),
    'min_child_samples': (20, 120),
    'bagging_fraction': (0.5, 1.0),
    'feature_fraction': (0.5, 1.0),
}  # 6 parameters to tune

# The root logger defaults to WARNING, which would silently drop the
# logging.info() calls below. basicConfig() is a no-op when handlers are
# already installed, so the level is also set explicitly.
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)

# Create the output directory *before* the long tuning run, so the run cannot
# be lost at the very end because 'logs/' does not exist.
os.makedirs('logs', exist_ok=True)

# Create a HyperParameterTuning object that drives the tuning for us
HP_tuning = HyperParameterTuning(train=df, features=features, target=target,
                                 n_splits=n_splits, params_bound=pbounds)

date_str = datetime.now().strftime('%Y-%m-%d_%H-%M')
s = time.time()
result = HP_tuning.param_tuning(init_points=5, num_iter=30)
e = time.time()
logging.info('It took {} mins'.format((e - s) / 60.0))

# NOTE(review): `result` looks like a bayes_opt optimizer object; verify that its
# points_to_csv() really accepts an `index` keyword -- it may only take a file name.
result.points_to_csv('logs/param_tuning_for_lgb_{}.csv'.format(date_str), index=False)

# pformat() returns the formatted text; pprint() returns None, which would make
# the log record read just "None".
logging.info('Best params:\n%s', pformat(result.res['max']['max_params']))


[31mInitialization[0m
[94m----------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   bagging_fraction |   feature_fraction |   lambda_l1 |   lambda_l2 |   min_child_samples |   num_leaves | 
params {'objective': 'binary', 'metric': 'None', 'num_leaves': 48, 'lambda_l2': 0.01273586787787735, 'lambda_l1': 0.030414811872730513, 'min_child_samples': 62, 'bagging_fraction': 0.7653204710389181, 'feature_fraction': 0.5455923488142518, 'subsample_freq': 1, 'bagging_seed': 1540193136, 'max_depth': -1, 'learning_rate': 0.1, 'verbosity': -1, 'num_threads': 32, 'early_stopping_rounds': 50}
Fold  0 :




[1]	valid_0's f1-score-on-sentence-level: 0
Training until validation scores don't improve for 50 rounds.
[2]	valid_0's f1-score-on-sentence-level: 0
[3]	valid_0's f1-score-on-sentence-level: 0
[4]	valid_0's f1-score-on-sentence-level: 0
[5]	valid_0's f1-score-on-sentence-level: 0.177708
[6]	valid_0's f1-score-on-sentence-level: 0.595411
[7]	valid_0's f1-score-on-sentence-level: 0.772087
[8]	valid_0's f1-score-on-sentence-level: 0.834663
[9]	valid_0's f1-score-on-sentence-level: 0.873399
[10]	valid_0's f1-score-on-sentence-level: 0.891298
Did not meet early stopping. Best iteration is:
[10]	valid_0's f1-score-on-sentence-level: 0.891298
res_score_ls [0, 0, 0, 0, 0.17770849571317227, 0.5954108648754658, 0.7720868556273827, 0.8346625766871166, 0.8733993015133876, 0.8912982258518728]
Fold  1 :
[1]	valid_0's f1-score-on-sentence-level: 0
[2]	valid_0's f1-score-on-sentence-level: 0
[3]	valid_0's f1-score-on-sentence-level: 0
[4]	valid_0's f1-score-on-sentence-level: 0
[5]	valid_0's f1-score

[2]	valid_0's f1-score-on-sentence-level: 0
[3]	valid_0's f1-score-on-sentence-level: 0
[4]	valid_0's f1-score-on-sentence-level: 0
[5]	valid_0's f1-score-on-sentence-level: 0
[6]	valid_0's f1-score-on-sentence-level: 0.552354
[7]	valid_0's f1-score-on-sentence-level: 0.753093
[8]	valid_0's f1-score-on-sentence-level: 0.846839
[9]	valid_0's f1-score-on-sentence-level: 0.876634
[10]	valid_0's f1-score-on-sentence-level: 0.890292
Did not meet early stopping. Best iteration is:
[10]	valid_0's f1-score-on-sentence-level: 0.890292
res_score_ls [0, 0, 0, 0, 0, 0.5523538961038961, 0.7530926961531944, 0.8468386902067301, 0.8766336349274737, 0.8902918069584735]
Fold  1 :
[1]	valid_0's f1-score-on-sentence-level: 0
[2]	valid_0's f1-score-on-sentence-level: 0
[3]	valid_0's f1-score-on-sentence-level: 0
[4]	valid_0's f1-score-on-sentence-level: 0
[5]	valid_0's f1-score-on-sentence-level: 0
[6]	valid_0's f1-score-on-sentence-level: 0.544665
[7]	valid_0's f1-score-on-sentence-level: 0.752776
[8]	val

[8]	valid_0's f1-score-on-sentence-level: 0.851108
[9]	valid_0's f1-score-on-sentence-level: 0.887477
[10]	valid_0's f1-score-on-sentence-level: 0.899236
Did not meet early stopping. Best iteration is:
[10]	valid_0's f1-score-on-sentence-level: 0.899236
res_score_ls [0, 0, 0, 0, 0.013617021276595745, 0.5438275371161277, 0.7295379823498875, 0.8511081362486984, 0.8874770577438938, 0.8992355802640722]
Fold  1 :
[1]	valid_0's f1-score-on-sentence-level: 0
[2]	valid_0's f1-score-on-sentence-level: 0
[3]	valid_0's f1-score-on-sentence-level: 0
[4]	valid_0's f1-score-on-sentence-level: 0
[5]	valid_0's f1-score-on-sentence-level: 0
[6]	valid_0's f1-score-on-sentence-level: 0.55608
[7]	valid_0's f1-score-on-sentence-level: 0.729753
[8]	valid_0's f1-score-on-sentence-level: 0.867399
[9]	valid_0's f1-score-on-sentence-level: 0.892157
[10]	valid_0's f1-score-on-sentence-level: 0.89514
res_score_ls [0, 0, 0, 0, 0, 0.5560795571047776, 0.7297533671506035, 0.8673992673992674, 0.8921568627450981, 0.895

KeyboardInterrupt: 