In [None]:
import lightgbm as lgb
import itertools
import json
from sklearn.metrics import mean_squared_error
from data_processor import read_data
import neptune

import warnings
warnings.filterwarnings("ignore")

print('Loading data...')
# Read the training split first, then the evaluation split, from the same
# directory; each read_data call is unpacked into a (features, targets) pair.
(x_train, y_train), (x_eval, y_eval) = [
    read_data('../data/only7doses/', split) for split in ('train', 'eval')
]

# Select the Neptune project that experiments created later will be logged to.
neptune.init('kowson/OLN')

# Parameter combinations that have already been evaluated (empty to start with).
used_params = []

In [2]:
print('Preparing LightGBM datasets...')
# Wrap both splits in LightGBM's Dataset container. The evaluation set is
# created with the training set as its reference.
lgb_train = lgb.Dataset(data=x_train, label=y_train)
lgb_eval = lgb.Dataset(data=x_eval, label=y_eval, reference=lgb_train)


Preparing LightGBM datasets...


In [3]:
# Tags attached to every Neptune experiment created by this notebook.
TAGS = ['lightgbm', 'data_v2', 'one_dose']

def neptune_monitor():
    """Build a LightGBM callback that forwards evaluation metrics to Neptune.

    Returns:
        A callable taking LightGBM's callback environment ``env``. For every
        entry of ``env.evaluation_result_list`` it sends the metric value to
        the Neptune channel ``'<dataset name>_<metric name>'`` with
        ``x=env.iteration``.
    """
    def callback(env):
        # Only the first three fields (dataset name, metric name, value) are
        # used. Star-unpacking the rest accepts both the 4-field entries seen
        # during lgb.train and longer entries (e.g. lgb.cv appends a std-dev
        # field), which previously raised "too many values to unpack".
        for name, loss_name, loss_value, *_ in env.evaluation_result_list:
            neptune.send_metric('{}_{}'.format(name, loss_name), x=env.iteration, y=loss_value)
    return callback


In [4]:
# Hyper-parameter grid: each key maps to the list of candidate values to try.
# The training cell takes the Cartesian product of these lists, so keys with a
# single candidate are effectively fixed. 'verbose' is deliberately not part
# of the grid; the training loop sets it itself.
params_dict = dict(
    boosting_type=['gbdt', 'rf', 'dart'],
    objective=['regression'],
    metric=[['l2', 'l1']],  # one candidate: the pair of metrics, kept together
    num_leaves=[20, 25, 31, 40, 50, 100],
    num_rounds=[30, 50, 70, 100],
    learning_rate=[0.001, 0.006, 0.01, 0.05, 0.1, 0.2, 0.5],
    bagging_fraction=[0.9],
    bagging_freq=[5],
)

In [5]:
print("Training...")
# Resume support: reload the parameter combinations that earlier runs already
# finished. A missing or unparsable file simply means nothing is done yet.
try:
    with open('lgb_params_2.json') as data_file:
        used_params = json.load(data_file)
except (FileNotFoundError, json.JSONDecodeError):
    used_params = []
# Files written by the previous version of this cell stored an extra
# 'verbose' key in every entry (the training params aliased the experiment
# params), so a freshly generated combination never compared equal to a stored
# one and nothing was ever skipped. Strip that key so old entries match again.
used_params = [
    {key: value for key, value in entry.items() if key != 'verbose'}
    for entry in (used_params if isinstance(used_params, list) else [])
    if isinstance(entry, dict)
]
# Walk the full Cartesian product of the candidate values in params_dict.
keys, values = zip(*params_dict.items())
for v in itertools.product(*values):
    experiment_params = dict(zip(keys, v))
    if experiment_params in used_params:
        continue  # skip already computed
    neptune.create_experiment(
        name='LightGBM regressor on only 7 doses',
        params=experiment_params,
        tags=TAGS
    )
    # Work on a copy: adding 'verbose' to experiment_params itself would
    # change what gets persisted below and break the resume check above.
    params2 = dict(experiment_params, verbose=0)
    gbm = lgb.train(
        params=params2,
        train_set=lgb_train,
        valid_sets=lgb_eval,
        early_stopping_rounds=5,
        callbacks=[neptune_monitor()],
    )
    # PREDICT AND EVAL
    y_pred = gbm.predict(x_eval, num_iteration=gbm.best_iteration)
    # Square root of the MSE, i.e. RMSE on the evaluation split.
    error = mean_squared_error(y_eval, y_pred) ** 0.5
    print("RMSE of prediction is: {}".format(error))
    neptune.log_text('rmse', str(error))
    neptune.stop()
    # Persist after every finished experiment so an interrupted grid search
    # can pick up where it stopped.
    used_params.append(experiment_params)
    with open('lgb_params_2.json', 'w') as outfile:
        json.dump(used_params, outfile, sort_keys=True, indent=4)

Training...
https://ui.neptune.ai/kowson/OLN/e/OLN-609
[1]	valid_0's l2: 2952.19	valid_0's l1: 43.4412
Training until validation scores don't improve for 5 rounds
[2]	valid_0's l2: 2950.93	valid_0's l1: 43.4313
[3]	valid_0's l2: 2949.67	valid_0's l1: 43.4214
[4]	valid_0's l2: 2948.41	valid_0's l1: 43.4115
[5]	valid_0's l2: 2947.15	valid_0's l1: 43.4016
[6]	valid_0's l2: 2945.97	valid_0's l1: 43.3924
[7]	valid_0's l2: 2944.81	valid_0's l1: 43.3832
[8]	valid_0's l2: 2943.64	valid_0's l1: 43.374
[9]	valid_0's l2: 2942.36	valid_0's l1: 43.3638
[10]	valid_0's l2: 2941.21	valid_0's l1: 43.3547
[11]	valid_0's l2: 2939.96	valid_0's l1: 43.3449
[12]	valid_0's l2: 2938.71	valid_0's l1: 43.3351
[13]	valid_0's l2: 2937.46	valid_0's l1: 43.3252
[14]	valid_0's l2: 2936.22	valid_0's l1: 43.3155
[15]	valid_0's l2: 2934.97	valid_0's l1: 43.3057
[16]	valid_0's l2: 2933.77	valid_0's l1: 43.2961
[17]	valid_0's l2: 2932.57	valid_0's l1: 43.2865
[18]	valid_0's l2: 2931.37	valid_0's l1: 43.2769
[19]	valid_0'

KeyboardInterrupt: 