In [1]:
import lightgbm as lgb
import itertools
import json
import numpy as np
from sklearn.metrics import mean_squared_error
from data_processor import read_data
import neptune

import warnings
warnings.filterwarnings("ignore")

print('Loading data...')
x_train, y_train = read_data('../data/only7doses/', 'train')
x_eval, y_eval = read_data('../data/only7doses/', 'eval')

neptune.init('kowson/OLN')

used_params = []

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

Loading data...
Read dataset for train from file ../data/train.csv
Read dataset for eval from file ../data/eval.csv


In [2]:
print('Preparing LightGBM datasets...')
lgb_train = lgb.Dataset(x_train, y_train)
lgb_eval = lgb.Dataset(x_eval, y_eval, reference=lgb_train)


Preparing LightGBM datasets...


In [3]:
TAGS = ['lightgbm', 'data_v4', 'one_dose', 'scaled', 'relative', 'mape']

def neptune_monitor():
    def callback(env):
        for name, loss_name, loss_value, _ in env.evaluation_result_list:
            neptune.send_metric('{}_{}'.format(name, loss_name), x=env.iteration, y=loss_value)
    return callback


In [4]:
params_dict = {
    'boosting_type': ['gbdt', 'dart',],
    'objective': ['regression'],
    'metric': [['l2', 'l1']],
    'num_leaves': [20, 31, 40, 50, 100],
    'num_rounds': [70, 100, 150],
    'learning_rate': [0.05, 0.1, 0.2, 0.5],
    'bagging_fraction': [0.95],
    'bagging_freq': [5],
    # 'verbose': 1,
}

In [5]:
print("Training...")
try:
    with open('lgb_params_4.json') as data_file:
        used_params = json.load(data_file)
except (FileNotFoundError, json.JSONDecodeError): 
    used_params = []
keys, values = zip(*params_dict.items())
for v in itertools.product(*values):
    experiment_params = dict(zip(keys, v))
    if any(param_dict == experiment_params for param_dict in used_params):
        continue  # skip already computed
    neptune.create_experiment(
        name='LightGBM regressor on only 7 relative doses, scaled',
        params=experiment_params,
        tags=TAGS
    )
    params2 = experiment_params
    params2['verbose'] = 0
    gbm = lgb.train(
        params=params2, 
        train_set=lgb_train, 
        valid_sets=[lgb_train, lgb_eval],
        early_stopping_rounds=5,
        callbacks=[neptune_monitor()],
        verbose_eval=False,
    )
    # PREDICT AND EVAL
    y_pred = gbm.predict(x_eval, num_iteration=gbm.best_iteration)
    error = mean_squared_error(y_eval, y_pred) ** 0.5
    print("RMSE of prediction is: {}".format(error))
    mape = mean_absolute_percentage_error(y_eval, y_pred)
    print("MAPE of prediction is: {}".format(error))
    neptune.log_text('rmse', str(error))
    neptune.log_text('mape', str(mape))
    neptune.stop()
    used_params.append(experiment_params)
    with open('lgb_params_4.json', 'w') as outfile:
        json.dump(used_params, outfile, sort_keys=True, indent=4)

Training...
https://ui.neptune.ai/kowson/OLN/e/OLN-1781
RMSE of prediction is: 0.09463605402162119
MAPE of prediction is: 0.09463605402162119
https://ui.neptune.ai/kowson/OLN/e/OLN-1782
RMSE of prediction is: 0.15930716956835891
MAPE of prediction is: 0.15930716956835891
https://ui.neptune.ai/kowson/OLN/e/OLN-1783
RMSE of prediction is: 0.09320389029581905
MAPE of prediction is: 0.09320389029581905
https://ui.neptune.ai/kowson/OLN/e/OLN-1784
RMSE of prediction is: 0.15811641667207527
MAPE of prediction is: 0.15811641667207527
https://ui.neptune.ai/kowson/OLN/e/OLN-1785
RMSE of prediction is: 0.0924515217186242
MAPE of prediction is: 0.0924515217186242
https://ui.neptune.ai/kowson/OLN/e/OLN-1786
RMSE of prediction is: 0.1575600326544668
MAPE of prediction is: 0.1575600326544668
https://ui.neptune.ai/kowson/OLN/e/OLN-1787
RMSE of prediction is: 0.09191406272380562
MAPE of prediction is: 0.09191406272380562
https://ui.neptune.ai/kowson/OLN/e/OLN-1788
RMSE of prediction is: 0.1572687955184