In [1]:
# https://github.com/Microsoft/LightGBM/blob/master/examples/python-guide/simple_example.py

import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

def run(flag=0):
    """Train a small LightGBM regressor and print its feature importances.

    The model is always fitted on ``data/regression.train`` (tab-separated,
    no header, column 0 is the target). ``flag`` only selects what is used as
    the second validation set, so the runs can be compared side by side:

    * ``0`` -- test features with the true test labels.
    * ``1`` -- test features with all-zero labels.
    * anything else -- the training features and labels again.

    Parameters
    ----------
    flag : int, default 0
        Selector for the validation set, as listed above.

    Returns
    -------
    None
        Results are reported via ``print`` and LightGBM's evaluation logging.
    """
    df_train = pd.read_csv('data/regression.train', header=None, sep='\t')
    df_test = pd.read_csv('data/regression.test', header=None, sep='\t')

    # Column 0 holds the target; every remaining column is a feature.
    y_train = df_train[0]
    y_test = df_test[0]
    X_train = df_train.drop(0, axis=1)
    X_test = df_test.drop(0, axis=1)

    # The training dataset is the same for every flag; only the validation
    # dataset differs between the branches below.
    lgb_train = lgb.Dataset(X_train, y_train)
    if flag == 0:
        print("run by flag 0 - X-test, y-test")
        lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
    elif flag == 1:
        # The features come from the test split, so the message says X-test.
        print("run by flag 1 - X-test, zeros")
        lgb_eval = lgb.Dataset(X_test, np.zeros(y_test.shape), reference=lgb_train)
    else:
        print("run by flag 2 - X-train, y-train")
        lgb_eval = lgb.Dataset(X_train, y_train, reference=lgb_train)

    # specify your configurations as a dict
    params = {
        'boosting_type': 'gbdt',
        'objective': 'regression',
        # A list (not a set) keeps the metric order stable between sessions.
        'metric': ['l2', 'l1'],
        'num_leaves': 31,
        'learning_rate': 0.05,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.8,
        'bagging_freq': 5,
        'verbose': 0,
        "random_state": 71
    }

    # Early stopping is requested through a callback rather than the
    # `early_stopping_rounds` keyword, which newer LightGBM releases no longer
    # accept. With only 3 boosting rounds and a patience of 5 it cannot
    # actually trigger here.
    callbacks = [lgb.early_stopping(stopping_rounds=5)]
    # NOTE(review): releases that provide `log_evaluation` appear to need it
    # for per-iteration metric lines once callbacks are passed; older releases
    # print them by default -- confirm against the installed version.
    if hasattr(lgb, 'log_evaluation'):
        callbacks.append(lgb.log_evaluation(period=1))

    gbm = lgb.train(params,
                    lgb_train,
                    num_boost_round=3,
                    valid_sets=[lgb_train, lgb_eval],
                    callbacks=callbacks)
    print(gbm.feature_importance())

In [2]:
# Baseline: validate on the test features with their true labels.
run(flag=0)

run by flag 0 - X-test, y-test
[1]	training's l2: 0.243597	training's l1: 0.492494	valid_1's l2: 0.24288	valid_1's l1: 0.491812
Training until validation scores don't improve for 5 rounds.
[2]	training's l2: 0.239477	training's l1: 0.488088	valid_1's l2: 0.239307	valid_1's l1: 0.48798
[3]	training's l2: 0.235706	training's l1: 0.483953	valid_1's l2: 0.235559	valid_1's l1: 0.483905
Did not meet early stopping. Best iteration is:
[3]	training's l2: 0.235706	training's l1: 0.483953	valid_1's l2: 0.235559	valid_1's l1: 0.483905
[ 0  1  0  7  3 10  0  0  0  2  0  0  0  4  0  0  0  0  2  4  0  0 12  1
 12 10 14  8]


In [3]:
# Same test features as flag=0, but the validation labels are replaced by zeros.
run(flag=1)

run by flag 1 - X-train, zeros
[1]	training's l2: 0.243597	training's l1: 0.492494	valid_1's l2: 0.281387	valid_1's l1: 0.530319
Training until validation scores don't improve for 5 rounds.
[2]	training's l2: 0.239477	training's l1: 0.488088	valid_1's l2: 0.281462	valid_1's l1: 0.530135
[3]	training's l2: 0.235706	training's l1: 0.483953	valid_1's l2: 0.281838	valid_1's l1: 0.530184
Did not meet early stopping. Best iteration is:
[3]	training's l2: 0.235706	training's l1: 0.483953	valid_1's l2: 0.281838	valid_1's l1: 0.530184
[ 0  1  0  7  3 10  0  0  0  2  0  0  0  4  0  0  0  0  2  4  0  0 12  1
 12 10 14  8]


In [4]:
# Validate on the training data itself, so valid_1 should mirror the training metrics.
run(flag=2)

run by flag 2 - X-train, y-train
[1]	training's l2: 0.243597	training's l1: 0.492494	valid_1's l2: 0.243597	valid_1's l1: 0.492494
Training until validation scores don't improve for 5 rounds.
[2]	training's l2: 0.239477	training's l1: 0.488088	valid_1's l2: 0.239477	valid_1's l1: 0.488088
[3]	training's l2: 0.235706	training's l1: 0.483953	valid_1's l2: 0.235706	valid_1's l1: 0.483953
Did not meet early stopping. Best iteration is:
[3]	training's l2: 0.235706	training's l1: 0.483953	valid_1's l2: 0.235706	valid_1's l1: 0.483953
[ 0  1  0  7  3 10  0  0  0  2  0  0  0  4  0  0  0  0  2  4  0  0 12  1
 12 10 14  8]
