In [1]:
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

In [3]:
# Training split: read the CSV, then separate the feature columns from the
# 'target' label ('Id' is dropped alongside it and is not used as a feature).
train = pd.read_csv('../data/train.csv')
X = train.drop(columns=['Id', 'target'])
y = train['target'].values

# Held-out split: same column handling as the training split.
test = pd.read_csv('../data/test.csv')
X_test = test.drop(columns=['Id', 'target'])
y_test = test['target'].values

# Wrap both splits as LightGBM datasets; the held-out dataset references the
# training dataset.
lgb_train = lgb.Dataset(data=X, label=y)
lgb_test = lgb.Dataset(data=X_test, label=y_test, reference=lgb_train)

In [12]:
# Training configuration for LightGBM, passed to lgb.train as a plain dict:
# gradient-boosted trees with a regression objective, reporting both the
# l2 and l1 metrics on the validation set.
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': {'l2', 'l1'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'seed': 1337,
    'verbose': 0}

# Train for at most 100 boosting rounds, stopping early once the validation
# scores have not improved for 5 rounds.
# NOTE(review): the early-stopping validation set is lgb_test, which is built
# from the same test.csv rows that the reported test RMSE is computed on, so
# that score is likely optimistic -- consider a separate validation split.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100,
                valid_sets=lgb_test,
                early_stopping_rounds=5)

# Predict on both splits using only the trees up to the best early-stopped
# iteration. The keyword is `num_iteration` (singular) for both calls; the
# test call previously used the misspelling `num_iterations`, which is not
# the documented parameter name.
pred = gbm.predict(X, num_iteration=gbm.best_iteration)
pred_test = gbm.predict(X_test, num_iteration=gbm.best_iteration)

[1]	valid_0's l1: 0.29413	valid_0's l2: 0.145976
Training until validation scores don't improve for 5 rounds
[2]	valid_0's l1: 0.281992	valid_0's l2: 0.135259
[3]	valid_0's l1: 0.270315	valid_0's l2: 0.125245
[4]	valid_0's l1: 0.259566	valid_0's l2: 0.116378
[5]	valid_0's l1: 0.24962	valid_0's l2: 0.10849
[6]	valid_0's l1: 0.239864	valid_0's l2: 0.100928
[7]	valid_0's l1: 0.230605	valid_0's l2: 0.0941316
[8]	valid_0's l1: 0.221819	valid_0's l2: 0.0879086
[9]	valid_0's l1: 0.213935	valid_0's l2: 0.0824875
[10]	valid_0's l1: 0.206366	valid_0's l2: 0.077347
[11]	valid_0's l1: 0.199575	valid_0's l2: 0.0728097
[12]	valid_0's l1: 0.193512	valid_0's l2: 0.068906
[13]	valid_0's l1: 0.187442	valid_0's l2: 0.0651338
[14]	valid_0's l1: 0.181828	valid_0's l2: 0.0617804
[15]	valid_0's l1: 0.176542	valid_0's l2: 0.0586052
[16]	valid_0's l1: 0.170552	valid_0's l2: 0.055355
[17]	valid_0's l1: 0.165216	valid_0's l2: 0.0524908
[18]	valid_0's l1: 0.160125	valid_0's l2: 0.0498185
[19]	valid_0's l1: 0.1554

In [13]:
# Print the root-mean-squared error for each split; squared=False is passed
# so that mean_squared_error yields the RMSE instead of the MSE.
for caption, actual, predicted in (
    ('train rmse:', y, pred),
    ('test rmse:', y_test, pred_test),
):
    print(caption, mean_squared_error(actual, predicted, squared=False))

train rmse: 0.07587639060115457
test rmse: 0.13904910160543832
