In [18]:
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error

In [19]:
print('Loading data...')
# Both files are read as header-less, tab-separated tables, so the columns
# end up labelled with integer positions.
read_options = dict(header=None, sep='\t')
df_train = pd.read_csv('data/regression.train', **read_options)
df_test = pd.read_csv('data/regression.test', **read_options)

Loading data...


In [20]:
# Column 0 is used as the regression target; every other column is a feature.
X_train, y_train = df_train.drop(columns=0), df_train[0]
X_test, y_test = df_test.drop(columns=0), df_test[0]

In [21]:
# Preview the first three rows of the test feature matrix.
X_test.iloc[:3]

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,19,20,21,22,23,24,25,26,27,28
0,0.644,0.247,-0.447,0.862,0.374,0.854,-1.126,-0.79,2.173,1.015,...,-0.19,-0.744,3.102,0.958,1.061,0.98,0.875,0.581,0.905,0.796
1,0.385,1.8,1.037,1.044,0.349,1.502,-0.966,1.734,0.0,0.966,...,-0.44,0.638,3.102,0.695,0.909,0.981,0.803,0.813,1.149,1.116
2,1.214,-0.166,0.004,0.505,1.434,0.628,-1.174,-1.23,1.087,0.579,...,-1.383,1.355,0.0,0.848,0.911,1.043,0.931,1.058,0.744,0.696


In [22]:
# Report the dimensions of each target vector and feature matrix.
# The templates keep the exact spacing of the printed report.
shape_report = (
    ("Shape y_train:  {}", y_train),
    ("Shape y_test: {}", y_test),
    ("Shape x_train:  {}", X_train),
    ("Shape x_test: {}", X_test),
)
for template, dataset in shape_report:
    print(template.format(dataset.shape))

Shape y_train:  (7000,)
Shape y_test: (500,)
Shape x_train:  (7000, 28)
Shape x_test: (500, 28)


In [23]:
# Wrap the pandas objects in LightGBM datasets. The evaluation set names the
# training set as its reference.
lgb_train = lgb.Dataset(data=X_train, label=y_train)
lgb_eval = lgb.Dataset(data=X_test, label=y_test, reference=lgb_train)

In [31]:
# LightGBM training configuration passed to lgb.train.
params = {
    "boosting_type": "gbdt",       # gradient-boosted decision trees
    "objective": "regression",
    # A list (not a set) keeps the metric order stable across kernel restarts.
    # Set iteration order for strings depends on hash randomization, which
    # could change the order of the logged metrics and which metric is checked
    # first by early stopping.
    "metric": ["l1", "l2"],
    "num_leaves": 10,
    "learning_rate": 0.05,
    "feature_fraction": 0.9,       # fraction of features sampled per tree
    "bagging_fraction": 0.8,       # fraction of rows sampled when bagging
    "bagging_freq": 5,             # re-sample the bag every 5 iterations
    "verbose": 1,
}

In [32]:
print('Starting training...')
# Train for at most 100 boosting rounds, evaluating on lgb_eval as we go.
# Early stopping is requested through the callback API instead of the
# `early_stopping_rounds` keyword: newer LightGBM releases no longer accept
# that keyword in `lgb.train`, while the callback works on old and new
# versions alike.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100,
                valid_sets=[lgb_eval],
                callbacks=[lgb.early_stopping(stopping_rounds=5)])

print("Saving model")
# Write the trained booster to a text file. As the last expression of the
# cell, the Booster returned by save_model is shown as the cell output.
gbm.save_model("model.txt")


Starting training...
[1]	valid_0's l1: 0.493203	valid_0's l2: 0.244226
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's l1: 0.489957	valid_0's l2: 0.241163
[3]	valid_0's l1: 0.486727	valid_0's l2: 0.238137
[4]	valid_0's l1: 0.483169	valid_0's l2: 0.234972
[5]	valid_0's l1: 0.480059	valid_0's l2: 0.232328
[6]	valid_0's l1: 0.47721	valid_0's l2: 0.230007
[7]	valid_0's l1: 0.474536	valid_0's l2: 0.227737
[8]	valid_0's l1: 0.471893	valid_0's l2: 0.225744
[9]	valid_0's l1: 0.469036	valid_0's l2: 0.223613
[10]	valid_0's l1: 0.466559	valid_0's l2: 0.221777
[11]	valid_0's l1: 0.46379	valid_0's l2: 0.219774
[12]	valid_0's l1: 0.461196	valid_0's l2: 0.218023
[13]	valid_0's l1: 0.458734	valid_0's l2: 0.216418
[14]	valid_0's l1: 0.456645	valid_0's l2: 0.215162
[15]	valid_0's l1: 0.454152	valid_0's l2: 0.213573
[16]	valid_0's l1: 0.452468	valid_0's l2: 0.212506
[17]	valid_0's l1: 0.450711	valid_0's l2: 0.211434
[18]	valid_0's l1: 0.448619	valid_0's l2: 0.209926
[19]	valid_

<lightgbm.basic.Booster at 0x1a30e0f4be0>

In [33]:
print("Predict")
# Predict on the test features, limited to the booster's recorded best iteration.
best_round = gbm.best_iteration
y_pred = gbm.predict(X_test, num_iteration=best_round)

Predict


In [34]:
# Evaluate: RMSE is the square root of the mean squared error on the test set.
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
print("The RMSE of prediction is: {}".format(rmse))

The RMSE of prediction is: 0.4213506579706491
