In [18]:
import pandas as pd
import numpy as np
import lightgbm as lg

# first let us create some fake price data
n=10000
price_close = np.random.normal(size=(n, ))
price_close = np.array([sum(price_close[:i]) for i in range(n)])
price_open = np.roll(price_close, -1, axis=0)
price_open[0] = price_close[0]
price_high = np.maximum(price_open, price_close) + np.random.normal(size=(n, ))
price_high = np.minimum(price_open, price_close) - np.random.normal(size=(n, ))

# the target is the next close price, lets construct some features - this is usually referred to as feature engineering
features = []
feature_length = 100
targets = price_close[feature_length:]
for i in range(feature_length, n):
    feature_object = dict()
    feature_object['MA'] = np.mean(price_open[(i-feature_length):i])
    feature_object['STD'] = np.std(price_open[(i-feature_length):i])
    feature_object['MIN'] = np.min(price_open[(i-feature_length):i])
    feature_object['MAX'] = np.max(price_open[(i-feature_length):i])
    features.append(feature_object)
df = pd.DataFrame(features)

In [20]:
import matplotlib.pyplot as plt 

plt.plot(price_close)

plt.savefig("test.png")
plt.show()

<Figure size 640x480 with 1 Axes>

In [19]:
# now let us create a data set and a test set
from sklearn.model_selection import train_test_split

x_train, x_valid, y_train, y_valid = train_test_split(df, targets, test_size = 0.30)

d_train = lg.Dataset(x_train, y_train)
d_valid = lg.Dataset(x_valid, y_valid)

params = {
    'boosting_type': 'gbdt',
    'objective': 'regression'
}

estimator = lg.train(params, d_train, int(n/10), [d_valid])

[1]	valid_0's l2: 761.904
[2]	valid_0's l2: 621.4
[3]	valid_0's l2: 507.56
[4]	valid_0's l2: 414.928
[5]	valid_0's l2: 339.833
[6]	valid_0's l2: 279.077
[7]	valid_0's l2: 229.693
[8]	valid_0's l2: 189.516
[9]	valid_0's l2: 156.836
[10]	valid_0's l2: 130.373
[11]	valid_0's l2: 108.794
[12]	valid_0's l2: 91.3712
[13]	valid_0's l2: 77.0666
[14]	valid_0's l2: 65.4371
[15]	valid_0's l2: 56.0594
[16]	valid_0's l2: 48.32
[17]	valid_0's l2: 41.9038
[18]	valid_0's l2: 36.7194
[19]	valid_0's l2: 32.4533
[20]	valid_0's l2: 29.0122
[21]	valid_0's l2: 26.0928
[22]	valid_0's l2: 23.7781
[23]	valid_0's l2: 21.7984
[24]	valid_0's l2: 20.1487
[25]	valid_0's l2: 18.8172
[26]	valid_0's l2: 17.696
[27]	valid_0's l2: 16.761
[28]	valid_0's l2: 15.9845
[29]	valid_0's l2: 15.3232
[30]	valid_0's l2: 14.762
[31]	valid_0's l2: 14.2492
[32]	valid_0's l2: 13.8348
[33]	valid_0's l2: 13.4468
[34]	valid_0's l2: 13.1015
[35]	valid_0's l2: 12.8511
[36]	valid_0's l2: 12.5795
[37]	valid_0's l2: 12.3208
[38]	valid_0's l2:

In [17]:
mse = np.sqrt(np.mean( (estimator.predict(x_valid) - y_valid)**2))
mean_relative_error = mse / np.mean(price_close)
print('Mean relative error ' + str(mean_relative_error))

Mean relative error 0.490344210856799
