In [1]:
%%HTML
<style>
    /* Override the widths of the notebook, menubar and toolbar containers. */
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>

### Libraries

In [2]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

%matplotlib inline
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

import renom as rm
from renom.optimizer import Adam, Adagrad
from renom.cuda import set_cuda_active
# Set this to True to run on the GPU; leave it False to run on the CPU.
set_cuda_active(False)

In [3]:
def create_dataset(data, look_back, pred_length, target_col=9):
    """Slice a 2-D array into sliding input windows and their targets.

    Sample ``i`` uses rows ``[i, i + look_back)`` of every column as input and
    rows ``[i + look_back, i + look_back + pred_length)`` of ``target_col`` as
    target.

    Parameters
    ----------
    data : array-like, shape (n_rows, n_features)
        Row-ordered observations; must be 2-D.
    look_back : int
        Number of consecutive rows in each input window.
    pred_length : int
        Number of consecutive rows of ``target_col`` to predict.
    target_col : int, optional
        Column index of the value to predict. Defaults to 9, the column that
        used to be hard-coded.

    Returns
    -------
    exp : ndarray, shape (n_samples, look_back, n_features)
    target : ndarray, shape (n_samples, pred_length)
        Every predicted step is kept. For ``pred_length == 1`` this is the
        same ``(n_samples, 1)`` array as before.

    Notes
    -----
    ``n_samples`` is ``len(data) - look_back - pred_length`` (never negative).
    The count is intentionally unchanged because other cells slice the time
    index by the same amount. Too-short input now yields empty arrays instead
    of an ``IndexError``.
    """
    data = np.asarray(data)
    n_features = data.shape[1]
    n_samples = max(len(data) - look_back - pred_length, 0)

    exp = np.empty((n_samples, look_back, n_features), dtype=data.dtype)
    target = np.empty((n_samples, pred_length), dtype=data.dtype)
    for i in range(n_samples):
        horizon_start = i + look_back
        exp[i] = data[i:horizon_start, :]
        target[i] = data[horizon_start:horizon_start + pred_length, target_col]
    return exp, target

In [4]:
def split_data(X, y, test_size=0.1):
    """Cut X and y at one position into a leading train and trailing test part.

    The cut position is ``int(round(len(X) * (1 - test_size)))``; order is
    preserved and nothing is shuffled.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, pos)`` where ``pos`` is the cut
        position.
    """
    pos = int(round(len(X) * (1-test_size)))
    head = slice(None, pos)
    tail = slice(pos, None)
    return X[head], y[head], X[tail], y[tail], pos

In [5]:
# Granularity tag; it is substituted into the data and model file names below.
# NOTE(review): 'H1' presumably denotes 1-hour bars — confirm against the data-prep step.
gran = 'H1'

In [6]:
# Load the pickled object for the selected granularity; later cells use it as a DataFrame.
# NOTE(security): pickle.load can execute arbitrary code, so only open files from a trusted source.
with open('../intermediate_data/prep_data_{}.pickle'.format(gran), mode='rb') as f:
    df = pickle.load(f)

In [7]:
# Z-score every column of df. The per-column mean and standard deviation are
# kept as lists, in column order, so values can be mapped back to original units.
# NOTE(review): the statistics use all rows, including those later split off as
# the test set, so test information leaks into the scaling.
means = [df[name].mean() for name in df]
stds = [df[name].std() for name in df]

df_std = df.copy()
for name, mu, sigma in zip(df, means, stds):
    df_std[name] = (df[name] - mu) / sigma

# Plain ndarray view of the standardized frame, used for windowing.
data = np.array(df_std)

In [8]:
# Window configuration: rows fed to the model per sample, and steps predicted.
look_back, pred_length = 10, 1

# Build the sliding-window inputs X and their targets y from the standardized data.
X, y = create_dataset(data, look_back=look_back, pred_length=pred_length)

In [9]:
# Ordered split with the default test_size of 0.1; pos is the index of the first test sample.
X_train, y_train, X_test, y_test, pos = split_data(X, y)

In [10]:
# Network: Lstm(30) -> Lstm(10) -> Dense(pred_length).
# NOTE(review): the integer arguments are presumably the layer output sizes — confirm in the ReNom docs.
sequential = rm.Sequential([
    rm.Lstm(30),
    rm.Lstm(10),
    rm.Dense(pred_length)
])

In [11]:
# Mini-batch training of `sequential`, with a full-batch test evaluation after
# every epoch. Per-epoch losses are collected for plotting.
batch_size = 1024
epoch = 1000
N = len(X_train)   # number of training samples
T = X_train.shape[1]   # time steps per sample
if N == 0:
    raise ValueError("X_train is empty; nothing to train on")

# Ceil division keeps the final, shorter batch. Floor division dropped the tail
# of every permutation and left zero batches (then a ZeroDivisionError) whenever
# N < batch_size.
n_batches = (N + batch_size - 1) // batch_size

learning_curve = []
test_learning_curve = []
optimizer = Adagrad()
for i in range(epoch):
    loss = 0.0
    perm = np.random.permutation(N)   # new sample order every epoch
    for j in range(n_batches):
        batch_idx = perm[j*batch_size : (j+1)*batch_size]
        train_batch = X_train[batch_idx]
        response_batch = y_train[batch_idx]
        with sequential.train():
            # Feed the window one time step at a time; only the output after
            # the last step is compared with the target.
            for t in range(T):
                z = sequential(train_batch[:, t, :])
            l = rm.mse(z, response_batch)
            # Reset the recurrent state before the next batch (same position
            # relative to the backward pass as before).
            sequential.truncate()
        l.grad().update(optimizer)
        # Weight by batch size so the epoch value is a per-sample mean even
        # when the last batch is shorter.
        loss += float(l.as_ndarray()) * len(batch_idx)
    loss = loss / N

    # Test loss on the whole test set, outside the train() context.
    for t in range(T):
        z = sequential(X_test[:, t, :])
    test_loss = float(rm.mse(z, y_test).as_ndarray())
    sequential.truncate()

    if i % 10 == 0:
        print("epoch:{:04d} loss:{:.5f} test_loss:{:.5f}".format(i, loss, test_loss))
    learning_curve.append(loss)
    test_learning_curve.append(test_loss)

epoch:0000 loss:0.52684 test_loss:0.29397


KeyboardInterrupt: 

In [None]:
# Train and test loss per epoch on one axis.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(learning_curve, label='loss')
ax.plot(test_learning_curve, label='test_loss', alpha=0.6)
ax.set_title('Learning curve')
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE")
ax.set_ylim(0, 2)
ax.legend()
ax.grid()

In [None]:
def _predict_sequence(inputs):
    """Feed `inputs` to the model step by step; return the output after the last step."""
    for step in range(inputs.shape[1]):
        output = sequential(inputs[:, step, :])
    sequential.truncate()
    return output


# Predict on the test set first, then on the training set.
y_test_pred = _predict_sequence(X_test)
y_train_pred = _predict_sequence(X_train)

# Undo the standardization of the target column (index 9) to get original units.
# NOTE(review): this rebinds y_train / y_test, so re-running the cell rescales again.
target_idx = 9
scale, offset = stds[target_idx], means[target_idx]
y_train = y_train * scale + offset
y_train_pred = y_train_pred * scale + offset
y_test = y_test * scale + offset
y_test_pred = y_test_pred * scale + offset

# Root mean squared error on the test set
rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
print("Root mean squared error:{}".format(rmse))

# Actual vs. predicted scatter; points on the diagonal are perfect predictions.
llim, ulim = -4, 4
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot([llim, ulim], [llim, ulim], c='k', alpha=0.6)
ax.scatter(y_train, y_train_pred, label='train')
ax.scatter(y_test, y_test_pred, label='test')
ax.set_xlim(llim, ulim)
ax.set_ylim(llim, ulim)
ax.set_xlabel('Actual', fontsize=16)
ax.set_ylabel('Prediction', fontsize=16)
ax.legend()
ax.grid()

In [None]:
def _accumulate_price(start_price, deltas):
    """Return the running totals of `start_price` plus the first column of `deltas`.

    Values are added one row at a time, in order, and the result is a list
    with one entry per row of `deltas`.
    """
    prices = []
    price = start_price
    for row in deltas:
        price += row[0]
        prices.append(price)
    return prices


# Starting prices are picked by position, hence .iloc. Plain [] with an integer
# is a label lookup (or a deprecated positional fallback) on a non-integer index.
# NOTE(review): the first target comes from row `look_back`, not row 0, so these
# starting points may be offset from the series they seed — confirm intent.
train_start = df['closeAsk'].iloc[0]
test_start = df['closeAsk'].iloc[y_train.shape[0]]

# Price paths rebuilt by accumulating the actual and the predicted values.
train_price = _accumulate_price(train_start, y_train)
train_pred_price = _accumulate_price(train_start, y_train_pred)
test_price = _accumulate_price(test_start, y_test)
test_pred_price = _accumulate_price(test_start, y_test_pred)

In [None]:
# Actual vs. predicted target over the training range.
# NOTE(review): the x-axis uses rows [0, pos), while targets are taken from rows
# shifted by look_back in create_dataset, so the timestamps lag the values.
train_index = df_std[:pos].index
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(train_index, y_train, label='train_actual')
ax.plot(train_index, y_train_pred, label='train_prediction')
ax.set_ylabel('delta_close')
ax.legend()
plt.show()

In [None]:
# Actual vs. predicted target over the test range. The index slice drops the
# last look_back + pred_length rows so its length matches y_test.
test_index = df_std[pos:-1 * (look_back + pred_length)].index
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(test_index, y_test, label='test_actual')
ax.plot(test_index, y_test_pred, label='test_prediction')
ax.set_ylabel('delta_close')
ax.legend()
plt.show()

In [None]:
# Same test-range comparison, with the x-axis limited to a fixed date window.
test_index = df_std[pos:-1 * (look_back + pred_length)].index
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(test_index, y_test, label='test_actual')
ax.plot(test_index, y_test_pred, label='test_prediction')
ax.set_ylabel('delta_close')
ax.set_xlim('2017-07-01','2017-10-01')
ax.legend()
plt.show()

In [None]:
# Accumulated actual vs. predicted price over the training range.
train_index = df_std[:pos].index
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(train_index, train_price, label='train_actual')
ax.plot(train_index, train_pred_price, label='train_prediction')
ax.set_ylabel('price')
ax.legend()
plt.show()

In [None]:
# Accumulated actual vs. predicted price over the test range.
test_index = df_std[pos:-1 * (look_back + pred_length)].index
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(test_index, test_price, label='test_actual')
ax.plot(test_index, test_pred_price, label='test_prediction')
ax.set_ylabel('price')
ax.legend()
plt.show()

In [None]:
# Direction accuracy on the training set: each value becomes +1 (above the
# threshold), -1 (below the negative threshold) or 0 (within the band).
threshold = 0.1

# Flattened copies, so y_train / y_train_pred are not overwritten; assigning
# through an alias used to replace the targets with class labels in place.
true_values = np.array(y_train).ravel()
pred_values = np.array(y_train_pred).ravel()

# All masks come from the untouched values and the band is inclusive, so values
# exactly at +/-threshold are classified too. NaN stays NaN.
true_updown = np.select(
    [true_values > threshold, true_values < -threshold, np.abs(true_values) <= threshold],
    [1.0, -1.0, 0.0],
    default=np.nan,
)
pred_updown = np.select(
    [pred_values > threshold, pred_values < -threshold, np.abs(pred_values) <= threshold],
    [1.0, -1.0, 0.0],
    default=np.nan,
)

df_updown = pd.DataFrame(
    {'true': true_updown, 'pred': pred_updown},
    index=df_std[:pos].index,
    columns=['true', 'pred'],
)
# Despite its name, 'error' is 1.0 where the classes match and 0.0 otherwise.
df_updown['error'] = (df_updown['true'] == df_updown['pred']).astype(float)
print('correct = ', int((df_updown['error'] == 1).sum()))
print('not correct = ', int((df_updown['error'] == 0).sum()))

In [None]:
# Direction accuracy on the test set: each value becomes +1 (above the
# threshold), -1 (below the negative threshold) or 0 (within the band).
threshold = 0.1

# Flattened copies, so y_test / y_test_pred are not overwritten; assigning
# through an alias used to replace the targets with class labels in place.
true_values = np.array(y_test).ravel()
pred_values = np.array(y_test_pred).ravel()

# All masks come from the untouched values and the band is inclusive, so values
# exactly at +/-threshold are classified too. NaN stays NaN.
true_updown = np.select(
    [true_values > threshold, true_values < -threshold, np.abs(true_values) <= threshold],
    [1.0, -1.0, 0.0],
    default=np.nan,
)
pred_updown = np.select(
    [pred_values > threshold, pred_values < -threshold, np.abs(pred_values) <= threshold],
    [1.0, -1.0, 0.0],
    default=np.nan,
)

df_updown = pd.DataFrame(
    {'true': true_updown, 'pred': pred_updown},
    index=df_std[pos:-1 * (look_back + pred_length)].index,
    columns=['true', 'pred'],
)
# Despite its name, 'error' is 1.0 where the classes match and 0.0 otherwise.
df_updown['error'] = (df_updown['true'] == df_updown['pred']).astype(float)
print('correct = ', int((df_updown['error'] == 1).sum()))
print('not correct = ', int((df_updown['error'] == 0).sum()))

In [None]:
# Write the model to disk via its save() method; the file name carries the granularity and look_back.
sequential.save("../model/lstm_{}_{}.h5".format(gran, look_back))

In [None]:
# Store the per-column standard deviations so the scaling can be reapplied or undone later.
std_scaler_path = '../model/std_scaler_{}.pickle'.format(gran)
with open(std_scaler_path, mode='wb') as out_file:
    pickle.dump(stds, out_file)

In [None]:
# Store the per-column means alongside the standard deviations.
mean_scaler_path = '../model/mean_scaler_{}.pickle'.format(gran)
with open(mean_scaler_path, mode='wb') as out_file:
    pickle.dump(means, out_file)