In [None]:
# Stacked stateful LSTM ("with memory") for forecasting resampled trade prices from trades.csv; structure follows the international airline passengers example
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import datetime
import pylab as pl
from IPython import display
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.python.client import device_lib
# Print the compute devices TensorFlow reports for this machine, so the notebook
# output records which devices were available when it was run.
print(device_lib.list_local_devices())

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Turn a 2-D array of observations into supervised (window, next-value) pairs.
def create_dataset(dataset, look_back=1):
    """Build sliding-window samples from a 2-D array.

    Parameters
    ----------
    dataset : 2-D array indexed as ``dataset[row, column]``.
    look_back : number of consecutive rows that make up one input window.

    Returns
    -------
    (X, y) : tuple of ``np.ndarray``
        ``X[k]`` is ``dataset[k:k + look_back, :]`` (all columns) and ``y[k]``
        is column 0 of the row that immediately follows that window.

    Note: the number of windows is ``len(dataset) - look_back - 1``, i.e. the
    final possible window is intentionally left out here because other code
    in this notebook computes plot offsets from that exact length.
    """
    n_windows = len(dataset) - look_back - 1
    starts = range(n_windows)
    windows = [dataset[start:start + look_back, :] for start in starts]
    targets = [dataset[start + look_back, 0] for start in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Seed NumPy's global random number generator so NumPy-based randomness repeats
# between runs.
# NOTE(review): this call only touches NumPy's generator; whether the Keras
# backend draws its weight initialisation from it is not visible here - confirm.
np.random.seed(seed=7)

In [None]:
# Load the raw trades and resample them into 2-hour bars.
# Only the timestamp, price and volume columns are read; the first of those
# columns becomes the index (presumably `timestamp` - verify against the CSV).
trades = pd.read_csv('trades.csv', index_col=0, usecols=['timestamp','price','volume'], header=0, engine='python')
# The index is interpreted as Unix epoch seconds.
trades.index = pd.to_datetime(trades.index, unit='s')

# Per 2-hour bin: mean trade price and total traded volume.
# String aggregator names are used instead of np.mean / np.sum, which recent
# pandas versions warn about when passed to agg().
dataframe_pv = trades.resample('2H').agg({'price': 'mean', 'volume': 'sum'})
# Per 2-hour bin: open / high / low / close of the trade price.
dataframe_ohlc = trades['price'].resample('2H').ohlc()

print(dataframe_pv.head(5))
print(dataframe_ohlc.head(5))

# Combine both views into one feature frame and forward-fill missing values
# (.ffill() replaces the deprecated fillna(method='pad')).
dataframe = pd.concat([dataframe_pv, dataframe_ohlc], axis=1)
dataframe = dataframe.ffill()

# Time-of-day features taken straight from the DatetimeIndex.
dataframe['hour'] = dataframe.index.hour
dataframe['minute'] = dataframe.index.minute

print(dataframe.head(5))
print(dataframe.tail(5))


In [None]:
# Preprocess: log-transform the price/volume columns, then remove the trend by
# subtracting an exponentially weighted moving average.
#
# Each stage gets its own name and `dataframe` itself is left untouched, so
# this cell can be re-run safely. (Overwriting `dataframe` in place would make
# a second run take the log of already-detrended, partly negative values.)
print(dataframe.head())

# Columns that are log-transformed; the remaining columns are used as-is.
log_columns = ['price', 'volume', 'open', 'high', 'low', 'close']
dataframe_log = dataframe.copy()
# NOTE(review): a zero in any of these columns (e.g. a bin with no traded
# volume) becomes -inf here - confirm the data has none.
dataframe_log[log_columns] = np.log(dataframe_log[log_columns])

# The EWMA is kept in `ewma` because the evaluation cell adds it back to undo
# the detrending.
ewma = dataframe_log.ewm(halflife=7).mean()
dataframe_detrended = dataframe_log - ewma

dataframe_detrended['price'].plot()

# Plain array view of all detrended features, in column order.
dataset = dataframe_detrended.values

In [None]:
# Split into train and test sets in row order: the first 95% of rows train the
# model, the remainder is held out.
train_size = int(len(dataset) * 0.95)
test_size = len(dataset) - train_size
train, test = dataset[:train_size, :], dataset[train_size:, :]

print(train[-1])

# Scale every feature to [0, 1]. Both scalers are fitted on the training rows
# only; the test rows are transformed with the training statistics.
scaler = MinMaxScaler(feature_range=(0, 1))
# Separate scaler for column 0 alone so single-column predictions can be
# inverse-transformed later. Only fit() is needed: the transformed values were
# never used.
scaler_price = MinMaxScaler(feature_range=(0, 1))
scaler_price.fit(train[:, 0].reshape(-1, 1))
train = scaler.fit_transform(train)
test = scaler.transform(test)

print(train[-1])

# Build supervised samples: X = `look_back` consecutive rows, Y = column 0 of
# the following row.
look_back = 5
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# Reshape input to [samples, time steps, features]. The feature count is taken
# from the data rather than hard-coded, so adding or removing a column does not
# require editing this cell.
n_features = train.shape[1]
trainX = np.reshape(trainX, (trainX.shape[0], look_back, n_features))
testX = np.reshape(testX, (testX.shape[0], look_back, n_features))

In [None]:
# Build a two-layer stateful LSTM, train it one epoch at a time and, after
# every epoch, checkpoint the weights, score both splits and plot progress.
trainError = []  # per-epoch train RMSE (in inverse-scaled units)
testError = []   # per-epoch test RMSE (in inverse-scaled units)

N_EPOCHS = 300
batch_size = 1
# Number of input features, read from the prepared samples instead of being
# hard-coded.
n_features = trainX.shape[2]

# create the LSTM network
model = Sequential()
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, n_features), stateful=True, return_sequences=True))
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, n_features), stateful=True))
model.add(Dense(1))

# Resume from an earlier checkpoint when one can be loaded. Only the expected
# failure modes are caught (unreadable/missing file, incompatible weights) and
# they are reported, rather than silently swallowing every exception.
try:
    model.load_weights("model_time.h5")
except (OSError, ValueError) as err:
    print('No usable checkpoint loaded from model_time.h5 (%s); training from scratch.' % err)
model.compile(loss='mean_squared_error', optimizer='adam')

# Alias of the same model object (not a copy); kept so the name stays defined.
model_test = model

# The targets do not change between epochs, so invert their scaling once.
trainY_test = scaler_price.inverse_transform(trainY.reshape(-1, 1))
testY_test = scaler_price.inverse_transform(testY.reshape(-1, 1))

# Row ranges (relative to `dataset`) at which train / test samples are drawn.
# The first target of each split sits `look_back` rows into that split.
train_start = look_back
train_stop = look_back + len(trainY)
test_start = len(trainY) + (look_back * 2) + 1
test_stop = len(dataset) - 1

# Target curves for plotting: NaN everywhere except column 0 inside the range
# covered by each split. These are loop-invariant as well.
trainYPlot = np.full_like(dataset, np.nan, dtype=float)
trainYPlot[train_start:train_stop, 0] = trainY_test[:, 0]
testYPlot = np.full_like(dataset, np.nan, dtype=float)
testYPlot[test_start:test_stop, 0] = testY_test[:, 0]

for epoch in range(N_EPOCHS):
    # One pass over the training samples in order (shuffle=False), then clear
    # the LSTM state and checkpoint.
    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()
    model.save_weights("model_time.h5")

    # make predictions, starting each split from a cleared state
    trainPredict = model.predict(trainX, batch_size=batch_size)
    model.reset_states()
    testPredict = model.predict(testX, batch_size=batch_size)
    # Clear the state left behind by the test pass so the next epoch's fit()
    # starts from the same cleared state as the first epoch.
    model.reset_states()
    print(trainPredict[-1])

    # invert the [0, 1] scaling of the predictions
    trainPredict_test = scaler_price.inverse_transform(trainPredict)
    testPredict_test = scaler_price.inverse_transform(testPredict)

    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY_test, trainPredict_test[:,0]))
    trainError.append(trainScore)
    print('Train Score: %.5f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY_test, testPredict_test[:,0]))
    testError.append(testScore)
    print('Test Score: %.5f RMSE' % (testScore))

    # Prediction curves for plotting, laid out like the target curves above
    # (same shape as `dataset`, values in column 0 only).
    trainPredictPlot = np.full_like(dataset, np.nan, dtype=float)
    trainPredictPlot[train_start:train_stop, 0] = trainPredict_test[:, 0]
    testPredictPlot = np.full_like(dataset, np.nan, dtype=float)
    testPredictPlot[test_start:test_stop, 0] = testPredict_test[:, 0]

    # plot targets and predictions (column 0 only; the other columns are NaN)
    plt.plot(trainYPlot[:, 0], label='train target')
    plt.plot(testYPlot[:, 0], label='test target')
    plt.plot(trainPredictPlot[:, 0], label='train prediction')
    plt.plot(testPredictPlot[:, 0], label='test prediction')
    plt.legend()
    plt.show()

    # plot the RMSE history so far
    plt.plot(trainError, label='train RMSE')
    plt.plot(testError, label='test RMSE')
    plt.legend()
    plt.show()

In [None]:
# Undo the detrending and log transform to compare targets and predictions in
# price units, then plot them and report RMSE for both splits.
print(trainYPlot.shape)
print(trainPredictPlot.shape)
print(testYPlot.shape)
print(testPredictPlot.shape)

# Adding the array to `ewma` yields a DataFrame with ewma's index and columns;
# exp() then reverses the log transform.
trainYPlot2 = np.exp(trainYPlot+ewma)
testYPlot2 = np.exp(testYPlot+ewma)
trainPredictPlot2 = np.exp(trainPredictPlot+ewma)
testPredictPlot2 = np.exp(testPredictPlot+ewma)


def _finite_pairs(actual, predicted):
    """Return (actual, predicted) restricted to rows where both are finite.

    The two series are filtered together so they stay aligned and equally
    long even if only one of them has a NaN or +/-inf at some position.
    """
    pair = pd.concat({'actual': actual, 'predicted': predicted}, axis=1)
    pair = pair.replace([np.inf, -np.inf], np.nan).dropna(how='any')
    return pair['actual'], pair['predicted']


# NOTE(review): this import would normally live in the notebook's import cell.
import matplotlib

matplotlib.rcParams['figure.figsize'] = (18,16)

# Both splits: targets, predictions and their difference.
plt.plot(trainYPlot2['price'], label='train target')
plt.plot(testYPlot2['price'], label='test target')
plt.plot(trainPredictPlot2['price'], label='train prediction')
plt.plot(testPredictPlot2['price'], label='test prediction')
plt.plot(trainYPlot2['price'] - trainPredictPlot2['price'], label='train error')
plt.plot(testYPlot2['price'] - testPredictPlot2['price'], label='test error')
plt.legend()
plt.show()

# Test split only.
plt.plot(testYPlot2['price'], label='test target')
plt.plot(testPredictPlot2['price'], label='test prediction')
plt.plot(testYPlot2['price'] - testPredictPlot2['price'], label='test error')
plt.legend()
plt.show()

# Keep only the rows where both target and prediction are finite.
trainYError, trainPredictError = _finite_pairs(trainYPlot2['price'], trainPredictPlot2['price'])
testYError, testPredictError = _finite_pairs(testYPlot2['price'], testPredictPlot2['price'])

# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainYError, trainPredictError))
print('Train Score: %.5f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testYError, testPredictError))
print('Test Score: %.5f RMSE' % (testScore))