In [24]:
import pandas as pd
import seaborn as sns
import numpy as np 
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [25]:
EURGBP_data = pd.read_csv("EURGBP.csv", parse_dates=[0] ,names=["DateTime", "OPEN", "HIGH", "LOW", "CLOSE", "VOLUME"] ) 
EURUSD_data = pd.read_csv("EURUSD.csv", parse_dates=[0] ,names=["DateTime", "OPEN", "HIGH", "LOW", "CLOSE", "VOLUME"]) 
GBPUSD_data = pd.read_csv("GBPUSD.csv", parse_dates=[0] ,names=["DateTime", "OPEN", "HIGH", "LOW", "CLOSE", "VOLUME"]) 

# Prediction

In [27]:
EURGBP_data['OpenMean'] = EURGBP_data['OPEN'].rolling(60).mean()
EURGBP_data['OpenMin'] = EURGBP_data['OPEN'].rolling(60).min()   
EURGBP_data['OpenMax'] = EURGBP_data['OPEN'].rolling(60).max()   
EURGBP_data['OpenStd'] = EURGBP_data['OPEN'].rolling(60).std()    
EURGBP_data['OpenSum'] = EURGBP_data['OPEN'].rolling(60).sum()

#drop the first entries that are NaN values for the stats
EURGBP_data = EURGBP_data.dropna()

EURGBP_data

Unnamed: 0,DateTime,OPEN,HIGH,LOW,CLOSE,VOLUME,OpenMean,OpenMin,OpenMax,OpenStd,OpenSum
59,2013-01-01 18:02:00,0.81234,0.81234,0.81234,0.81234,0.0,0.812638,0.81199,0.81294,0.000222,48.75829
60,2013-01-01 18:03:00,0.81236,0.81245,0.81236,0.81245,0.0,0.812644,0.81199,0.81294,0.000209,48.75864
61,2013-01-01 18:04:00,0.81244,0.81245,0.81244,0.81245,0.0,0.812651,0.81203,0.81294,0.000193,48.75909
62,2013-01-01 18:05:00,0.81244,0.81248,0.81240,0.81247,0.0,0.812658,0.81223,0.81294,0.000177,48.75950
63,2013-01-01 18:06:00,0.81246,0.81246,0.81227,0.81231,0.0,0.812652,0.81223,0.81294,0.000177,48.75912
...,...,...,...,...,...,...,...,...,...,...,...
231371,2013-08-18 22:57:00,0.85293,0.85294,0.85293,0.85293,0.0,0.852902,0.85276,0.85311,0.000109,51.17410
231372,2013-08-18 22:58:00,0.85294,0.85295,0.85293,0.85293,0.0,0.852899,0.85276,0.85309,0.000106,51.17393
231373,2013-08-18 22:59:00,0.85292,0.85294,0.85292,0.85292,0.0,0.852896,0.85276,0.85307,0.000103,51.17376
231374,2013-08-18 23:00:00,0.85293,0.85293,0.85284,0.85286,0.0,0.852894,0.85276,0.85307,0.000101,51.17365


In [28]:
# X features: (OpenMean, OpenMin, OpenMax, OpenStd, OpenSum)
# y: Close price
X = EURGBP_data.iloc[:, [6,7,8,9,10]].values 
y = EURGBP_data.iloc[:, 4].values

In [29]:
from sklearn.model_selection import TimeSeriesSplit

#Split data into test and training sets
splits = TimeSeriesSplit(n_splits=5)

for train_index, test_index in splits.split(X,y):
    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y[train_index]
    y_test = y[test_index]


## LSTM keras baseline model

In [30]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout


#When implementing LSTM, we need to reshape X train and test to have 3 dimesional where each axis corresponds to the (batch_size, time_step, feature dimesion).
#The 1 in the last parameter below is the time step
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)

#Create LSTM model and fit
LSTM_model = Sequential()
#1st layer
LSTM_model.add(LSTM(units = 50, activation='relu', input_shape = (X_train.shape[1], 1)))

#1 neuron in the output layer to predict the normalised stock price
#The output dimension is 1 since we are predicting 1 price each time.
LSTM_model.add(Dense(units = 1))
#Compiling the LSTM model
LSTM_model.compile(optimizer = 'adam', loss = 'mean_squared_error')
#Train the model using the training set
LSTM_model.fit(X_train, y_train, epochs = 50, batch_size = 32)#1epochs =00, batch_size = 32)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fba4c387790>

In [31]:
# make predictions
testPredict = LSTM_model.predict(X_test)

In [32]:
from sklearn.metrics import mean_squared_error, r2_score

mse2 = mean_squared_error(y_test, testPredict[:,0])
rmse2 = np.sqrt(mean_squared_error(y_test, testPredict[:,0]))
r2 = r2_score(y_test, testPredict[:,0])

print('Test Score:%.2f RMSE' % (rmse2))
print('%.2f MSE ' % (mse2))
print('%.2f R2 ' % (r2))

Test Score:0.00 RMSE
0.00 MSE 
0.98 R2 
