# LSTM3: Univariate encoder-decoder LSTM model with the prior two weeks’ data as the input

In [None]:
import numpy as np
import pandas as pd
from keras.models import Sequential,Model
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import time
import tensorflow as tf
from keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed, concatenate

In [None]:
# Load the full series together with the pre-split training and testing sets.
# `data` is also read by the evaluation helpers, which use the min/max of its
# 'Open' column to map values back to the original scale.
data = pd.read_csv('imputed_dataset.csv')
train_data = pd.read_csv('training_data.csv')
test_data = pd.read_csv('testing_data.csv')

In [None]:
# Univariate setup: keep only the 'Open' column of each split.
# NOTE(review): the evaluation helpers invert a min-max scaling, so these values
# are presumably already scaled with the 'Open' min/max of `data` -- confirm.
train_data = train_data['Open']
test_data = test_data['Open']

In [None]:
from scipy.stats import pearsonr

def pearson_correlation(actual, predicted):
    """Return the Pearson correlation coefficient of two 1-D sequences.

    Args:
        actual: Observed values.
        predicted: Predicted values, same length as ``actual``.

    Returns:
        The correlation coefficient; the p-value that ``pearsonr`` also
        reports is discarded.
    """
    result = pearsonr(actual, predicted)
    # The first element of the result is the coefficient, the second the p-value.
    return result[0]
def evaluate_rmse_ratio(actual, predicted):
    """Return the RMSE between two series after undoing min-max scaling.

    Both inputs are mapped back with ``x * (max - min) + min``, where min and
    max come from the ``'Open'`` column of the module-level ``data`` frame.
    Despite the function's name, the value returned is the RMSE itself, not a
    ratio.

    Args:
        actual: Scaled ground-truth values.
        predicted: Scaled predictions, same shape as ``actual``.

    Returns:
        The root mean squared error of the rescaled values.
    """
    open_min = data['Open'].min()
    open_span = data['Open'].max() - open_min
    rescaled_actual = actual * open_span + open_min
    rescaled_predicted = predicted * open_span + open_min
    squared_error = mean_squared_error(rescaled_actual, rescaled_predicted)
    return np.sqrt(squared_error)
def calculate_rmse_per_day(y_true, y_pred, forecast_horizon=None):
    """Return one RMSE per forecast step, computed on the original scale.

    Both inputs are mapped back with ``x * (max - min) + min``, where min and
    max come from the ``'Open'`` column of the module-level ``data`` frame.
    The RMSE is then computed separately for each index along axis 1.

    Args:
        y_true: Scaled ground truth, indexed as ``[sample, step]``.
        y_pred: Scaled predictions, indexed the same way along the first two
            axes.
        forecast_horizon: Number of forecast steps to score. Defaults to
            ``y_true.shape[1]`` so the function follows whatever horizon the
            windows were built with instead of a fixed 5.

    Returns:
        A list with one RMSE value per forecast step.
    """
    open_min = data['Open'].min()
    open_span = data['Open'].max() - open_min
    y_true = y_true * open_span + open_min
    y_pred = y_pred * open_span + open_min

    if forecast_horizon is None:
        forecast_horizon = y_true.shape[1]

    return [
        np.sqrt(mean_squared_error(y_true[:, step], y_pred[:, step]))
        for step in range(forecast_horizon)
    ]

In [None]:
def create_dataset(data, time_steps=10, forecast_horizon=5):
    """Slice a series into overlapping (input window, target window) pairs.

    Window ``k`` uses ``data[k:k + time_steps]`` as input and the following
    ``forecast_horizon`` values as target; consecutive windows are shifted by
    one position.

    Args:
        data: Sliceable sequence of observations.
        time_steps: Length of each input window.
        forecast_horizon: Length of each target window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays holding the input windows and the
        target windows. Both are empty when ``data`` is too short to yield a
        single pair.
    """
    window_count = len(data) - time_steps - forecast_horizon + 1
    inputs = [
        data[start:start + time_steps]
        for start in range(window_count)
    ]
    targets = [
        data[start + time_steps:start + time_steps + forecast_horizon]
        for start in range(window_count)
    ]
    return np.array(inputs), np.array(targets)

In [None]:
# Build sliding windows with create_dataset's defaults:
# 10 consecutive values as input, the following 5 values as target.
X_train, y_train = create_dataset(train_data)
X_test, y_test = create_dataset(test_data)

In [None]:
def build_encoder_decoder_lstm(time_steps=10, forecast_horizon=5, n_features=1,
                               lstm_units=200, dense_units=100):
    """Build and compile an encoder-decoder LSTM for multi-step forecasting.

    The defaults reproduce the original fixed architecture: a 10-step
    univariate input, a 5-step output, 200 LSTM units in both the encoder and
    the decoder, and a 100-unit dense layer applied to every output step.

    Args:
        time_steps: Number of input time steps per sample.
        forecast_horizon: Number of output time steps per sample.
        n_features: Number of features per input time step.
        lstm_units: Units in the encoder LSTM and in the decoder LSTM.
        dense_units: Units in the per-step hidden dense layer.

    Returns:
        A compiled Keras ``Model`` (Adam optimizer, mean squared error loss)
        whose output has one value per forecast step.
    """
    # Encoder: reads the input window and emits a single vector.
    encoder_inputs = Input(shape=(time_steps, n_features))
    encoded_state = LSTM(lstm_units, activation='relu')(encoder_inputs)

    # Feed the same encoded vector to the decoder once per forecast step.
    repeated_state = RepeatVector(forecast_horizon)(encoded_state)

    # Decoder: return_sequences=True keeps one output per forecast step.
    decoder_sequence = LSTM(lstm_units, activation='relu',
                            return_sequences=True)(repeated_state)
    decoder_hidden = TimeDistributed(
        Dense(dense_units, activation='relu'))(decoder_sequence)
    outputs = TimeDistributed(Dense(1))(decoder_hidden)

    model = Model(inputs=encoder_inputs, outputs=outputs)
    model.compile(optimizer=Adam(), loss='mean_squared_error')

    return model

In [None]:
# Repeat the train/evaluate cycle 10 times with a freshly built model each
# round, collecting per-round RMSE, wall-clock time and correlation.
rmse_list = []
execution_time_list = []
corr_list = []
for i in range(10):
    # The timed region covers model construction, training and prediction.
    start_time = time.time()

    third_model = build_encoder_decoder_lstm()

    # NOTE(review): the test set is also passed as validation data here;
    # nothing in this loop reads the validation metrics.
    history = third_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=0)

    predictions = third_model.predict(X_test)


    end_time = time.time()
    execution_time = end_time - start_time
    # Overall RMSE over all samples and forecast steps, flattened to one column.
    rmse = evaluate_rmse_ratio(y_test.reshape(-1,1), predictions.reshape(-1,1))
    rmse_list.append(rmse)
    execution_time_list.append(execution_time)
    # RMSE computed separately for each forecast step.
    rmse_per_day = calculate_rmse_per_day(y_test, predictions)

    # Pearson correlation between the flattened targets and predictions.
    corr = pearson_correlation(y_test.flatten(), predictions.flatten())
    corr_list.append(corr)

    # NOTE(review): printed labels start at "day 2" (j+2) -- confirm that this
    # offset matches the intended day numbering.
    for j, item in enumerate(rmse_per_day):
        print(f'RMSE for day {j+2}: {item}')
    # Loss is the training loss recorded for the final epoch of this round.
    print(f"Round {i+1}: Loss = {history.history['loss'][-1]}, RMSE = {rmse}, Execution time = {execution_time}, Correlation = {corr}")

RMSE for day 2: 190.47383767452652
RMSE for day 3: 256.5691151504091
RMSE for day 4: 312.8589331301012
RMSE for day 5: 349.1579598381023
RMSE for day 6: 397.37790805219424
Round 1: Loss = 0.0007307241321541369, RMSE = 309.77707741312133, Execution time = 71.98076176643372, Correlation = 0.9580730783106078
RMSE for day 2: 188.4068168261149
RMSE for day 3: 270.5910567684636
RMSE for day 4: 330.4048052008411
RMSE for day 5: 373.7967371438595
RMSE for day 6: 417.11659111723174
Round 2: Loss = 0.0007174254860728979, RMSE = 326.0657097010001, Execution time = 72.02632904052734, Correlation = 0.9586953866880545
RMSE for day 2: 194.16214399294824
RMSE for day 3: 236.98457256148023
RMSE for day 4: 284.3189249550526
RMSE for day 5: 328.1005156201515
RMSE for day 6: 373.6608306143952
Round 3: Loss = 0.0006984020583331585, RMSE = 290.50653747044373, Execution time = 85.60629153251648, Correlation = 0.9588913090154215
RMSE for day 2: 196.54925917745697
RMSE for day 3: 289.3255170602156
RMSE for day

In [None]:
# Mean RMSE over all rounds, relative to the mean 'Open' value of the full dataset.
np.mean(rmse_list)/data['Open'].mean()

0.031352537987958916

In [None]:
# Mean wall-clock time per round, in seconds (measured with time.time()).
np.mean(execution_time_list)

78.14690918922425