# LSTM2: Univariate LSTM model with the prior two weeks’ data as the input

In [None]:
import numpy as np
import pandas as pd
from keras.models import Sequential,Model
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import time
import tensorflow as tf
from keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed, concatenate

In [None]:
# Load the three CSV inputs in one pass. `data` (imputed_dataset.csv) supplies
# the 'Open' min/max/mean used by the metric helpers and summary cells in this
# notebook; the other two files hold the training and testing splits.
data, train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('imputed_dataset.csv', 'training_data.csv', 'testing_data.csv')
)

In [None]:
# Keep only the 'Open' column of each split, shaped as an (n_samples, 1) array.
# NOTE: this rebinds the DataFrames to arrays, so re-running this cell without
# reloading the CSVs will fail on the ['Open'] lookup.
train_data = np.reshape(train_data['Open'].values, (-1, 1))
test_data = np.reshape(test_data['Open'].values, (-1, 1))

In [None]:
from scipy.stats import pearsonr

def pearson_correlation(actual, predicted):
    """Return the Pearson correlation between actual and predicted values.

    Both inputs are first mapped through ``x * (max - min) + min`` using the
    min and max of the module-level ``data['Open']`` column (presumably this
    undoes a min-max scaling applied to the splits -- TODO confirm).

    Args:
        actual: Values to compare against; passed to ``scipy.stats.pearsonr``
            after rescaling, so it must be a 1-D array supporting arithmetic.
        predicted: Model outputs, same length as ``actual``.

    Returns:
        The correlation coefficient. The p-value reported by ``pearsonr`` is
        discarded.
    """
    open_min = data['Open'].min()
    open_range = data['Open'].max() - open_min
    rescaled_actual = actual * open_range + open_min
    rescaled_predicted = predicted * open_range + open_min
    coefficient, _p_value = pearsonr(rescaled_actual, rescaled_predicted)
    return coefficient

def evaluate_rmse_ratio(actual, predicted):
    """Return the RMSE between actual and predicted values in 'Open' units.

    Despite the name, the value returned is a plain RMSE, not a ratio; the
    division by the mean 'Open' value happens in a later cell of this file.

    Both inputs are first mapped through ``x * (max - min) + min`` using the
    min and max of the module-level ``data['Open']`` column (presumably this
    undoes a min-max scaling applied to the splits -- TODO confirm).

    Args:
        actual: Array of reference values.
        predicted: Array of model outputs with the same shape as ``actual``.

    Returns:
        Square root of ``mean_squared_error`` over the rescaled inputs.
    """
    open_min = data['Open'].min()
    open_range = data['Open'].max() - open_min
    mse = mean_squared_error(
        actual * open_range + open_min,
        predicted * open_range + open_min,
    )
    return np.sqrt(mse)

def calculate_rmse_per_day(y_true, y_pred):
    """Return one RMSE per forecast day, in 'Open' units.

    Both inputs are first mapped through ``x * (max - min) + min`` using the
    min and max of the module-level ``data['Open']`` column (presumably this
    undoes a min-max scaling applied to the splits -- TODO confirm).

    Args:
        y_true: 2-D array of reference values, one column per forecast day.
        y_pred: 2-D array of model outputs with the same shape as ``y_true``.

    Returns:
        A list with one RMSE per column of ``y_true``, in column order.
    """
    open_min = data['Open'].min()
    open_range = data['Open'].max() - open_min
    y_true = y_true * open_range + open_min
    y_pred = y_pred * open_range + open_min
    # Take the number of forecast days from the data instead of assuming 5,
    # so this stays in step with create_dataset's forecast_horizon parameter.
    return [
        np.sqrt(mean_squared_error(y_true[:, day], y_pred[:, day]))
        for day in range(y_true.shape[1])
    ]

In [None]:
def create_dataset(data, time_steps=10, forecast_horizon=5):
    """Slice a series into sliding (input window, target window) pairs.

    Window ``i`` uses rows ``i .. i + time_steps - 1`` as input and the next
    ``forecast_horizon`` rows, flattened, as the target. Consecutive windows
    are shifted by one row.

    Args:
        data: Array-like whose first axis is time, e.g. shape ``(N, F)``.
        time_steps: Number of rows in each input window.
        forecast_horizon: Number of rows in each target window.

    Returns:
        A tuple ``(X, y)``. For input of shape ``(N, F)``, ``X`` has shape
        ``(n, time_steps, F)`` and ``y`` has shape
        ``(n, forecast_horizon * F)``, where
        ``n = N - time_steps - forecast_horizon + 1``. When the series is too
        short for a single window, ``n`` is 0 and both arrays are empty but
        keep their trailing dimensions.

    Raises:
        ValueError: If ``time_steps`` or ``forecast_horizon`` is negative.
    """
    if time_steps < 0 or forecast_horizon < 0:
        raise ValueError("time_steps and forecast_horizon must be non-negative")

    data = np.asarray(data)
    n_windows = len(data) - time_steps - forecast_horizon + 1

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); keep the trailing axes so
        # downstream shape-based code sees the same rank as the non-empty case.
        feature_shape = data.shape[1:]
        n_targets = forecast_horizon * int(np.prod(feature_shape, dtype=int))
        return (
            np.empty((0, time_steps) + feature_shape, dtype=data.dtype),
            np.empty((0, n_targets), dtype=data.dtype),
        )

    X = [data[i:i + time_steps] for i in range(n_windows)]
    y = [
        data[i + time_steps:i + time_steps + forecast_horizon].flatten()
        for i in range(n_windows)
    ]
    return np.array(X), np.array(y)

In [None]:
# Window both splits with the same geometry: 10 input steps and 5 target steps,
# matching the input_shape=(10, 1) and Dense(5) used by the model in this file.
X_train, y_train = create_dataset(train_data, time_steps=10, forecast_horizon=5)
X_test, y_test = create_dataset(test_data, time_steps=10, forecast_horizon=5)

In [None]:
# Train and evaluate the same architecture 10 times from scratch, collecting
# the test RMSE, wall-clock time and correlation of every round.
rmse_list, execution_time_list, corr_list = [], [], []
for i in range(10):
    start_time = time.time()

    # Fresh network each round: LSTM over 10 steps of 1 feature, two dense
    # layers, then a 5-unit output (one unit per forecast day).
    model = Sequential([
        LSTM(200, activation='relu', input_shape=(10, 1)),
        Dense(200, activation='relu'),
        Dense(100, activation='relu'),
        Dense(5),
    ])
    model.compile(optimizer=Adam(), loss='mean_squared_error')

    # The test split is also passed as validation data here.
    history = model.fit(
        X_train,
        y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_test, y_test),
        verbose=0,
    )
    predictions = model.predict(X_test)

    # The timed span covers model construction, training and prediction.
    end_time = time.time()
    execution_time = end_time - start_time
    execution_time_list.append(execution_time)

    # Overall RMSE on the test windows
    rmse = evaluate_rmse_ratio(y_test, predictions)
    rmse_list.append(rmse)

    # Correlation over all (window, day) pairs, flattened to 1-D
    corr = pearson_correlation(y_test.flatten(), predictions.flatten())
    corr_list.append(corr)

    # Per-day RMSE; the printed day labels start at 2 (j + 2)
    rmse_per_day = calculate_rmse_per_day(y_test, predictions)
    for j, item in enumerate(rmse_per_day):
        print(f'RMSE for day {j+2}: {item}')
    print(f"Round {i+1}: Loss = {history.history['loss'][-1]}, RMSE = {rmse}, Execution time = {execution_time}, Correlation = {corr}")

RMSE for day 2: 207.60358378880346
RMSE for day 3: 275.3661509306015
RMSE for day 4: 307.7911725560603
RMSE for day 5: 367.15940333625076
RMSE for day 6: 401.11358709004935
Round 1: Loss = 0.0007212437340058386, RMSE = 319.1737170226277, Execution time = 41.40442085266113, Correlation = 0.959921330126973
RMSE for day 2: 168.31331512958948
RMSE for day 3: 224.3664693473529
RMSE for day 4: 274.65041929794415
RMSE for day 5: 322.5590746385593
RMSE for day 6: 370.70962979673357
Round 2: Loss = 0.0006779127870686352, RMSE = 281.2730074833206, Execution time = 43.21596360206604, Correlation = 0.9614481857591624
RMSE for day 2: 170.34401978453056
RMSE for day 3: 227.33832698773887
RMSE for day 4: 290.93177268209706
RMSE for day 5: 340.5721421436256
RMSE for day 6: 387.4738610038302
Round 3: Loss = 0.0007105185650289059, RMSE = 293.75720359351106, Execution time = 43.09826970100403, Correlation = 0.9592944823645672
RMSE for day 2: 172.60916547463805
RMSE for day 3: 232.12770714813723
RMSE for 

In [None]:
# Print the layer/parameter summary. `model` is rebound on every round of the
# training loop, so this describes the network built in the final round.
model.summary()

Model: "sequential_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_19 (LSTM)              (None, 200)               161600    
                                                                 
 dense_57 (Dense)            (None, 200)               40200     
                                                                 
 dense_58 (Dense)            (None, 100)               20100     
                                                                 
 dense_59 (Dense)            (None, 5)                 505       
                                                                 
Total params: 222405 (868.77 KB)
Trainable params: 222405 (868.77 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Mean RMSE over all rounds, expressed relative to the mean of data['Open']
np.mean(rmse_list)/data['Open'].mean()

0.032026560011168756

In [None]:
# Mean wall-clock time per round, in seconds (differences of time.time())
np.mean(execution_time_list)

41.613519954681394

In [None]:
# Display the RMSE recorded for each round, in round order
rmse_list

[319.1737170226277,
 281.2730074833206,
 293.75720359351106,
 291.38257637886693,
 324.8403011509299,
 333.12488570148685,
 302.4615453966423,
 333.0889785424162,
 329.389644109577,
 304.2826779274247]

In [None]:
# Mean of the 'Open' column of `data`, the denominator of the relative RMSE
data['Open'].mean()

9719.353362400685