In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import SimpleRNN, LSTM, GRU, Dense

In [None]:
# Load the feature matrix (X) and the target matrix (y) for a single ticker

stock = 'AAPL'
window_size = 20    # time steps per sample; also selects the feature file
pred_horizon = 10   # selects the target file

X = pd.read_parquet(f'{stock}_X_{window_size}D.gzip')

# Accumulate the target columns from left to right and keep only the final
# column (the running total over every column) as a single-column DataFrame.
y = (
    pd.read_parquet(f'{stock}_y_{pred_horizon}D.gzip')
    .cumsum(axis=1)
    .iloc[:, -1:]
)

In [None]:
# Split the dataset into training, validation and test datasets

num_features = 20  # features per time step; each row of X holds window_size * num_features values

# Fail fast on malformed inputs. Without these checks the reshape(-1, ...) calls
# below can either raise an opaque error or silently produce a sample count that
# no longer lines up with y, and y would be sliced with indices derived from X.
assert len(X) == len(y), (
    f'X and y must have the same number of rows, got {len(X)} and {len(y)}'
)
assert X.shape[1] == window_size * num_features, (
    f'expected {window_size * num_features} columns in X '
    f'(window_size * num_features), got {X.shape[1]}'
)

scaler_y = StandardScaler()

# Positional 80% / 10% / 10% split: rows are taken in stored order, no shuffling
q_80 = int(len(X) * .8)
q_90 = int(len(X) * .9)

# NOTE(review): if rows are time-ordered and each target spans several future
# rows, samples right next to a split boundary may overlap the neighbouring
# split -- confirm whether a gap between the splits is needed.
X_train, y_train = X[:q_80].to_numpy(), y[:q_80].to_numpy()
X_val, y_val = X[q_80:q_90].to_numpy(), y[q_80:q_90].to_numpy()
X_test, y_test = X[q_90:].to_numpy(), y[q_90:].to_numpy()

# Unflatten each row into the (samples, time steps, features) layout the recurrent layers expect
X_train = X_train.reshape((-1, window_size, num_features))
X_val = X_val.reshape((-1, window_size, num_features))
X_test = X_test.reshape((-1, window_size, num_features))

# Fit the target scaler on the training targets only, then reuse it for validation
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))
y_val_scaled = scaler_y.transform(y_val.reshape(-1, 1))

In [None]:
# Early-stopping callback shared by every network trained below:
#   - watches the validation loss,
#   - gives up after 5 consecutive epochs without improvement,
#   - rolls the model back to the weights of its best epoch.

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# Build, compile and train the SimpleRNN regressor

model_RNN = Sequential(
    # Recurrent encoder: dropout acts on the inputs, recurrent_dropout on the hidden state
    [SimpleRNN(128,
               input_shape=(window_size, num_features),
               dropout=0.2,
               recurrent_dropout=0.2)]
    # Funnel of L1-regularised ReLU layers: 64 -> 32 -> 16 -> 8
    + [Dense(units, activation='relu', kernel_regularizer=regularizers.l1(0.001))
       for units in (64, 32, 16, 8)]
    # Single linear output for the scaled target
    + [Dense(1)]
)

model_RNN.compile(optimizer=Adam(learning_rate=0.001),
                  loss='mean_absolute_error')

model_RNN.fit(
    X_train,
    y_train_scaled,
    validation_data=(X_val, y_val_scaled),
    epochs=100,
    callbacks=[early_stop],
)

In [None]:
# Build, compile and train the LSTM regressor

model_LSTM = Sequential(
    # Recurrent encoder: dropout acts on the inputs, recurrent_dropout on the hidden state
    [LSTM(128,
          input_shape=(window_size, num_features),
          dropout=0.2,
          recurrent_dropout=0.2)]
    # Funnel of L1-regularised ReLU layers: 64 -> 32 -> 16 -> 8
    + [Dense(units, activation='relu', kernel_regularizer=regularizers.l1(0.001))
       for units in (64, 32, 16, 8)]
    # Single linear output for the scaled target
    + [Dense(1)]
)

model_LSTM.compile(optimizer=Adam(learning_rate=0.001),
                   loss='mean_absolute_error')

model_LSTM.fit(
    X_train,
    y_train_scaled,
    validation_data=(X_val, y_val_scaled),
    epochs=100,
    callbacks=[early_stop],
)

In [None]:
# Build, compile and train the GRU regressor

model_GRU = Sequential(
    # Recurrent encoder: dropout acts on the inputs, recurrent_dropout on the hidden state
    [GRU(128,
         input_shape=(window_size, num_features),
         dropout=0.2,
         recurrent_dropout=0.2)]
    # Funnel of L1-regularised ReLU layers: 64 -> 32 -> 16 -> 8
    + [Dense(units, activation='relu', kernel_regularizer=regularizers.l1(0.001))
       for units in (64, 32, 16, 8)]
    # Single linear output for the scaled target
    + [Dense(1)]
)

model_GRU.compile(optimizer=Adam(learning_rate=0.001),
                  loss='mean_absolute_error')

model_GRU.fit(
    X_train,
    y_train_scaled,
    validation_data=(X_val, y_val_scaled),
    epochs=100,
    callbacks=[early_stop],
)

In [None]:
# Baseline absolute errors: |y| itself, i.e. the error of a constant zero prediction
train_baseline_errors = np.absolute(y_train)
val_baseline_errors = np.absolute(y_val)
test_baseline_errors = np.absolute(y_test)

# Mean absolute baseline error per dataset (first and only target column)
avg_train_baseline_errors = np.mean(train_baseline_errors, axis=0)[0]
avg_val_baseline_errors = np.mean(val_baseline_errors, axis=0)[0]
avg_test_baseline_errors = np.mean(test_baseline_errors, axis=0)[0]

In [None]:
## Evaluate the trained RNN model: predict, undo the target scaling, measure absolute errors

# Step 1 - predictions in the scaled target space (train, validation, test)
y_train_pred_scaled_RNN = model_RNN.predict(X_train)
y_val_pred_scaled_RNN = model_RNN.predict(X_val)
y_test_pred_scaled_RNN = model_RNN.predict(X_test)

# Step 2 - map the predictions back to the original target units
y_train_pred_RNN = scaler_y.inverse_transform(y_train_pred_scaled_RNN)
y_val_pred_RNN = scaler_y.inverse_transform(y_val_pred_scaled_RNN)
y_test_pred_RNN = scaler_y.inverse_transform(y_test_pred_scaled_RNN)

# Step 3 - element-wise absolute errors against the unscaled targets
train_errors_RNN = np.absolute(y_train_pred_RNN - y_train)
val_errors_RNN = np.absolute(y_val_pred_RNN - y_val)
test_errors_RNN = np.absolute(y_test_pred_RNN - y_test)

# Step 4 - mean absolute error per dataset (first and only target column)
avg_train_errors_RNN = np.mean(train_errors_RNN, axis=0)[0]
avg_val_errors_RNN = np.mean(val_errors_RNN, axis=0)[0]
avg_test_errors_RNN = np.mean(test_errors_RNN, axis=0)[0]

In [None]:
## Evaluate the trained LSTM model: predict, undo the target scaling, measure absolute errors

# Step 1 - predictions in the scaled target space (train, validation, test)
y_train_pred_scaled_LSTM = model_LSTM.predict(X_train)
y_val_pred_scaled_LSTM = model_LSTM.predict(X_val)
y_test_pred_scaled_LSTM = model_LSTM.predict(X_test)

# Step 2 - map the predictions back to the original target units
y_train_pred_LSTM = scaler_y.inverse_transform(y_train_pred_scaled_LSTM)
y_val_pred_LSTM = scaler_y.inverse_transform(y_val_pred_scaled_LSTM)
y_test_pred_LSTM = scaler_y.inverse_transform(y_test_pred_scaled_LSTM)

# Step 3 - element-wise absolute errors against the unscaled targets
train_errors_LSTM = np.absolute(y_train_pred_LSTM - y_train)
val_errors_LSTM = np.absolute(y_val_pred_LSTM - y_val)
test_errors_LSTM = np.absolute(y_test_pred_LSTM - y_test)

# Step 4 - mean absolute error per dataset (first and only target column)
avg_train_errors_LSTM = np.mean(train_errors_LSTM, axis=0)[0]
avg_val_errors_LSTM = np.mean(val_errors_LSTM, axis=0)[0]
avg_test_errors_LSTM = np.mean(test_errors_LSTM, axis=0)[0]

In [None]:
## Evaluate the trained GRU model: predict, undo the target scaling, measure absolute errors

# Step 1 - predictions in the scaled target space (train, validation, test)
y_train_pred_scaled_GRU = model_GRU.predict(X_train)
y_val_pred_scaled_GRU = model_GRU.predict(X_val)
y_test_pred_scaled_GRU = model_GRU.predict(X_test)

# Step 2 - map the predictions back to the original target units
y_train_pred_GRU = scaler_y.inverse_transform(y_train_pred_scaled_GRU)
y_val_pred_GRU = scaler_y.inverse_transform(y_val_pred_scaled_GRU)
y_test_pred_GRU = scaler_y.inverse_transform(y_test_pred_scaled_GRU)

# Step 3 - element-wise absolute errors against the unscaled targets
train_errors_GRU = np.absolute(y_train_pred_GRU - y_train)
val_errors_GRU = np.absolute(y_val_pred_GRU - y_val)
test_errors_GRU = np.absolute(y_test_pred_GRU - y_test)

# Step 4 - mean absolute error per dataset (first and only target column)
avg_train_errors_GRU = np.mean(train_errors_GRU, axis=0)[0]
avg_val_errors_GRU = np.mean(val_errors_GRU, axis=0)[0]
avg_test_errors_GRU = np.mean(test_errors_GRU, axis=0)[0]

In [None]:
# Summary table: one column per model, one row per dataset
data_errors = {
    'Baseline': [avg_train_baseline_errors, avg_val_baseline_errors, avg_test_baseline_errors],
    'RNN': [avg_train_errors_RNN, avg_val_errors_RNN, avg_test_errors_RNN],
    'LSTM': [avg_train_errors_LSTM, avg_val_errors_LSTM, avg_test_errors_LSTM],
    'GRU': [avg_train_errors_GRU, avg_val_errors_GRU, avg_test_errors_GRU],
}

# Scale every mean absolute error by 100 and round to four decimal places
df_errors = (
    pd.DataFrame(data=data_errors, index=['Train', 'Validation', 'Test'])
    .mul(100)
    .round(4)
)
df_errors