In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import SimpleRNN, LSTM, GRU, Dense, Input
from tensorflow.keras.utils import set_random_seed

2025-04-19 15:55:06.693635: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the feature table (X) and the target table (y) for a single stock

stock = 'HPQ'
window_size = 20     # appears in the X file name and in the later reshape
pred_horizon = 10    # appears in the y file name

X = pd.read_parquet(f'{stock}_X_{window_size}D.gzip')

# Accumulate the target columns left to right and keep only the final
# cumulative column, as a one-column DataFrame.
# NOTE(review): presumably each column is a per-day return, so the last
# cumulative column is the total over the horizon — confirm with the data prep.
y = (
    pd.read_parquet(f'{stock}_y_{pred_horizon}D.gzip')
    .cumsum(axis=1)
    .iloc[:, -1:]
)

In [3]:
# Split the dataset into training, validation and test datasets

num_features = 20

# Validate the assumptions the slicing and reshaping below depend on.
# Both X and y are cut at positions computed from len(X), so they must have
# the same number of rows to stay aligned.
if len(X) != len(y):
    raise ValueError(
        f'X and y must have the same number of rows, got {len(X)} and {len(y)}'
    )

# reshape(-1, window_size, num_features) only keeps one sample per row when
# each row has exactly window_size * num_features values; any other width
# either fails or silently regroups values across samples.
if X.shape[1] != window_size * num_features:
    raise ValueError(
        f'X has {X.shape[1]} columns but window_size * num_features = '
        f'{window_size * num_features}'
    )

scaler_y = StandardScaler()

# Positional (unshuffled) split: first 80% train, next 10% validation,
# last 10% test.
# NOTE(review): if rows are consecutive overlapping windows, samples next to
# the split boundaries may share data across splits — consider a gap; confirm.
q_80 = int(len(X) * .8)
q_90 = int(len(X) * .9)

X_train, y_train = X[:q_80].to_numpy(), y[:q_80].to_numpy()
X_val, y_val = X[q_80:q_90].to_numpy(), y[q_80:q_90].to_numpy()
X_test, y_test = X[q_90:].to_numpy(), y[q_90:].to_numpy()

# Turn each flat row into a (window_size, num_features) sequence for the
# recurrent layers.
X_train = X_train.reshape((-1, window_size, num_features))
X_val = X_val.reshape((-1, window_size, num_features))
X_test = X_test.reshape((-1, window_size, num_features))

# Fit the target scaler on the training targets only, then reuse it for the
# validation targets so no validation statistics enter the scaling.
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))
y_val_scaled = scaler_y.transform(y_val.reshape(-1, 1))

In [4]:
# Early-stopping callback passed to the fit() call of every network below.
# - monitor: the validation loss decides when to stop
# - patience: allow 5 epochs without improvement before stopping
# - restore_best_weights: after stopping, roll back to the best weights seen
early_stop = EarlyStopping(restore_best_weights=True, patience=5, monitor='val_loss')

In [5]:
# Fit an RNN model to the dataset

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable when this cell is re-run (up to any
# non-deterministic ops on the backend).
set_random_seed(42)

model_RNN = Sequential([
    # Declare the input shape with an explicit Input object: passing
    # `input_shape` to the first layer is deprecated in Keras 3 and emits a
    # UserWarning.
    Input(shape=(window_size, num_features)),
    SimpleRNN(128,
              dropout=0.2,               # dropout on input (per time step)
              recurrent_dropout=0.2),    # dropout on hidden state (across time)
    # Dense head narrowing 64 -> 32 -> 16 -> 8, each with an L1 weight penalty
    Dense(64, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(32, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(16, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(8, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(1)                             # single linear output (scaled target)
])

# Mean absolute error is the training loss, measured on the standardised target
model_RNN.compile(loss='mean_absolute_error',
                  optimizer=Adam(learning_rate=0.001))

# Train for at most 100 epochs; the early-stopping callback may end training
# sooner based on the validation loss.
model_RNN.fit(X_train, y_train_scaled, validation_data=(X_val, y_val_scaled), epochs=100, callbacks=[early_stop])

  super().__init__(**kwargs)


Epoch 1/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - loss: 1.6369 - val_loss: 1.0450
Epoch 2/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.9514 - val_loss: 0.7888
Epoch 3/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.7989 - val_loss: 0.7516
Epoch 4/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.7603 - val_loss: 0.7399
Epoch 5/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.7543 - val_loss: 0.7359
Epoch 6/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.7680 - val_loss: 0.7332
Epoch 7/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.7588 - val_loss: 0.7326
Epoch 8/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.7576 - val_loss: 0.7322
Epoch 9/100
[1m302/302[0m 

<keras.src.callbacks.history.History at 0x15326e9f0>

In [6]:
# Fit an LSTM model to the dataset

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable when this cell is re-run (up to any
# non-deterministic ops on the backend).
set_random_seed(42)

model_LSTM = Sequential([
    # Declare the input shape with an explicit Input object: passing
    # `input_shape` to the first layer is deprecated in Keras 3 and emits a
    # UserWarning.
    Input(shape=(window_size, num_features)),
    LSTM(128,
         dropout=0.2,               # dropout on input (per time step)
         recurrent_dropout=0.2),    # dropout on hidden state (across time)
    # Dense head narrowing 64 -> 32 -> 16 -> 8, each with an L1 weight penalty
    Dense(64, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(32, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(16, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(8, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(1)                        # single linear output (scaled target)
])

# Mean absolute error is the training loss, measured on the standardised target
model_LSTM.compile(loss='mean_absolute_error',
                   optimizer=Adam(learning_rate=0.001))

# Train for at most 100 epochs; the early-stopping callback may end training
# sooner based on the validation loss.
model_LSTM.fit(X_train, y_train_scaled, validation_data=(X_val, y_val_scaled), epochs=100, callbacks=[early_stop])

Epoch 1/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 44ms/step - loss: 1.3549 - val_loss: 0.7521
Epoch 2/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 44ms/step - loss: 0.7645 - val_loss: 0.7351
Epoch 3/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 40ms/step - loss: 0.7618 - val_loss: 0.7325
Epoch 4/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 41ms/step - loss: 0.7513 - val_loss: 0.7321
Epoch 5/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 39ms/step - loss: 0.7416 - val_loss: 0.7322
Epoch 6/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 41ms/step - loss: 0.7474 - val_loss: 0.7321
Epoch 7/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 44ms/step - loss: 0.7578 - val_loss: 0.7322
Epoch 8/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 41ms/step - loss: 0.7543 - val_loss: 0.7322
Epoch 9/100
[1m

<keras.src.callbacks.history.History at 0x1548069f0>

In [7]:
# Fit a GRU model to the dataset

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable when this cell is re-run (up to any
# non-deterministic ops on the backend).
set_random_seed(42)

model_GRU = Sequential([
    # Declare the input shape with an explicit Input object: passing
    # `input_shape` to the first layer is deprecated in Keras 3 and emits a
    # UserWarning.
    Input(shape=(window_size, num_features)),
    GRU(128,
        dropout=0.2,               # dropout on input (per time step)
        recurrent_dropout=0.2),    # dropout on hidden state (across time)
    # Dense head narrowing 64 -> 32 -> 16 -> 8, each with an L1 weight penalty
    Dense(64, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(32, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(16, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(8, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
    Dense(1)                       # single linear output (scaled target)
])

# Mean absolute error is the training loss, measured on the standardised target
model_GRU.compile(loss='mean_absolute_error',
                  optimizer=Adam(learning_rate=0.001))

# Train for at most 100 epochs; the early-stopping callback may end training
# sooner based on the validation loss.
model_GRU.fit(X_train, y_train_scaled, validation_data=(X_val, y_val_scaled), epochs=100, callbacks=[early_stop])

Epoch 1/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 40ms/step - loss: 1.3728 - val_loss: 0.7548
Epoch 2/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 37ms/step - loss: 0.7547 - val_loss: 0.7344
Epoch 3/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - loss: 0.7570 - val_loss: 0.7330
Epoch 4/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - loss: 0.7485 - val_loss: 0.7324
Epoch 5/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - loss: 0.7514 - val_loss: 0.7323
Epoch 6/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - loss: 0.7492 - val_loss: 0.7320
Epoch 7/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - loss: 0.7572 - val_loss: 0.7321
Epoch 8/100
[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - loss: 0.7505 - val_loss: 0.7322
Epoch 9/100
[1m

<keras.src.callbacks.history.History at 0x1552e10d0>

In [8]:
# Baseline: the absolute value of each target, i.e. the absolute error of a
# predictor that always outputs zero.
train_baseline_errors, val_baseline_errors, test_baseline_errors = (
    np.absolute(targets) for targets in (y_train, y_val, y_test)
)

# Average the baseline absolute errors per split; [0] picks the scalar out of
# the length-one result of the column-wise mean.
avg_train_baseline_errors = np.mean(train_baseline_errors, axis=0)[0]
avg_val_baseline_errors = np.mean(val_baseline_errors, axis=0)[0]
avg_test_baseline_errors = np.mean(test_baseline_errors, axis=0)[0]

In [9]:
## Evaluate the trained RNN model: predict on every split and measure absolute errors

# Scaled predictions, produced in train -> validation -> test order
y_train_pred_scaled_RNN, y_val_pred_scaled_RNN, y_test_pred_scaled_RNN = (
    model_RNN.predict(features) for features in (X_train, X_val, X_test)
)

# Undo the target standardisation so predictions are on the same scale as y
y_train_pred_RNN, y_val_pred_RNN, y_test_pred_RNN = map(
    scaler_y.inverse_transform,
    (y_train_pred_scaled_RNN, y_val_pred_scaled_RNN, y_test_pred_scaled_RNN),
)

# Element-wise absolute errors against the unscaled targets
train_errors_RNN = np.absolute(y_train_pred_RNN - y_train)
val_errors_RNN = np.absolute(y_val_pred_RNN - y_val)
test_errors_RNN = np.absolute(y_test_pred_RNN - y_test)

# Mean absolute error per split; [0] extracts the scalar from the column-wise mean
avg_train_errors_RNN = np.mean(train_errors_RNN, axis=0)[0]
avg_val_errors_RNN = np.mean(val_errors_RNN, axis=0)[0]
avg_test_errors_RNN = np.mean(test_errors_RNN, axis=0)[0]

[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [10]:
## Evaluate the trained LSTM model: predict on every split and measure absolute errors

# Scaled predictions, produced in train -> validation -> test order
y_train_pred_scaled_LSTM, y_val_pred_scaled_LSTM, y_test_pred_scaled_LSTM = (
    model_LSTM.predict(features) for features in (X_train, X_val, X_test)
)

# Undo the target standardisation so predictions are on the same scale as y
y_train_pred_LSTM, y_val_pred_LSTM, y_test_pred_LSTM = map(
    scaler_y.inverse_transform,
    (y_train_pred_scaled_LSTM, y_val_pred_scaled_LSTM, y_test_pred_scaled_LSTM),
)

# Element-wise absolute errors against the unscaled targets
train_errors_LSTM = np.absolute(y_train_pred_LSTM - y_train)
val_errors_LSTM = np.absolute(y_val_pred_LSTM - y_val)
test_errors_LSTM = np.absolute(y_test_pred_LSTM - y_test)

# Mean absolute error per split; [0] extracts the scalar from the column-wise mean
avg_train_errors_LSTM = np.mean(train_errors_LSTM, axis=0)[0]
avg_val_errors_LSTM = np.mean(val_errors_LSTM, axis=0)[0]
avg_test_errors_LSTM = np.mean(test_errors_LSTM, axis=0)[0]

[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


In [11]:
## Evaluate the trained GRU model: predict on every split and measure absolute errors

# Scaled predictions, produced in train -> validation -> test order
y_train_pred_scaled_GRU, y_val_pred_scaled_GRU, y_test_pred_scaled_GRU = (
    model_GRU.predict(features) for features in (X_train, X_val, X_test)
)

# Undo the target standardisation so predictions are on the same scale as y
y_train_pred_GRU, y_val_pred_GRU, y_test_pred_GRU = map(
    scaler_y.inverse_transform,
    (y_train_pred_scaled_GRU, y_val_pred_scaled_GRU, y_test_pred_scaled_GRU),
)

# Element-wise absolute errors against the unscaled targets
train_errors_GRU = np.absolute(y_train_pred_GRU - y_train)
val_errors_GRU = np.absolute(y_val_pred_GRU - y_val)
test_errors_GRU = np.absolute(y_test_pred_GRU - y_test)

# Mean absolute error per split; [0] extracts the scalar from the column-wise mean
avg_train_errors_GRU = np.mean(train_errors_GRU, axis=0)[0]
avg_val_errors_GRU = np.mean(val_errors_GRU, axis=0)[0]
avg_test_errors_GRU = np.mean(test_errors_GRU, axis=0)[0]

[1m302/302[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


In [12]:
# Collect the mean absolute errors: one column per predictor, one row per split
data_errors = dict(
    Baseline=[avg_train_baseline_errors, avg_val_baseline_errors, avg_test_baseline_errors],
    RNN=[avg_train_errors_RNN, avg_val_errors_RNN, avg_test_errors_RNN],
    LSTM=[avg_train_errors_LSTM, avg_val_errors_LSTM, avg_test_errors_LSTM],
    GRU=[avg_train_errors_GRU, avg_val_errors_GRU, avg_test_errors_GRU],
)

# Multiply by 100 and round to four decimal places for display
df_errors = (
    pd.DataFrame(data=data_errors, index=['Train', 'Validation', 'Test']) * 100
).round(4)
df_errors

Unnamed: 0,Baseline,RNN,LSTM,GRU
Train,5.2203,5.2153,5.2144,5.2145
Validation,5.0992,5.0964,5.0987,5.0979
Test,4.5862,4.5562,4.5428,4.5469
