In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv1D, Flatten, MaxPooling1D, Dense, Reshape, Dropout, LeakyReLU, MultiHeadAttention, TimeDistributed, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from keras.regularizers import l2

from shared import read_dataset, plot_results, evaluate_price_predictions, mean_abs_error

2023-12-06 15:49:26.314296: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-06 15:49:26.345204: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-06 15:49:26.345234: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-06 15:49:26.345250: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-06 15:49:26.349809: I tensorflow/core/platform/cpu_feature_g

### Data fetching

In [3]:
start = "2013-10-01"
end = "2023-10-01"
target_column_name = 'Close'
path = '../models/lstm_model/predictor_adj_close.h5'

data = read_dataset('../data/DAL.MI_ta.csv', start, end)

data.dropna(inplace=True)

cols = [target_column_name] + [ col for col in data if col != target_column_name]
target_column = list(data.columns).index(target_column_name)
data = data[cols]

print(f"#Trading Days: {data.shape}")
print(cols)

#Trading Days: (2541, 23)
['Close', 'Open', 'High', 'Low', 'Volume', 'ma7', 'ma7_diff', 'ma21', 'ma21_diff', '26ema', '26ema_diff', '12ema', '12ema_diff', 'MACD', 'upper_band', 'lower_band', 'momentum', 'fourier_short', 'fourier_medium', 'fourier_long', 'Volatility_21', 'Close Diff', 'Open Diff']


### Data refactoring

In [None]:
# Define feature array and target array to train the model.
data_array = np.array(data.values)
target_array = np.array(data[target_column_name].values).reshape(-1, 1)

# Normalize the data
scaler_data = MinMaxScaler()
scaler_data.fit(data_array)
data_array = scaler_data.transform(data_array)

scaler_target = MinMaxScaler()
scaler_target.fit(target_array)
target_array = scaler_target.transform(target_array)

# Split the data
train_size = int(len(data_array) * 0.70)

def create_sequences(data, target, seq_length):
    sequence_data = []
    sequence_target = []
    for i in range(seq_length, len(data)):
        sequence_data.append(data[i-seq_length:i])
        sequence_target.append(target[i])
    return np.array(sequence_data), np.array(sequence_target)

SEQUENCE_LENGTH = 100
data_sequences, target_sequences = create_sequences(data_array, target_array, SEQUENCE_LENGTH)

train_data, test_data = data_sequences[:train_size], data_sequences[train_size:]
train_target, test_target = target_sequences[:train_size], target_sequences[train_size:]

print(f'train_data: {train_data.shape} triat_target: {train_target.shape}')
print(f'test_data: {test_data.shape} test_target: {test_target.shape}')

## Model definition
We will use three different types of Deep Neural Networks:
 - LSTM
 - CNN
 - Dense

At first we will train independently the CNN through a VAE and we will use the encoder for feature extrapolation.
Then we'll combine the three branches and train the model.

### VAE (Variational Auto Encoder)
As a feature extractor for our main neural network.

In [None]:
# class Sampling(keras.layers.Layer):
#     def call(self, inputs):
#         z_mean, z_log_var = inputs
#         batch = tf.shape(z_mean)[0]
#         dim = tf.shape(z_mean)[1]
#         epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
#         return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# def create_encoder(sequence_length, n_features, latent_dim):
#     inputs = Input(shape=(sequence_length, n_features))
#     x = Conv1D(filters=32, kernel_size=3, activation='relu')(inputs)
#     x = Conv1D(filters=64, kernel_size=3, activation=LeakyReLU(alpha=0.1))(x)
#     x = MaxPooling1D(pool_size=1)(x)
#     x = Conv1D(filters=128, kernel_size=2, activation=LeakyReLU(alpha=0.1))(x)
#     x = MaxPooling1D(pool_size=1)(x)
#     x = Conv1D(filters=256, kernel_size=2, activation=LeakyReLU(alpha=0.1))(x)
#     x = MaxPooling1D(pool_size=1)(x)
#     x = Conv1D(filters=256, kernel_size=2, activation=LeakyReLU(alpha=0.1))(x)
#     x = MaxPooling1D(pool_size=1)(x)
#     x = Conv1D(filters=256, kernel_size=2, activation=LeakyReLU(alpha=0.1))(x)
#     x = MaxPooling1D(pool_size=1)(x)
#     x = Flatten()(x)
#     z_mean = Dense(latent_dim, name='z_mean')(x)
#     z_log_var = Dense(latent_dim, name='z_log_var')(x)

#     encoder = Model(inputs, [z_mean, z_log_var], name='encoder')
#     return encoder

# def create_decoder(sequence_length, n_features, latent_dim):
#     latent_inputs = Input(shape=(latent_dim,))
#     x = Dense(sequence_length * n_features, activation='relu')(latent_inputs)
#     x = Reshape((sequence_length, n_features))(x)
#     # x = Conv1D(filters=32, kernel_size=3, activation='relu', padding='same')(x)
#     outputs = Conv1D(filters=n_features, kernel_size=3, activation='sigmoid', padding='same')(x)
#     decoder = Model(latent_inputs, outputs, name='decoder')
#     return decoder

# class VAE(keras.Model):
#     def __init__(self, encoder, decoder, beta=1.0, **kwargs):
#         super(VAE, self).__init__(**kwargs)
#         self.encoder = encoder
#         self.decoder = decoder
#         self.sampling = Sampling()
#         self.beta = beta

#     def train_step(self, data):
#         if isinstance(data, tuple):
#             data = data[0]
#         with tf.GradientTape() as tape:
#             z_mean, z_log_var = self.encoder(data)
#             z = self.sampling((z_mean, z_log_var))
#             reconstruction = self.decoder(z)
#             reconstruction_loss = tf.reduce_mean(
#                 tf.reduce_sum(
#                     keras.losses.mean_squared_error(data, reconstruction), axis=1
#                 )
#             )
#             kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
#             kl_loss = self.beta * tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))  # Weighted KL loss
#             total_loss = reconstruction_loss + kl_loss
#         grads = tape.gradient(total_loss, self.trainable_weights)
#         self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
#         return {
#             "total_loss": total_loss,
#             "reconstruction_loss": reconstruction_loss,
#             "kl_loss": kl_loss,
#         }
    
#     def test_step(self, data):
#         if isinstance(data, tuple):
#             data = data[0]
#         z_mean, z_log_var = self.encoder(data)
#         z = self.sampling((z_mean, z_log_var))
#         reconstruction = self.decoder(z)
#         reconstruction_loss = tf.reduce_mean(
#             tf.reduce_sum(
#                 keras.losses.mean_squared_error(data, reconstruction), axis=1
#             )
#         )
#         kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
#         kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
#         total_loss = reconstruction_loss + kl_loss
#         return {
#             "loss": total_loss,
#             "reconstruction_loss": reconstruction_loss,
#             "kl_loss": kl_loss,
#         }

# def build_autoencoder(encoder, decoder, learning_rate, beta):
#     vae = VAE(encoder, decoder, beta=beta)
#     vae.compile(optimizer=Adam(learning_rate=learning_rate))
#     return vae


### Build and Train the model

In [None]:
# latent_dim = 3
# epochs = 1000
# batch_size = 1024
# patience = 20
# encoder = create_encoder(sequence_length=train_data.shape[1], n_features=train_data.shape[2], latent_dim=latent_dim)
# decoder = create_decoder(sequence_length=train_data.shape[1], n_features=train_data.shape[2], latent_dim=latent_dim)

# vae = build_autoencoder(encoder, decoder, learning_rate=0.0003, beta=0.3)

# print("\n")
# test_loss = vae.evaluate(test_data, test_data)
# print("\n")

# early_stopping = keras.callbacks.EarlyStopping(
#     monitor='kl_loss',
#     patience=patience,
#     verbose=2,
#     mode='min',
#     restore_best_weights=True,
# )

# vae.fit(train_data, train_data, epochs=epochs, batch_size=batch_size, verbose=2, callbacks=[early_stopping])

# print("\n")
# test_loss = vae.evaluate(test_data, test_data)

Loss of the test_data on the CNN before training:<br>
> loss: 9.6933 - reconstruction_loss: 1.1233 - kl_loss: 8.5700

Loss of the test_data on the CNN post training: <br>
> loss: 0.4672 - reconstruction_loss: 0.4509 - kl_loss: 0.0163


<br>
<br>
I observed lower decrease in the reconstruction loss using the difference prices in comparison with the timeseries.


### Main Model Function Definition
Three branches with independent input all converge to a dense layer.

In [14]:
def build_parallel_model(input_shape, encoder=0, l2_value=0.01):
    # LSTM Branch
    lstm_input = keras.layers.Input(shape=input_shape)
    lstm_branch = keras.layers.LSTM(90, kernel_regularizer=l2(l2_value), recurrent_regularizer=l2(l2_value), bias_regularizer=l2(l2_value))(lstm_input)

    # Dense Branch
    # dense_input = keras.layers.Input(shape=input_shape)
    # flattened = keras.layers.Flatten()(dense_input)
    # dense_branch = keras.layers.Dense(32, activation='relu', kernel_regularizer=l2(l2_value))(flattened)
    # dense_branch = keras.layers.Dense(5)(dense_branch)

    # VAE Branch
    # vae_input = keras.layers.Input(shape=input_shape)
    # z_mean, z_log_var = encoder(vae_input) # Use only z_mean for subsequent layers
    # encoded_output = keras.layers.Flatten()(z_mean)

    # Combining the branches
    combined = keras.layers.concatenate([
        lstm_branch,
        # dense_branch,
        # encoded_output,
        # cnn_output
    ])

    # Additional layers after combining
    combined_dense = keras.layers.Dense(units=1, kernel_regularizer=l2(l2_value))(combined)
    output = keras.layers.Dense(1)(combined_dense)

    model = keras.models.Model(inputs=[
        lstm_input,
        # dense_input,
        # vae_input,
        # cnn_input,
        ], outputs=output)
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def train_model(model, train_data, train_target, epochs=30, batch_size=256, patience=20):
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=patience,
        verbose=2,
        mode='min',
        restore_best_weights=True,
    )

    # live_plot = LivePlotCallback()

    model.fit(
        train_data,  # Assuming both branches use the same training data
        train_target,
        epochs=epochs,
        batch_size=batch_size,
        verbose=2,
        validation_split=0.3,
        callbacks=[early_stopping]
    )

In [15]:
def build_attention_lstm(seq_length, num_features):
    
    # Attention Model
    input_layer = Input(shape=(seq_length, num_features))
    # flatten = TimeDistributed(Flatten()) (input_layer)
    attention_output = MultiHeadAttention(num_heads=10, key_dim=64)(input_layer, input_layer, input_layer)
    dense_attention = Dense(1) (attention_output)
    
    # LSTM Model
    lstm_output = LSTM(64, return_sequences=True)(input_layer)
    dense_lstm = Dense(1)(lstm_output)
    
    # Merge the outputs
    merged = keras.layers.concatenate([dense_attention, dense_lstm])
    
    output = keras.layers.Dense(3)(merged)

    return Model(input_layer, output)

In [17]:
batch_size = 512
epochs = 3000
patience = 20
l2_value = 0.03

## GENERATE MODEL ##
model = build_attention_lstm(train_data.shape[1], train_data.shape[2])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
model.fit(train_data, train_target, epochs=epochs, batch_size=batch_size)
# model = load_model("../models/3branches.h5")
# train_model(model, train_data, train_target, epochs=epochs, batch_size=batch_size, patience=patience)


Epoch 1/3000


2023-12-05 21:56:58.368561: W tensorflow/core/framework/op_kernel.cc:1827] INVALID_ARGUMENT: required broadcastable shapes


InvalidArgumentError: Graph execution error:

Detected at node mean_squared_error/SquaredDifference defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/home/andrea/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/home/andrea/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1046, in launch_instance

  File "/home/andrea/.local/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 736, in start

  File "/home/andrea/.local/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/home/andrea/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue

  File "/home/andrea/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 505, in process_one

  File "/home/andrea/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell

  File "/home/andrea/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 740, in execute_request

  File "/home/andrea/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 422, in do_execute

  File "/home/andrea/.local/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 546, in run_cell

  File "/home/andrea/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3024, in run_cell

  File "/home/andrea/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3079, in _run_cell

  File "/home/andrea/.local/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/home/andrea/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3284, in run_cell_async

  File "/home/andrea/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3466, in run_ast_nodes

  File "/home/andrea/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code

  File "/tmp/ipykernel_30537/3558612636.py", line 12, in <module>

  File "/tmp/ipykernel_30537/3601051681.py", line 50, in train_model

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1783, in fit

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1377, in train_function

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1360, in step_function

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1349, in run_step

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1127, in train_step

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/engine/training.py", line 1185, in compute_loss

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/engine/compile_utils.py", line 277, in __call__

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/losses.py", line 143, in __call__

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/losses.py", line 270, in call

  File "/home/andrea/.local/lib/python3.10/site-packages/keras/src/losses.py", line 1706, in mean_squared_error

required broadcastable shapes
	 [[{{node mean_squared_error/SquaredDifference}}]] [Op:__inference_train_function_20319]

### Evaluate the model

In [None]:
data_to_predict = (train_data)
actual_prediction = train_target

model.evaluate(data_to_predict, actual_prediction)

# TESTing: DENORMALIZ300E TARGET AND PREDICTIONS ##
price_predicted_array = scaler_target.inverse_transform(model.predict(data_to_predict)) #[1:]
price_actual_array = scaler_target.inverse_transform(actual_prediction).flatten() #[:-1]

## Evaluation
evaluate_price_predictions(price_predicted_array.flatten(), price_actual_array)

## PLOTting #
plot_results(price_actual_array, price_predicted_array, target_column_name)

### Model training:
1st with 2010-01-01 - 2023-01-01 data
-> then save

2nd use 2023-01-01  - 2023-11-18 data
-> override

In [None]:
model.save("../models/3branches/low.h5")