In [1]:
import pandas as pd
import tensorflow as tf
import keras
from kerastuner import RandomSearch
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, TensorBoard 

  from kerastuner import RandomSearch


### Preparing train and val dataset for LSTM

In [2]:
# Paths to the train and validation datasets
train_data_path = 'train_data.csv'  
val_data_path = 'val_data.csv'  
test_data_path = 'test_data.csv'

In [3]:
# Load the datasets
train_data = pd.read_csv(train_data_path)
val_data = pd.read_csv(val_data_path)
test_data = pd.read_csv(test_data_path)
print(train_data.shape)
print(val_data.shape)

(2791, 1981)
(931, 1981)


In [4]:
# Splitting the datasets into features (X) and target (y)
X_train = train_data.drop('Output', axis=1)
y_train = train_data['Output']
X_val = val_data.drop('Output', axis=1)
y_val = val_data['Output']
X_test = test_data.drop('Output', axis=1)
y_test = test_data['Output']

In [5]:
# Check the shapes of the datasets
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (2791, 1980)
y_train shape: (2791,)
X_val shape: (931, 1980)
y_val shape: (931,)
X_test shape: (931, 1980)
y_test shape: (931,)


In [6]:
print (y_val)

0      0.080714
1      0.687701
2      0.019128
3      0.191419
4      0.391193
         ...   
926    0.470977
927    0.183726
928    0.029032
929    0.089227
930    0.197472
Name: Output, Length: 931, dtype: float64


### Reshaping the data

In [7]:
import numpy as np

In [8]:
# Assuming an equal number of features per day
num_days = 60
total_features = 1980
features_per_day = int(total_features / num_days)
features_per_day

33

In [9]:
# Reshape the data to (samples, time steps, features)
X_train_reshaped = X_train.values.reshape(-1, num_days, features_per_day)
X_val_reshaped = X_val.values.reshape(-1, num_days, features_per_day)
X_test_reshaped = X_test.values.reshape(-1, num_days, features_per_day)

In [10]:
print(X_train_reshaped)

[[[8.77508802e-01 8.75963586e-01 8.74107413e-01 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]
  [8.83411306e-01 8.85684264e-01 8.85258435e-01 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]
  [8.47678380e-01 8.68316151e-01 8.60032794e-01 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]
  ...
  [6.42186773e-01 6.40862657e-01 6.39483947e-01 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]
  [6.67746895e-01 6.62961532e-01 6.49134702e-01 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]
  [6.87238444e-01 7.00086699e-01 6.76880022e-01 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]]

 [[3.84374500e-02 3.66728240e-02 3.22461207e-02 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]
  [3.94101290e-02 3.78413780e-02 3.96176240e-02 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]
  [3.85370899e-02 3.68141810e-02 3.72662445e-02 ... 0.00000000e+00
   0.00000000e+00 0.00000000e+00]
  ...
  [3.46843300e-03 1.99313900e-03 7.83793000e-04 ... 0.00000000e+00
   1.00000

In [11]:
# Check the shapes of the datasets
print("X_train_reshaped shape:", X_train_reshaped.shape)
print("y_train shape:", y_train.shape)
print("X_val_reshaped shape:", X_val_reshaped.shape)
print("y_val:", y_val.shape)
print("X_test_reshaped shape:", X_test_reshaped.shape)
print("y_test shape:", y_test.shape)

X_train_reshaped shape: (2791, 60, 33)
y_train shape: (2791,)
X_val_reshaped shape: (931, 60, 33)
y_val: (931,)
X_test_reshaped shape: (931, 60, 33)
y_test shape: (931,)


In [12]:
# Define your batch size
batch_size = 32  # You can adjust this according to your needs

# Convert to TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_reshaped, y_train)).shuffle(len(X_train_reshaped)).batch(batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices((X_val_reshaped, y_val)).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test_reshaped, y_test)).batch(batch_size)

### Building transformer model

In [13]:
def positional_encoding(position, d_model):
    def get_angles(pos, i, d_model):
        angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return pos * angles

    position = tf.range(position, dtype=tf.float32)[:, tf.newaxis]
    i = tf.range(d_model, dtype=tf.float32)[tf.newaxis, :]

    angle_rads = get_angles(position, i, d_model)

    # Apply sin to even indices in the array; 2i
    sines = tf.math.sin(angle_rads[:, 0::2])

    # Apply cos to odd indices in the array; 2i+1
    cosines = tf.math.cos(angle_rads[:, 1::2])

    pos_encoding = tf.concat([sines, cosines], axis=-1)
    pos_encoding = pos_encoding[tf.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)

In [14]:
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % self.num_heads == 0

        self.depth = d_model // self.num_heads

        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)

        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        # Split the last dimension into (num_heads, depth)
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        # Transpose for shape (batch_size, num_heads, seq_len, depth)
        return tf.transpose(x, perm=[0, 2, 1, 3])
    
    def call(self, v, k, q, mask):
        batch_size = tf.shape(q)[0]

        # Linear layers
        q = self.wq(q)  # (batch_size, seq_len, d_model)
        k = self.wk(k)
        v = self.wv(v)

        # Debugging: Print shapes after linear transformation
        #print("Shapes after linear transformation:", q.shape, k.shape, v.shape)

        # Split heads
        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)

        # Debugging: Print shapes after split_heads
        #print("Shapes after split_heads:", q.shape, k.shape, v.shape)

        # Scaled dot-product attention
        scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
        # (batch_size, num_heads, seq_len_q, depth), (batch_size, num_heads, seq_len_q, seq_len_k)

        # Concatenate heads
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))

        # Final linear layer
        output = self.dense(concat_attention)

        return output, attention_weights
    
    def scaled_dot_product_attention(q, k, v, mask):
        matmul_qk = tf.matmul(q, k, transpose_b=True)
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
        if mask is not None:
            scaled_attention_logits += (mask * -1e9)  
        attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
        output = tf.matmul(attention_weights, v)
        return output, attention_weights

In [15]:
def pointwise_feed_forward_network(d_model, dff):
    return tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),
        tf.keras.layers.Dense(d_model)
    ])

In [16]:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(EncoderLayer, self).__init__()

        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = pointwise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        attn_output, _ = self.mha(x, x, x, mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)

        return out2


In [17]:
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(DecoderLayer, self).__init__()

        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)

        self.ffn = pointwise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        attn1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask)
        attn1 = self.dropout1(attn1, training=training)
        out1 = self.layernorm1(attn1 + x)

        attn2, attn_weights_block2 = self.mha2(enc_output, enc_output, out1, padding_mask)
        attn2 = self.dropout2(attn2, training=training)
        out2 = self.layernorm2(attn2 + out1)

        ffn_output = self.ffn(out2)
        ffn_output = self.dropout3(ffn_output, training=training)
        out3 = self.layernorm3(ffn_output + out2)

        return out3, attn_weights_block1, attn_weights_block2


In [18]:
class TimeSeriesTransformer(tf.keras.Model):
    def __init__(self, num_layers, d_model, num_heads, dff, rate=0.1):
        super(TimeSeriesTransformer, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        # Input processing
        self.input_layer = tf.keras.layers.Dense(d_model, activation='relu')
        self.pos_encoding = positional_encoding(60, d_model)  # 60 time steps

        # Encoder Layers
        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]

        # Decoder Layers
        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]

        # Final Output Layer
        self.final_layer = tf.keras.layers.Dense(1)  # Predicting a single value

    def call(self, x, training=False):
        seq_len = tf.shape(x)[1]

        # Example default masks (adjust as needed)
        look_ahead_mask = None  # or create a suitable look ahead mask
        padding_mask = None  # or create a suitable padding mask
        
        # Input processing
        x = self.input_layer(x)  # (batch_size, seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]

        # Encoder
        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, padding_mask)

        encoder_output = x  # (batch_size, input_seq_len, d_model)

        # Decoder
        decoder_output = encoder_output  # Adjust as needed
        for i in range(self.num_layers):
            decoder_output = self.dec_layers[i](decoder_output, encoder_output, training, look_ahead_mask, padding_mask)

        # Final Output
        final_output = self.final_layer(decoder_output)  # (batch_size, seq_len, 1)

        return final_output

### Hyperparameter Tuner

In [19]:
import kerastuner as kt

def build_hypermodel(hp):
    num_layers = hp.Int('num_layers', min_value=2, max_value=6, step=1)
    d_model = hp.Int('d_model', min_value=64, max_value=512, step=64)
    num_heads = hp.Int('num_heads', min_value=2, max_value=8, step=2)
    dff = hp.Int('dff', min_value=128, max_value=2048, step=128)
    rate = hp.Float('rate', min_value=0.1, max_value=0.5, step=0.1)

    model = TimeSeriesTransformer(num_layers=num_layers, d_model=d_model, 
                                  num_heads=num_heads, dff=dff, rate=rate)

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

    return model

# Initialize the tuner
tuner = kt.Hyperband(build_hypermodel,
                     objective='val_loss',
                     max_epochs=10,
                     factor=3,
                     directory='transformer_tuning',
                     project_name='stock_prediction')

# Define callbacks (e.g., EarlyStopping and TensorBoard)
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
tensorboard = TensorBoard(log_dir='./logs')

# Start the search for the best hyperparameter configuration
tuner.search(train_dataset, validation_data=val_dataset, epochs=50, callbacks=[early_stopping, tensorboard])

Reloading Tuner from transformer_tuning\stock_prediction\tuner0.json

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
5                 |4                 |num_layers
384               |128               |d_model
8                 |2                 |num_heads
2048              |1152              |dff
0.2               |0.5               |rate
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2


Traceback (most recent call last):
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 273, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 238, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\tuners\hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\engine\tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **co

RuntimeError: Number of consecutive failures exceeded the limit of 3.
Traceback (most recent call last):
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 273, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\engine\base_tuner.py", line 238, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\tuners\hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\engine\tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\engine\tuner.py", line 233, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras_tuner\src\engine\hypermodel.py", line 149, in fit
    return model.fit(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_filev788uc8h.py", line 18, in tf__train_function
    raise
  File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_filec5ox0ws4.py", line 31, in tf__call
    ag__.for_stmt(ag__.converted_call(ag__.ld(range), (ag__.ld(self).num_layers,), None, fscope), None, loop_body, get_state, set_state, ('x',), {'iterate_names': 'i'})
  File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_filec5ox0ws4.py", line 29, in loop_body
    x = ag__.converted_call(ag__.ld(self).enc_layers[ag__.ld(i)], (ag__.ld(x), ag__.ld(training), ag__.ld(padding_mask)), None, fscope)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_fileiznkj8k3.py", line 10, in tf__call
    attn_output, _ = ag__.converted_call(ag__.ld(self).mha, (ag__.ld(x), ag__.ld(x), ag__.ld(x), ag__.ld(mask)), None, fscope)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_file57nxcwqz.py", line 17, in tf__call
    scaled_attention, attention_weights = ag__.converted_call(ag__.ld(scaled_dot_product_attention), (ag__.ld(q), ag__.ld(k), ag__.ld(v), ag__.ld(mask)), None, fscope)
                                                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
NameError: in user code:

    File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1080, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_filec5ox0ws4.py", line 31, in tf__call
        ag__.for_stmt(ag__.converted_call(ag__.ld(range), (ag__.ld(self).num_layers,), None, fscope), None, loop_body, get_state, set_state, ('x',), {'iterate_names': 'i'})
    File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_filec5ox0ws4.py", line 29, in loop_body
        x = ag__.converted_call(ag__.ld(self).enc_layers[ag__.ld(i)], (ag__.ld(x), ag__.ld(training), ag__.ld(padding_mask)), None, fscope)
    File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_fileiznkj8k3.py", line 10, in tf__call
        attn_output, _ = ag__.converted_call(ag__.ld(self).mha, (ag__.ld(x), ag__.ld(x), ag__.ld(x), ag__.ld(mask)), None, fscope)
    File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_file57nxcwqz.py", line 17, in tf__call
        scaled_attention, attention_weights = ag__.converted_call(ag__.ld(scaled_dot_product_attention), (ag__.ld(q), ag__.ld(k), ag__.ld(v), ag__.ld(mask)), None, fscope)

    NameError: Exception encountered when calling layer 'time_series_transformer' (type TimeSeriesTransformer).
    
    in user code:
    
        File "C:\Users\lenovo\AppData\Local\Temp\ipykernel_17992\2425982286.py", line 35, in call  *
            x = self.enc_layers[i](x, training, padding_mask)
        File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_fileiznkj8k3.py", line 10, in tf__call
            attn_output, _ = ag__.converted_call(ag__.ld(self).mha, (ag__.ld(x), ag__.ld(x), ag__.ld(x), ag__.ld(mask)), None, fscope)
        File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_file57nxcwqz.py", line 17, in tf__call
            scaled_attention, attention_weights = ag__.converted_call(ag__.ld(scaled_dot_product_attention), (ag__.ld(q), ag__.ld(k), ag__.ld(v), ag__.ld(mask)), None, fscope)
    
        NameError: Exception encountered when calling layer 'encoder_layer' (type EncoderLayer).
        
        in user code:
        
            File "C:\Users\lenovo\AppData\Local\Temp\ipykernel_17992\3993560056.py", line 15, in call  *
                attn_output, _ = self.mha(x, x, x, mask)
            File "C:\Users\lenovo\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler  **
                raise e.with_traceback(filtered_tb) from None
            File "C:\Users\lenovo\AppData\Local\Temp\__autograph_generated_file57nxcwqz.py", line 17, in tf__call
                scaled_attention, attention_weights = ag__.converted_call(ag__.ld(scaled_dot_product_attention), (ag__.ld(q), ag__.ld(k), ag__.ld(v), ag__.ld(mask)), None, fscope)
        
            NameError: Exception encountered when calling layer 'multi_head_attention' (type MultiHeadAttention).
            
            in user code:
            
                File "C:\Users\lenovo\AppData\Local\Temp\ipykernel_17992\2737509367.py", line 43, in call  *
                    scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
            
                NameError: name 'scaled_dot_product_attention' is not defined
            
            
            Call arguments received by layer 'multi_head_attention' (type MultiHeadAttention):
              • v=tf.Tensor(shape=(None, 60, 384), dtype=float32)
              • k=tf.Tensor(shape=(None, 60, 384), dtype=float32)
              • q=tf.Tensor(shape=(None, 60, 384), dtype=float32)
              • mask=None
        
        
        Call arguments received by layer 'encoder_layer' (type EncoderLayer):
          • x=tf.Tensor(shape=(None, 60, 384), dtype=float32)
          • training=True
          • mask=None
    
    
    Call arguments received by layer 'time_series_transformer' (type TimeSeriesTransformer):
      • x=tf.Tensor(shape=(None, 60, 33), dtype=float32)
      • training=True



In [14]:
# Combine the train and validation sets for final training
X_combined = np.concatenate((X_train_reshaped, X_val_reshaped), axis=0)
y_combined = np.concatenate((y_train, y_val), axis=0)

In [15]:
# Train the best model on the combined dataset
best_model.fit(X_combined, y_combined, epochs=50, batch_size=32, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x17600094cd0>

In [19]:
# After training, save the best model to an HDF5 file
best_model_path = 'LSTM_Model/best_lstm_model.h5'  # Replace with your desired path
best_model.save(best_model_path)

print(f"The best model is saved to {best_model_path}")

  saving_api.save_model(


The best model is saved to LSTM_Model/best_lstm_model.h5


### Best model loss and prediction for test set

In [16]:
# Evaluate the model on the test set
test_loss = best_model.evaluate(X_test_reshaped, y_test, verbose=0)

In [17]:
# Calculate predictions to evaluate other metrics such as R^2 or MAE
y_pred = best_model.predict(X_test_reshaped)



### Getting model and result info

In [18]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

# Assuming y_test and y_pred are already defined
# You can replace them with your actual test and prediction data

# Calculate R^2 score
r2 = r2_score(y_test, y_pred)

# Calculate MAE
mae = mean_absolute_error(y_test, y_pred)

# Calculate MSE
mse = mean_squared_error(y_test, y_pred)

# Calculate RMSE
rmse = np.sqrt(mse)

# Calculate MAPE
# Assuming y_pred is the array you provided
y_pred = y_pred.flatten()

# Assuming test_loss is defined
print(f"Test Loss: {test_loss}")
print(f"R^2 Score: {r2}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")

# Now y_pred is one-dimensional, and you can proceed with calculations like MAPE
# Ensure there are no zero elements in y_test to avoid division by zero in MAPE calculation
if np.any(y_test == 0):
    print("Error: y_test contains zero values, which will lead to division by zero in MAPE calculation.")
else:
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
    print(f"Mean Absolute Percentage Error: {mape}%")

Test Loss: 0.000555491482373327
R^2 Score: 0.9923823291490315
Mean Absolute Error: 0.011540712830362215
Mean Squared Error: 0.000555491405254933


In [20]:
from keras.models import load_model

# Load the best model
best_model = load_model(best_model_path)

# Get the configuration of the model
config = best_model.get_config()

# Extract the units from the LSTM layers
lstm1_units = config['layers'][1]['config']['units']
lstm2_units = config['layers'][3]['config']['units']

# Extract the dropout rate from the Dropout layer
dropout_rate = config['layers'][2]['config']['rate']

# Learning rate is part of the optimizer's configuration
learning_rate = best_model.optimizer.learning_rate.numpy()

# Output the extracted values
print(f"LSTM Layer 1 Units: {lstm1_units}")
print(f"LSTM Layer 2 Units: {lstm2_units}")
print(f"Dropout Rate: {dropout_rate}")
print(f"Learning Rate: {learning_rate}")

LSTM Layer 1 Units: 256
LSTM Layer 2 Units: 256
Dropout Rate: 0.0
Learning Rate: 0.0010000000474974513
