In [1]:
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.metrics import R2Score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking, TimeDistributed, Lambda
from tensorflow.keras.optimizers.legacy import Adam

## Data Loading

In [2]:
with open("./data_manipulation/lstm_data.pickle", "rb") as fp:
    data = pickle.load(fp)

X = data["X"]
Y = data["Y"]
del data

## Model Building

In [3]:
metric = R2Score()

In [4]:
# Padding function
def pad_sequences_dynamic(targets, padding_value=np.nan):
    return pad_sequences(targets, dtype='float32', padding='post', value=padding_value)

def r2_score(y_true, y_pred):
    mask = tf.math.is_finite(y_true)
    y_true = tf.where(mask, y_true, y_pred)  # Replace NaNs with predictions in y_true
    y_true_mean = K.mean(y_true, axis=-1, keepdims=True)
    ss_total = K.sum(K.square(y_true - y_true_mean), axis=-1)
    ss_res = K.sum(K.square(y_true - y_pred), axis=-1)
    r2 = 1 - ss_res / (ss_total + K.epsilon())
    return r2
    
# Custom loss function to handle NaN values
def custom_mse_loss(y_true, y_pred):
    mask = tf.math.is_finite(y_true)
    y_true = tf.where(mask, y_true, y_pred)  # Replace NaNs with predictions in y_true
    return tf.keras.losses.mean_squared_error(y_true, y_pred)

def build_lstm_model(input_shape, output_length):
    model = Sequential()
    model.add(Masking(mask_value=np.nan, input_shape=input_shape))
    model.add(TimeDistributed(Dense(16)))
    model.add(LSTM(8, return_sequences=True))
    #model.add(Dropout(0.2))
    model.add(LSTM(8, return_sequences=True))
    #model.add(Dropout(0.2))
    model.add(TimeDistributed(Dense(1)))  # Use TimeDistributed to predict for each time step
    model.add(Lambda(lambda x: x[:, -output_length:, :]))  # Truncate to the output length
    model.compile(optimizer=Adam(), loss=custom_mse_loss, metrics=[r2_score])
    return model

In [5]:
# Pad the targets for batch processing
Y_padded = pad_sequences_dynamic(Y, padding_value=np.nan)
Y_padded = np.expand_dims(Y_padded, -1)

In [6]:
input_shape = (X.shape[1], X.shape[2])  # (time steps, features)
output_length = Y_padded.shape[1]

print(input_shape,output_length)

(8, 23) 8


In [7]:
model = build_lstm_model(input_shape, output_length)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking (Masking)           (None, 8, 23)             0         
                                                                 
 time_distributed (TimeDist  (None, 8, 16)             384       
 ributed)                                                        
                                                                 
 lstm (LSTM)                 (None, 8, 8)              800       
                                                                 
 lstm_1 (LSTM)               (None, 8, 8)              544       
                                                                 
 time_distributed_1 (TimeDi  (None, 8, 1)              9         
 stributed)                                                      
                                                                 
 lambda (Lambda)             (None, 8, 1)              0

In [8]:
X.shape, Y_padded.shape

((4992, 8, 23), (4992, 8, 1))

In [None]:
# Mask the padding values during training
model.fit(X, Y_padded, epochs=2000, batch_size=None, validation_split=0.2)

Epoch 1/2000
