In [1]:
import os
import glob
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.optimizers import Adam

tf.random.set_seed(1)
np.random.seed(1)

import warnings
warnings.filterwarnings("ignore")

class DataGenerator(keras.utils.Sequence):
    def __init__(self, file_path, batch_size=32, shuffle=True, t_v="train"):
        from s3fs.core import S3FileSystem
        self.s3 = S3FileSystem()
        self.file_path = file_path
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.t_v = t_v
        self.file_types = [f"{t_v}_frames_x", f"{t_v}_frames_y", f"{t_v}_frames_embed", f"{t_v}_frames_decoder_input"]
        self.indices = []
        for i in range(1, 9):
            for j in range(1, 5):
                if i==1 and j==4:
                    continue
                if i==8 and j==1:
                    continue
                self.indices += [f"_{i}_{j}"]
        self.on_epoch_end()
        self.total_samples = 0
        self.num_samples_per_index = {}
        for index in self.indices:
            data = np.load(self.s3.open(os.path.join(self.file_path, self.file_types[3] + index + ".pkl")), allow_pickle=True)
            self.total_samples += data.shape[0]
            self.num_samples_per_index[index] = data.shape[0]
        self.counter = 0
        self.time_dict = {}
        for idx in self.indices:
            self.time_dict[idx] = 0
        
        self.data_x = {}
        self.data_y = {}
        self.data_embed = {}
        self.data_decoder = {}
        
        for i in range(4):
            idx = self.indices[i]
            self.data_x[idx] = np.load(self.s3.open(os.path.join(self.file_path, f"{t_v}_frames_x"+idx+".pkl")), allow_pickle=True)
            self.data_y[idx] = np.load(self.s3.open(os.path.join(self.file_path, f"{t_v}_frames_y"+idx+".pkl")), allow_pickle=True)
            self.data_embed[idx] = np.load(self.s3.open(os.path.join(self.file_path, f"{t_v}_frames_embed"+idx+".pkl")), allow_pickle=True)
            self.data_decoder[idx] = np.load(self.s3.open(os.path.join(self.file_path, f"{t_v}_frames_decoder_input"+idx+".pkl")), allow_pickle=True)
            # add to queue
            
    def __len__(self):
        return int(np.floor(self.total_samples/self.batch_size))
        
    def __getitem__(self, index):

        # Generate indexes of the batch
        lower_idx = index * self.batch_size
        upper_idx = (index+1) * self.batch_size - 1
        
        t_v = self.t_v
        
        start = 0
        end = 0
        begin = 0
        finish = 0
        starting_idx = ""
        ending_idx = ""
        
        for idx in self.indices:
            end = start + self.num_samples_per_index[idx]
            if lower_idx < end and lower_idx >= start:
                starting_idx = idx
                begin = lower_idx-start
            if upper_idx < end and upper_idx >= start:
                ending_idx = idx
                finish = upper_idx-start
                break
            start = end
        
        print("yo: " + starting_idx)
        print("yo2: " + ending_idx)
        train_data_x, train_data_embed, train_data_decoder_input, train_data_y = [], [], [], []
        started = False
        for idx in self.indices:
            if not started and idx != starting_idx:
                continue
            if idx not in self.data_y.keys():
                swap_idx = sorted([(self.time_dict[idx], idx) for idx in self.data_y.keys()])[0][1]
                del self.data_x[swap_idx]
                del self.data_y[swap_idx]
                del self.data_embed[swap_idx]
                del self.data_decoder[swap_idx]
                self.data_x[idx] = np.load(self.s3.open(os.path.join(self.file_path, f"{t_v}_frames_x"+idx+".pkl")), allow_pickle=True)
                self.data_y[idx] = np.load(self.s3.open(os.path.join(self.file_path, f"{t_v}_frames_y"+idx+".pkl")), allow_pickle=True)
                self.data_embed[idx] = np.load(self.s3.open(os.path.join(self.file_path, f"{t_v}_frames_embed"+idx+".pkl")), allow_pickle=True)
                self.data_decoder[idx] = np.load(self.s3.open(os.path.join(self.file_path, f"{t_v}_frames_decoder_input"+idx+".pkl")), allow_pickle=True)
            self.counter += 1
            self.time_dict[idx] = self.counter
            if idx == starting_idx:
                if idx == ending_idx:
                    train_data_x, train_data_y, train_data_embed, train_data_decoder_input = \
                    self.fetch_data(idx, train_data_x, train_data_y, train_data_embed, train_data_decoder_input, begin, finish)
                    break
                else:
                    train_data_x, train_data_y, train_data_embed, train_data_decoder_input = \
                    self.fetch_data(idx, train_data_x, train_data_y, train_data_embed, train_data_decoder_input, begin, -1)
            elif idx != ending_idx:
                train_data_x, train_data_y, train_data_embed, train_data_decoder_input = \
                self.fetch_data(idx, train_data_x, train_data_y, train_data_embed, train_data_decoder_input, -1, -1)
            elif idx == ending_idx:
                train_data_x, train_data_y, train_data_embed, train_data_decoder_input = \
                self.fetch_data(idx, train_data_x, train_data_y, train_data_embed, train_data_decoder_input, -1, finish)
                break
                
        train_data_x, train_data_y, train_data_embed, train_data_decoder_input = np.concatenate(train_data_x), np.concatenate(train_data_y), np.concatenate(train_data_embed), np.concatenate(train_data_decoder_input)
        
        return [np.nan_to_num(train_data_x[:, :, 1:]).astype(np.float32), np.nan_to_num(train_data_embed).astype(np.float32), np.nan_to_num(train_data_decoder_input).astype(np.float32)], np.nan_to_num(train_data_y[:, :, 1:]).astype(np.float32)

    def on_epoch_end(self):
        np.random.shuffle(self.indices)
        
    def fetch_data(self, idx, train_data_x, train_data_y, train_data_embed, train_data_decoder_input, start=-1, end=-1):
        
        if start==-1 and end==-1:
            train_data_x += [self.data_x[idx]]
            train_data_y += [self.data_y[idx]]
            train_data_embed += [self.data_embed[idx]]
            train_data_decoder_input += [self.data_decoder[idx]]
        elif start==-1:
            train_data_x += [self.data_x[idx][:end+1]]
            train_data_y += [self.data_y[idx][:end+1]]
            train_data_embed += [self.data_embed[idx][:end+1]]
            train_data_decoder_input += [self.data_decoder[idx][:end+1]]
        elif end==-1:
            train_data_x += [self.data_x[idx][start:]]
            train_data_y += [self.data_y[idx][start:]]
            train_data_embed += [self.data_embed[idx][start:]]
            train_data_decoder_input += [self.data_decoder[idx][start:]]
        else:
            train_data_x += [self.data_x[idx][start:end+1]]
            train_data_y += [self.data_y[idx][start:end+1]]
            train_data_embed += [self.data_embed[idx][start:end+1]]
            train_data_decoder_input += [self.data_decoder[idx][start:end+1]]
            
        return train_data_x, train_data_y, train_data_embed, train_data_decoder_input

def define_model(num_embedding_features, num_historical_features, historical_data_window, future_prediction_window):
    
    encoder_input = keras.Input(shape=(historical_data_window, num_historical_features))
    encoder_lstm1 = layers.LSTM(32, return_sequences=True)(encoder_input)
    encoder_lstm1 = layers.ReLU()(encoder_lstm1)
    batch_norm1 = layers.BatchNormalization()(encoder_lstm1)
    encoder_lstm2 = layers.LSTM(64, return_sequences=True)(batch_norm1)
    encoder_lstm2 = layers.ReLU()(encoder_lstm2)
    batch_norm2 = layers.BatchNormalization()(encoder_lstm2)
    encoder_output = layers.LSTM(32)(batch_norm2)
    encoder_output = layers.ReLU()(encoder_output)

    embedding_input = keras.Input(shape=(num_embedding_features, ))
    embedding_layer1 = layers.Dense(32)(embedding_input)
    embedding_layer1 = layers.ReLU()(embedding_layer1)
    batch_norm3 = layers.BatchNormalization()(embedding_layer1)
    embedding_layer2 = layers.Dense(64)(batch_norm3)
    embedding_layer2 = layers.ReLU()(embedding_layer2)
    batch_norm4 = layers.BatchNormalization()(embedding_layer2)
    embedding_output = layers.Dense(32)(batch_norm4)
    embedding_output = layers.ReLU()(embedding_output)

    embedding_encoder_concatenate = layers.Concatenate()([encoder_output, embedding_output])
    embedding_encoder_concatenate = layers.Dense(32)(embedding_encoder_concatenate)
    embedding_encoder_concatenate = layers.ReLU()(embedding_encoder_concatenate)
    future_cpc = keras.Input(shape=(future_prediction_window, ))

    decoder_input = layers.RepeatVector(future_prediction_window)(embedding_encoder_concatenate)
    future_cpc = layers.Reshape((-1, 1))(future_cpc)
    decoder_input = layers.Concatenate()([decoder_input, future_cpc])
    decoder_lstm1 = layers.LSTM(32, return_sequences=True)(decoder_input)
    decoder_lstm1 = layers.ReLU()(decoder_lstm1)
    batch_norm5 = layers.BatchNormalization()(decoder_lstm1)
    decoder_lstm2 = layers.LSTM(16, return_sequences=True)(batch_norm5)
    decoder_lstm2 = layers.ReLU()(decoder_lstm2)
    batch_norm6 = layers.BatchNormalization()(decoder_lstm2)
    decoder_output = layers.LSTM(2, return_sequences=True)(batch_norm6)
    decoder_output_fin = layers.ReLU()(decoder_output)

    # Create the model
    ED_lstm_model = tf.keras.Model(inputs=[encoder_input, embedding_input, future_cpc], outputs=decoder_output_fin)
    ED_lstm_model.compile(optimizer="adam", loss='mean_squared_error')
    
    return ED_lstm_model


def train_model(epochs, batchsize, model_path, model, train_generator, valid_generator):
    
    history = model.fit_generator(
                         generator = train_generator,
                         validation_data = valid_generator,
                         use_multiprocessing = True,
                         workers=2,
                         verbose=1,
                         shuffle=False,
                         callbacks=[
                            tf.keras.callbacks.EarlyStopping(
                                monitor='val_loss',
                                min_delta=0,
                                patience=10,
                                verbose=1,
                                mode='min'
                            ),
                            tf.keras.callbacks.ModelCheckpoint(
                                model_path,
                                monitor='val_loss',
                                save_best_only=True,
                                mode='min',
                                verbose=1
                            ),
                            tf.keras.callbacks.TerminateOnNaN()
                         ],
                     )
    return history

In [2]:
model_path = "./trained_model.ckpt"
epochs = 1
batch_size = 32

tf.keras.backend.set_image_data_format("channels_last")

train_data_gen = DataGenerator("s3://training-data-lstm/processed_data_1/", 32, True, "train")
val_data_gen = DataGenerator("s3://training-data-lstm/processed_data_1/", 32, True, "test")

num_embedding_features = 11
num_historical_features = 14
historical_data_window = 14
future_prediction_window = 3

model = define_model(num_embedding_features, num_historical_features, historical_data_window, future_prediction_window)
history = train_model(epochs, batch_size, model_path, model, train_data_gen, val_data_gen)

  3608/327176 [..............................] - ETA: 1:07:04 - loss: 59.0394Batch 3611: Invalid loss, terminating training
  3612/327176 [..............................] - ETA: 1:07:04 - loss: nan    

Process Keras_worker_ForkPoolWorker-2:
Process Keras_worker_ForkPoolWorker-1:
Process Keras_worker_ForkPoolWorker-4:
Process Keras_worker_ForkPoolWorker-3:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    s

KeyboardInterrupt: 

In [5]:
train_data_gen = DataGenerator("s3://training-data-lstm/processed_data_1/", 32, True, "train")
X, y = train_data_gen.__getitem__(1901)

yo: _8_1
yo2: _8_1


In [9]:
np.sum(np.isnan(y))

0

In [18]:
X, y = train_data_gen.__getitem__(11004)

yo: _1_4
yo2: _1_4


In [3]:
print(train_data_gen.indices)

['_5_3', '_6_3', '_3_4', '_6_1', '_4_4', '_6_2', '_7_4', '_2_1', '_7_2', '_6_4', '_7_1', '_2_2', '_1_3', '_7_3', '_2_4', '_5_4', '_4_3', '_3_1', '_8_2', '_1_2', '_5_2', '_1_1', '_5_1', '_8_4', '_8_3', '_3_3', '_3_2', '_4_2', '_4_1', '_2_3']


In [9]:
for i in range(150000):
    X, y = train_data_gen.__getitem__(i)
    ##print(f"Safe {i}", end='\r', flush=True)
    if np.sum(np.isnan(X[0])) or np.sum(np.isnan(X[1])) or np.sum(np.isnan(X[2])) or np.sum(np.isnan(y)):
        print(f"yoho {i}")

KeyboardInterrupt: 

In [None]:
print(x, )