In [3]:
import os
import pickle
import gc

import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.regularizers import L1L2

In [4]:
config = {
    "EMBEDDING_SIZE": 32,
    
    "ATTENTION_SIZE": 8,
    "ATTENTION_MODULE": "additive",
    "ATTENTION_METHOD": "standard",
    
    "LR": 0.0005,
    "BATCH_SIZE": 32,
    "EPOCHS": 30,
}

MODEL = f"{config['ATTENTION_METHOD']}-{config['ATTENTION_MODULE']}-attention"

In [5]:
RUN_WANDB = False

if(RUN_WANDB):
    import wandb
    from wandb.keras import WandbCallback
    from secrets import WANDB
    wandb.login(key=WANDB)

## **Load Data Function**

In [6]:
def get_data(fold):
    with open(f'./data/fold_{fold}.npy', mode='rb') as f:
        train_inputs = np.load(f, allow_pickle=False)
        train_target_inputs = np.load(f, allow_pickle=False) 
        train_targets = np.load(f, allow_pickle=False)
        # val arrays
        val_inputs = np.load(f, allow_pickle=False)
        val_target_inputs = np.load(f, allow_pickle=False)
        val_targets = np.load(f, allow_pickle=False)
        # test arrays
        test_inputs = np.load(f, allow_pickle=False)
        test_target_inputs = np.load(f, allow_pickle=False)
        test_targets = np.load(f, allow_pickle=False)
        # data info
        data_features = np.load(f, allow_pickle=False)
    return (train_inputs,train_target_inputs), train_targets, (val_inputs,val_target_inputs), val_targets, (test_inputs,test_target_inputs), test_targets, data_features

X_train, y_train, X_val, y_val, X_test, y_test, data_features = get_data(2)

## **Model Functions**

In [8]:
class BahdanauAttentionLayer(tf.keras.layers.Layer):
    def __init__(self, units, use_bias=False):
        super(BahdanauAttentionLayer, self).__init__()
        self.units = units
        self.use_bias = use_bias
        self.W1 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.W2 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
    def get_config(self):
        config = super(BahdanauAttentionLayer, self).get_config()
        config.update({"units": self.units, "use_bias":self.use_bias})
        return config
    def call(self, query, values, keys=None, verbose=False):
        expanded_query = tf.expand_dims(query, 2)
        encoded_query = self.W1(expanded_query)
        if(keys is None):
            encoded_keys = self.W2(tf.expand_dims(values, 1))
        else:
            encoded_keys = self.W2(tf.expand_dims(keys, 1))
        combined_encoded_query_and_keys = encoded_query + encoded_keys
        tanh_score = tf.nn.tanh(combined_encoded_query_and_keys)
        score = tf.reduce_sum(tanh_score, axis=-1)
        attention_weights = tf.nn.softmax(score, axis=-1)
        overall_context_vector = tf.matmul(attention_weights, values)
        context_vector = overall_context_vector
        return context_vector

class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units, use_bias=False):
        super(BahdanauAttention, self).__init__()
        self.units = units
        self.use_bias = use_bias
        self.W1 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.W2 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.V = tf.keras.layers.Dense(1)
    def get_config(self):
        config = super(BahdanauAttention, self).get_config()
        config.update({"units": self.units, "use_bias":self.use_bias})
        return config
    def call(self, query, values, keys=None, verbose=False):
        query_with_time_axis = tf.expand_dims(query, 2)
        encoded_query = self.W1(query_with_time_axis)
        if(keys is None):
            encoded_keys = self.W2(tf.expand_dims(values, 1))
        else:
            encoded_keys = self.W2(tf.expand_dims(keys, 1))
        combined_encoded_query_and_keys = encoded_query + encoded_keys
        tanh_score = tf.nn.tanh(combined_encoded_query_and_keys)
        score = self.V(tanh_score)
        squeezed_score = tf.squeeze(score, axis=-1)
        attention_weights = tf.nn.softmax(squeezed_score, axis=-1)
        context_vector = tf.matmul(attention_weights, values)
        return context_vector
    
class LuongAttentionLayer(tf.keras.layers.Layer):
    def __init__(self, units, use_bias=False):
        super(BahdanauAttentionLayer, self).__init__()
        self.units = units
        self.use_bias = use_bias
        self.W1 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.W2 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
    def get_config(self):
        config = super(BahdanauAttentionLayer, self).get_config()
        config.update({"units": self.units, "use_bias":self.use_bias})
        return config
    def call(self, query, values, keys=None, verbose=False):
        expanded_query = tf.expand_dims(query, 2)
        encoded_query = self.W1(expanded_query)
        if(keys is None):
            encoded_keys = self.W2(tf.expand_dims(values, 1))
        else:
            keys = tf.expand_dims(keys, 1)
            encoded_keys = self.W2(keys)
        combined_encoded_query_and_keys = encoded_query * encoded_keys
        tanh_score = tf.nn.tanh(combined_encoded_query_and_keys)
        score = tf.reduce_sum(tanh_score, axis=-1)
        attention_weights = tf.nn.softmax(score, axis=-1)
        overall_context_vector = tf.matmul(attention_weights, values)
        context_vector = overall_context_vector
        return context_vector

class LuongAttention(tf.keras.layers.Layer):
    def __init__(self, units, use_bias=False):
        super(BahdanauAttention, self).__init__()
        self.units = units
        self.use_bias = use_bias
        self.W1 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.W2 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.V = tf.keras.layers.Dense(1)
    def get_config(self):
        config = super(BahdanauAttention, self).get_config()
        config.update({"units": self.units, "use_bias":self.use_bias})
        return config
    def call(self, query, values, keys=None, verbose=False):
        query_with_time_axis = tf.expand_dims(query, 2)
        encoded_query = self.W1(query_with_time_axis)
        if(keys is None):
            encoded_keys = self.W2(tf.expand_dims(values, 1))
        else:
            keys = tf.expand_dims(keys, 1)
            encoded_keys = self.W2(keys)
        combined_encoded_query_and_keys = encoded_query * encoded_keys
        tanh_score = tf.nn.tanh(combined_encoded_query_and_keys)
        score = self.V(tanh_score)
        squeezed_score = tf.squeeze(score, axis=-1)
        attention_weights = tf.nn.softmax(squeezed_score, axis=-1)
        context_vector = tf.matmul(attention_weights, values)
        return context_vector

def simple_encoder_decoder(historical_shape, targets_shape):
    historicals = tf.keras.layers.Input(shape=historical_shape)
    futures = tf.keras.layers.Input(shape=targets_shape)
    
    hidden = tf.keras.layers.LSTM(config["EMBEDDING_SIZE"], 
                                  return_sequences=False, 
                                  return_state=False, 
                                  recurrent_initializer='glorot_uniform', 
                                  activity_regularizer=L1L2(l1=0.000001, l2=0.000001))(historicals)
    
    cell = tf.keras.layers.LSTM(config["EMBEDDING_SIZE"], 
                                  return_sequences=True, 
                                  return_state=True, 
                                  recurrent_initializer='glorot_uniform', 
                                  activity_regularizer=L1L2(l1=0.000001, l2=0.000001))(historicals)
    
    print(cell)
    
    decoder = tf.keras.layers.LSTM(config["EMBEDDING_SIZE"],
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform', 
                                   activity_regularizer=L1L2(l1=0.000001, l2=0.000001))(futures, initial_state=[hidden,cell])
    
    
    encoder_outputs, hidden_state, cell_state = tf.keras.layers.LSTM(config["ENCODER_SIZE"], 
                                                       return_sequences=True, 
                                                       return_state=True, 
                                                       recurrent_initializer='glorot_uniform', 
                                                       activity_regularizer=L1L2(l1=0.000001, l2=0.000001))(inputs)
    
    if(config["ATTENTION_MODULE"] == "additive"):
        if(config["ATTENTION_METHOD"] == "standard"):
            attention_layer = BahdanauAttentionLayer(config["ATTENTION_SIZE"], use_bias=False)
        elif(config["ATTENTION_METHOD"] == "vectorized"):
            attention_layer = BahdanauAttention(config["ATTENTION_SIZE"], use_bias=False)
        else:
            raise Exception(f"Invalid config ATTENTION_METHOD of {config['ATTENTION_METHOD']}")
    elif(config["ATTENTION_MODULE"] == "multiplicative"):
        if(config["ATTENTION_METHOD"] == "standard"):
            attention_layer = LuongAttentionLayer(config["ATTENTION_SIZE"], use_bias=False)
        elif(config["ATTENTION_METHOD"] == "vectorized"):
            attention_layer = LuongAttention(config["ATTENTION_SIZE"], use_bias=False)
        else:
            raise Exception(f"Invalid config ATTENTION_METHOD of {config['ATTENTION_METHOD']}")
    else:
        raise Exception(f"Invalid config ATTENTION_MODULE of {config['ATTENTION_MODULE']}")
    
    decoder = tf.keras.layers.LSTM(config["DECODER_SIZE"],
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform', 
                                   activity_regularizer=L1L2(l1=0.000001, l2=0.000001))
    dropout = tf.keras.layers.Dropout(0.2)
    decoder_output = tf.keras.layers.Dense(1)
    all_outputs = []
    last_value = tf.expand_dims(inputs[:, -1, 0:1], 1)
    states = [hidden_state, cell_state]
    for i in range(config["N_FWD"]):
        use_verbose = False
        if(i == 0):
            use_verbose=True
        context_vector = attention_layer(query=tf.expand_dims(states[0],1), values=encoder_outputs, verbose=use_verbose)
        decoder_input = tf.concat((last_value, context_vector), axis=-1)
        x, hidden_state, cell_state = decoder(decoder_input, initial_state=states)
        states=[hidden_state, cell_state]
        x = dropout(x)
        last_value = decoder_output(x)
        all_outputs.append(last_value)
    outputs = tf.keras.layers.Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)
    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    model.compile(loss="mse", metrics=["mae"], optimizer=tf.keras.optimizers.Adam(learning_rate=config["LR"]))
    return model

model = simple_encoder_decoder(X_train[0].shape[1:], X_train[1].shape[1:])
model.summary()

Metal device set to: Apple M1 Pro


2022-12-13 10:41:42.779278: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-12-13 10:41:42.779544: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


[<KerasTensor: shape=(None, 72, 32) dtype=float32 (created by layer 'lstm_1')>, <KerasTensor: shape=(None, 32) dtype=float32 (created by layer 'lstm_1')>, <KerasTensor: shape=(None, 32) dtype=float32 (created by layer 'lstm_1')>]


ValueError: An `initial_state` was passed that is not compatible with `cell.state_size`. Received `state_spec`=ListWrapper([InputSpec(shape=(None, 32), ndim=2), ListWrapper([InputSpec(shape=(None, 72, 32), ndim=3), InputSpec(shape=(None, 32), ndim=2), InputSpec(shape=(None, 32), ndim=2)])]); however `cell.state_size` is [32, 32]

In [6]:
del model
gc.collect()

126967

## **Train Loop**

In [7]:
def train_model(model, X_train, y_train, X_val, y_val):
    reducer = tf.keras.callbacks.ReduceLROnPlateau(monior='val_loss', factor=0.1, patience=2, mode='min', cooldown=1)
    stopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, mode='min', restore_best_weights=True)
    model.fit(X_train, y_train,
              batch_size=config["BATCH_SIZE"],
              epochs=config["EPOCHS"], 
              callbacks=[reducer, stopper, WandbCallback()],
              validation_data=(X_val, y_val),
              validation_batch_size=config["BATCH_SIZE"],
              shuffle=True)
    return model

In [8]:
def train():
    for fold in range(1, 4):
        run = wandb.init(project="time-series-methods", entity="kmcguigan", group=f"{MODEL}-model", config=config, job_type="train")
        run.name = f'{MODEL}-fold-{fold}'
        X_train, y_train, X_val, y_val, _ = get_data(fold)
        model = simple_encoder_decoder()
        model = train_model(model, X_train, y_train, X_val, y_val)
        run.finish()
        del model
        del X_train
        del y_train
        del X_val
        del y_val
        gc.collect()
    return

In [None]:
train()

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30



VBox(children=(Label(value='0.225 MB of 0.225 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇▇██
loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,███████▂▂▂▂▂▂▂▂▂▂▂▂▁▁
mae,█▃▃▂▂▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▃▄▃▅▃▂▂▂▂▁▁▁▂▁▁▁▁▁▁
val_mae,█▅▄▄▃▅▃▂▂▂▂▁▁▁▂▁▁▁▁▁▁

0,1
best_epoch,16.0
best_val_loss,0.1116
epoch,20.0
loss,0.13819
lr,1e-05
mae,0.27462
val_loss,0.11247
val_mae,0.24331


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30