In [1]:
import os
import pickle

import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.regularizers import L1L2

In [2]:
# Experiment hyper-parameters. The same mapping is read by the model/training
# functions below and passed to wandb as the run configuration.
config = dict(
    # Window sizes: number of input steps and number of forecast steps.
    N_PREV=90,
    N_FWD=30,
    # Layer widths for the encoder LSTM, decoder LSTM and attention projections.
    ENCODER_SIZE=16,
    DECODER_SIZE=16,
    ATTENTION_SIZE=8,
    # Attention variant: "additive" or "multiplicative" module,
    # "standard" or "vectorized" scoring method.
    ATTENTION_MODULE="additive",
    ATTENTION_METHOD="standard",
    # Optimisation settings.
    LR=0.001,
    BATCH_SIZE=64,
    EPOCHS=30,
)

# Label used for the wandb group and run names, e.g. "standard-additive-attention".
MODEL = f"{config['ATTENTION_METHOD']}-{config['ATTENTION_MODULE']}-attention"

In [3]:
# Weights & Biases setup: authenticate once so later wandb.init() calls can log runs.
import wandb
# NOTE(review): WandbCallback is imported here but is not referenced anywhere
# else in this notebook.
from wandb.keras import WandbCallback
# NOTE(review): the standard-library `secrets` module has no `WANDB` attribute,
# so this presumably resolves to a project-local secrets.py that shadows it and
# holds the API key — confirm, and keep that file out of version control.
from secrets import WANDB
wandb.login(key=WANDB)

[34m[1mwandb[0m: Currently logged in as: [33mall-off-nothing[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\kiern/.netrc


True

## **Load Data Function**

In [4]:
def get_data(fold, data_dir="./data"):
    """Load the train/validation arrays and the fitted normalizer for one fold.

    The arrays are read back-to-back from a single ``fold{fold}_data.npy`` file
    in the order X_train, y_train, X_val, y_val; the normalizer is unpickled
    from ``fold{fold}_normalizer.pkl`` in the same directory.

    Args:
        fold: Fold identifier interpolated into the two file names.
        data_dir: Directory holding the fold files. Defaults to ``"./data"``,
            the location the notebook originally hard-coded.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, normalizer)``.

    Raises:
        OSError: If either file cannot be opened.
    """
    data_path = os.path.join(data_dir, f"fold{fold}_data.npy")
    normalizer_path = os.path.join(data_dir, f"fold{fold}_normalizer.pkl")

    with open(data_path, mode="rb") as f:
        # Each np.load call consumes the next array stored in the same stream,
        # so the read order must match the order the arrays were saved in.
        X_train, y_train, X_val, y_val = [np.load(f) for _ in range(4)]

    with open(normalizer_path, mode="rb") as f:
        # SECURITY: pickle.load can execute arbitrary code; only use fold files
        # produced by a trusted preprocessing step.
        normalizer = pickle.load(f)

    return X_train, y_train, X_val, y_val, normalizer
        

## **Model Functions**

In [5]:
class BahdanauAttentionLayer(tf.keras.layers.Layer):
    """Additive attention whose score is a plain sum over the tanh features.

    The query and the keys are each passed through their own Dense projection,
    added together with broadcasting, squashed by tanh and summed over the
    projection axis to give one score per (query step, key step) pair. Scores
    are softmax-normalised over the key axis and used to average ``values``.

    Args:
        units: Width of the two Dense projections.
        use_bias: Whether the Dense projections include a bias term.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            They are forwarded to the base class so the dictionary returned by
            ``get_config`` can be fed back into the constructor.
    """

    def __init__(self, units, use_bias=False, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.use_bias = use_bias
        # Referenced through `tf.keras.layers` because the notebook never
        # imports a bare `layers` name.
        self.W1 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.W2 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)

    def get_config(self):
        """Return the base layer config extended with ``units`` and ``use_bias``."""
        layer_config = super().get_config()
        layer_config.update({"units": self.units, "use_bias": self.use_bias})
        return layer_config

    def call(self, query, values, keys=None, verbose=False):
        """Compute the attention-weighted average of ``values`` for each query step.

        Args:
            query: Query tensor; assumed (batch, query_steps, features) — TODO confirm.
            values: Tensor being averaged; assumed (batch, key_steps, features) — TODO confirm.
            keys: Optional tensor scored against the query in place of ``values``.
            verbose: Unused; kept so existing call sites keep working.

        Returns:
            The result of ``matmul(attention_weights, values)``.
        """
        # A new axis at position 2 on the query and position 1 on the keys lets
        # the two projections broadcast against each other.
        encoded_query = self.W1(tf.expand_dims(query, 2))
        key_source = values if keys is None else keys
        encoded_keys = self.W2(tf.expand_dims(key_source, 1))
        # Collapse the projection axis: one scalar score per query/key pair.
        score = tf.reduce_sum(tf.nn.tanh(encoded_query + encoded_keys), axis=-1)
        attention_weights = tf.nn.softmax(score, axis=-1)
        return tf.matmul(attention_weights, values)

class BahdanauAttention(tf.keras.layers.Layer):
    """Additive attention whose score comes from a learned Dense(1) projection.

    The query and the keys are each passed through their own Dense projection,
    added together with broadcasting and squashed by tanh; a final Dense(1)
    layer ``V`` turns each feature vector into a scalar score. Scores are
    softmax-normalised over the key axis and used to average ``values``.

    Args:
        units: Width of the query and key Dense projections.
        use_bias: Whether the query and key projections include a bias term.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            They are forwarded to the base class so the dictionary returned by
            ``get_config`` can be fed back into the constructor.
    """

    def __init__(self, units, use_bias=False, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.use_bias = use_bias
        # Referenced through `tf.keras.layers` because the notebook never
        # imports a bare `layers` name.
        self.W1 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.W2 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.V = tf.keras.layers.Dense(1)

    def get_config(self):
        """Return the base layer config extended with ``units`` and ``use_bias``."""
        layer_config = super().get_config()
        layer_config.update({"units": self.units, "use_bias": self.use_bias})
        return layer_config

    def call(self, query, values, keys=None, verbose=False):
        """Compute the attention-weighted average of ``values`` for each query step.

        Args:
            query: Query tensor; assumed (batch, query_steps, features) — TODO confirm.
            values: Tensor being averaged; assumed (batch, key_steps, features) — TODO confirm.
            keys: Optional tensor scored against the query in place of ``values``.
            verbose: Unused; kept so existing call sites keep working.

        Returns:
            The result of ``matmul(attention_weights, values)``.
        """
        # A new axis at position 2 on the query and position 1 on the keys lets
        # the two projections broadcast against each other.
        encoded_query = self.W1(tf.expand_dims(query, 2))
        key_source = values if keys is None else keys
        encoded_keys = self.W2(tf.expand_dims(key_source, 1))
        # V emits a trailing axis of size 1, which is squeezed away before the softmax.
        score = tf.squeeze(self.V(tf.nn.tanh(encoded_query + encoded_keys)), axis=-1)
        attention_weights = tf.nn.softmax(score, axis=-1)
        return tf.matmul(attention_weights, values)
    
class LuongAttentionLayer(tf.keras.layers.Layer):
    """Multiplicative attention whose score is a plain sum over the tanh features.

    The query and the keys are each passed through their own Dense projection,
    multiplied element-wise with broadcasting, squashed by tanh and summed over
    the projection axis to give one score per (query step, key step) pair.
    Scores are softmax-normalised over the key axis and used to average
    ``values``.

    Args:
        units: Width of the two Dense projections.
        use_bias: Whether the Dense projections include a bias term.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            They are forwarded to the base class so the dictionary returned by
            ``get_config`` can be fed back into the constructor.
    """

    def __init__(self, units, use_bias=False, **kwargs):
        # Zero-argument super(): the previous explicit form named an unrelated
        # class, which makes super() raise TypeError for instances of this one.
        super().__init__(**kwargs)
        self.units = units
        self.use_bias = use_bias
        # Referenced through `tf.keras.layers` because the notebook never
        # imports a bare `layers` name.
        self.W1 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.W2 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)

    def get_config(self):
        """Return the base layer config extended with ``units`` and ``use_bias``."""
        layer_config = super().get_config()
        layer_config.update({"units": self.units, "use_bias": self.use_bias})
        return layer_config

    def call(self, query, values, keys=None, verbose=False):
        """Compute the attention-weighted average of ``values`` for each query step.

        Args:
            query: Query tensor; assumed (batch, query_steps, features) — TODO confirm.
            values: Tensor being averaged; assumed (batch, key_steps, features) — TODO confirm.
            keys: Optional tensor scored against the query in place of ``values``.
            verbose: Unused; kept so existing call sites keep working.

        Returns:
            The result of ``matmul(attention_weights, values)``.
        """
        # A new axis at position 2 on the query and position 1 on the keys lets
        # the two projections broadcast against each other.
        encoded_query = self.W1(tf.expand_dims(query, 2))
        key_source = values if keys is None else keys
        encoded_keys = self.W2(tf.expand_dims(key_source, 1))
        # Element-wise product (not a sum) is what makes this the multiplicative variant.
        score = tf.reduce_sum(tf.nn.tanh(encoded_query * encoded_keys), axis=-1)
        attention_weights = tf.nn.softmax(score, axis=-1)
        return tf.matmul(attention_weights, values)

class LuongAttention(tf.keras.layers.Layer):
    """Multiplicative attention whose score comes from a learned Dense(1) projection.

    The query and the keys are each passed through their own Dense projection,
    multiplied element-wise with broadcasting and squashed by tanh; a final
    Dense(1) layer ``V`` turns each feature vector into a scalar score. Scores
    are softmax-normalised over the key axis and used to average ``values``.

    Args:
        units: Width of the query and key Dense projections.
        use_bias: Whether the query and key projections include a bias term.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
            They are forwarded to the base class so the dictionary returned by
            ``get_config`` can be fed back into the constructor.
    """

    def __init__(self, units, use_bias=False, **kwargs):
        # Zero-argument super(): the previous explicit form named an unrelated
        # class, which makes super() raise TypeError for instances of this one.
        super().__init__(**kwargs)
        self.units = units
        self.use_bias = use_bias
        # Referenced through `tf.keras.layers` because the notebook never
        # imports a bare `layers` name.
        self.W1 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.W2 = tf.keras.layers.Dense(self.units, use_bias=self.use_bias)
        self.V = tf.keras.layers.Dense(1)

    def get_config(self):
        """Return the base layer config extended with ``units`` and ``use_bias``."""
        layer_config = super().get_config()
        layer_config.update({"units": self.units, "use_bias": self.use_bias})
        return layer_config

    def call(self, query, values, keys=None, verbose=False):
        """Compute the attention-weighted average of ``values`` for each query step.

        Args:
            query: Query tensor; assumed (batch, query_steps, features) — TODO confirm.
            values: Tensor being averaged; assumed (batch, key_steps, features) — TODO confirm.
            keys: Optional tensor scored against the query in place of ``values``.
            verbose: Unused; kept so existing call sites keep working.

        Returns:
            The result of ``matmul(attention_weights, values)``.
        """
        # A new axis at position 2 on the query and position 1 on the keys lets
        # the two projections broadcast against each other.
        encoded_query = self.W1(tf.expand_dims(query, 2))
        key_source = values if keys is None else keys
        encoded_keys = self.W2(tf.expand_dims(key_source, 1))
        # V emits a trailing axis of size 1, which is squeezed away before the softmax.
        score = tf.squeeze(self.V(tf.nn.tanh(encoded_query * encoded_keys)), axis=-1)
        attention_weights = tf.nn.softmax(score, axis=-1)
        return tf.matmul(attention_weights, values)

def simple_encoder_decoder():
    """Build and compile the LSTM encoder-decoder forecaster described by ``config``.

    An LSTM encoder reads a window of ``config["N_PREV"]`` single-feature steps.
    Its final hidden and cell states seed an LSTM decoder that is unrolled for
    ``config["N_FWD"]`` steps; the first step is fed the last observed input
    value and every later step is fed the previous step's prediction.

    Returns:
        A ``tf.keras`` model compiled with MSE loss, an MAE metric and an Adam
        optimizer at ``config["LR"]``. Its output is the per-step Dense(1)
        predictions concatenated along axis 1.

    Raises:
        ValueError: If ``config["ATTENTION_MODULE"]`` or
            ``config["ATTENTION_METHOD"]`` is not one of the supported values.
    """
    inputs = tf.keras.layers.Input(shape=(config["N_PREV"], 1))
    encoder_outputs, hidden_state, cell_state = tf.keras.layers.LSTM(
        config["ENCODER_SIZE"],
        return_sequences=True,
        return_state=True,
        recurrent_initializer='glorot_uniform',
        activity_regularizer=L1L2(l1=0.000001, l2=0.000001))(inputs)

    # Supported attention classes, keyed by module and then by method.
    attention_classes = {
        "additive": {
            "standard": BahdanauAttentionLayer,
            "vectorized": BahdanauAttention,
        },
        "multiplicative": {
            "standard": LuongAttentionLayer,
            "vectorized": LuongAttention,
        },
    }
    attention_module = config["ATTENTION_MODULE"]
    attention_method = config["ATTENTION_METHOD"]
    # The module is validated before the method, so an unknown module is
    # always the error that gets reported first.
    if attention_module not in attention_classes:
        raise ValueError(f"Invalid config ATTENTION_MODULE of {attention_module}")
    if attention_method not in attention_classes[attention_module]:
        raise ValueError(f"Invalid config ATTENTION_METHOD of {attention_method}")
    # NOTE(review): the attention layer is constructed but never applied in the
    # decoder loop below, and `encoder_outputs` is likewise unused, so the
    # ATTENTION_* settings currently have no effect on the compiled model.
    # Decide how the context vector should enter the decoder and wire it in.
    attention_layer = attention_classes[attention_module][attention_method](
        config["ATTENTION_SIZE"], use_bias=True)

    # These three layers are created once and reused at every decoding step,
    # so all steps share the same weights.
    decoder = tf.keras.layers.LSTM(config["DECODER_SIZE"],
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform',
                                   activity_regularizer=L1L2(l1=0.000001, l2=0.000001))
    dropout = tf.keras.layers.Dropout(0.2)
    decoder_output = tf.keras.layers.Dense(1)

    all_outputs = []
    # Seed the decoder with the final observed value, kept as a length-1 sequence.
    last_value = tf.expand_dims(inputs[:, -1, 0:1], 1)
    states = [hidden_state, cell_state]
    for _ in range(config["N_FWD"]):
        step_output, hidden_state, cell_state = decoder(last_value, initial_state=states)
        states = [hidden_state, cell_state]
        step_output = dropout(step_output)
        step_output = decoder_output(step_output)
        # Feed this prediction back in as the next step's input.
        last_value = step_output
        all_outputs.append(step_output)

    outputs = tf.keras.layers.Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)
    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    model.compile(loss="mse", metrics=["mae"], optimizer=tf.keras.optimizers.Adam(learning_rate=config["LR"]))
    return model

## **Train Loop**

In [6]:
def train_model(model, X_train, y_train, X_val, y_val):
    """Fit ``model`` with learning-rate reduction and early stopping on ``val_loss``.

    Args:
        model: Compiled Keras model to train; it is fitted in place.
        X_train: Training inputs.
        y_train: Training targets.
        X_val: Validation inputs.
        y_val: Validation targets.

    Returns:
        The same ``model`` object after fitting. Early stopping is configured
        with ``restore_best_weights=True``.
    """
    # The keyword was previously misspelt as `monior`; it is now spelt
    # correctly so the monitored metric is set explicitly.
    reducer = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, mode='min', cooldown=1)
    stopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, mode='min', restore_best_weights=True)
    model.fit(X_train, y_train,
              batch_size=config["BATCH_SIZE"],
              epochs=config["EPOCHS"],
              callbacks=[reducer, stopper],
              validation_data=(X_val, y_val),
              validation_batch_size=config["BATCH_SIZE"],
              shuffle=True)
    return model

In [7]:
def train(folds=range(1, 4)):
    """Train one freshly built model per fold, each inside its own wandb run.

    Args:
        folds: Iterable of fold identifiers passed to ``get_data``. Defaults to
            folds 1, 2 and 3, the range the notebook originally hard-coded.

    Returns:
        None. The trained models are not kept.
    """
    for fold in folds:
        run = wandb.init(project="time-series-methods", entity="kmcguigan", group=f"{MODEL}-model", config=config, job_type="train")
        # try/finally closes the run even when loading or training raises, so
        # a failed fold does not leave a wandb run open.
        try:
            run.name = f'{MODEL}-fold-{fold}'
            # The normalizer returned by get_data is not needed for training.
            X_train, y_train, X_val, y_val, _ = get_data(fold)
            model = simple_encoder_decoder()
            # NOTE(review): train_model passes only the LR-reduction and
            # early-stopping callbacks to fit(); no wandb callback is attached,
            # so presumably only the config reaches wandb — confirm intent.
            train_model(model, X_train, y_train, X_val, y_val)
        finally:
            run.finish()

In [8]:
# Run the per-fold training loop defined in train().
train()

NameError: name 'pickle' is not defined