In [1]:
import os
import pickle
import gc

import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.regularizers import L1L2

from sklearn.model_selection import train_test_split

In [2]:
MODEL = "8-lstm-future-covariates-mdn"
COVARIATE_COLUMNS = ['covs_week_sin','covs_week_cos','covs_day_sin','covs_day_cos','covs_hour_sin','covs_hour_cos','covs_rbf_month_1','covs_rbf_month_2','covs_rbf_month_3','covs_rbf_month_4','covs_rbf_month_5','covs_rbf_month_6','covs_rbf_month_7','covs_rbf_month_8','covs_rbf_month_9','covs_rbf_month_10','covs_rbf_month_11','covs_rbf_month_12']

config = {
    "N_PREV": 120,
    "N_FORWARD": 48,
    "STEP_SIZE": 1,
    "N_PROB": 5,
    
    "LATENT_SIZE": 16,
    
    "LR": 0.001,
    "BATCH_SIZE": 256,
    "EPOCHS": 2,
    
    "L1": 0.0000001, 
    "L2": 0.0000001,
    
    "TARGET_COL": 'normalized_level',
}

In [3]:
import wandb
from wandb.keras import WandbCallback
from secrets import WANDB
wandb.login(key=WANDB)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\KiernanMcGuigan/.netrc


True

## **Load Data Function**

In [4]:
def get_samples(df, set_number):
    subset = df.loc[df.set==set_number, :]
    shape = subset.shape
    if(shape[0] > 1_000):
        subset = subset.iloc[-1_000:, :]
    data = subset[config['TARGET_COL']].values
    covariates = subset[COVARIATE_COLUMNS].values
    print(f'Data Shape: {shape}, Reduced Shape {data.shape}')
    
    X, y, future_covs, dates = [], [], [], []
    for start_idx in range(0, data.shape[0]-config["N_PREV"]-config["N_FORWARD"], config["STEP_SIZE"]):
        mid_idx = start_idx + config["N_PREV"]
        end_idx = mid_idx + config["N_FORWARD"]
        
        mus = np.zeros((config["N_PREV"],config["N_PROB"]))
        stds = np.zeros((config["N_PREV"],config["N_PROB"]))
        weights = np.zeros((config["N_PREV"],config["N_PROB"]))
        mus[:, 0] = data[start_idx:mid_idx]
        weights[:, 0] = 1.0
        
        X_data = np.stack([mus, stds, weights], axis=1)
        X.append(X_data)
        y.append(data[mid_idx:end_idx])
        future_covs.append(covariates[mid_idx:end_idx])
        dates.append(subset.iloc[mid_idx:end_idx].index.values)
        
    y = np.array(y)
    y = np.expand_dims(y, axis=-1)
    return np.array(X), y, np.array(future_covs), np.array(dates)

def to_dataset(X1, X2, y):
    dataset = tf.data.Dataset.from_tensor_slices(({'series':X1, 'future_dates':X2, 'targets':y}, y))
    dataset = dataset.cache().shuffle(X1.shape[0]+1).batch(config["BATCH_SIZE"]).prefetch(tf.data.experimental.AUTOTUNE)
    return dataset

def get_train_and_val_datasets(X1, X2, y):
    X1_train, X1_val, X2_train, X2_val, y_train, y_val = train_test_split(X1, X2, y, test_size=0.15, shuffle=True)
    print(f"Train shape: {X1_train.shape}, {X2_train.shape}, {y_train.shape}")
    print(f"Val shape: {X1_val.shape}, {X2_val.shape}, {y_val.shape}")
    train_ds = to_dataset(X1_train, X2_train, y_train)
    val_ds = to_dataset(X1_val, X2_val, y_val)
    return train_ds, val_ds

def df_to_samples(df):
    X, y, future, _ = get_samples(df, 0)
    train_ds, val_ds = get_train_and_val_datasets(X, future, y)
    X_test, y_test, future_test, dates = get_samples(df, 1)
    print(f"Test shape: {X_test.shape}, {future_test.shape}, {y_test.shape}, {dates.shape}")
    return train_ds, val_ds, X_test, y_test, future_test, dates
    
def get_data(fold):
    df = pd.read_csv(f'data/fold{fold}_data_v3.csv')
    with open(f'./data/fold{fold}_normalizer_v3.pkl', mode='rb') as f:
        normalizer = pickle.load(f)
    return df_to_samples(df), normalizer

args, _ = get_data(1)
del args
gc.collect()

Data Shape: (159096, 23), Reduced Shape (1000,)
Train shape: (707, 120, 3, 5), (707, 48, 18), (707, 48, 1)
Val shape: (125, 120, 3, 5), (125, 48, 18), (125, 48, 1)
Data Shape: (43390, 23), Reduced Shape (1000,)
Test shape: (832, 120, 3, 5), (832, 48, 18), (832, 48, 1), (832, 48)


0

## **Model Functions**

In [5]:
def nnelu(input):
    """ Computes the Non-Negative Exponential Linear Unit
    """
    return tf.add(tf.constant(1, dtype=tf.float32), tf.nn.elu(input))

tf.keras.utils.get_custom_objects().update({'nnelu': tf.keras.layers.Activation(nnelu)})

def identity(y_true, y_pred):
    return K.mean(y_pred)

def variational_loss(y_true, mu, sigma):
    inv_sigma_2 = 1 / K.square(sigma + K.epsilon())
    phi = inv_sigma_2 * K.exp(-inv_sigma_2 * K.square(y_true - mu))
    return -K.log(phi + K.epsilon())

def mdn_loss(y_true, mu, sigma, alpha):
    inv_sigma_2 = 1 / K.square(sigma + K.epsilon())
    phi = inv_sigma_2 * K.exp(-inv_sigma_2 * K.square(y_true - mu))
    loss = -K.log(phi + K.epsilon())
    return K.mean(loss * alpha)

# Test where sigma depends on the predicted mu concat with the input data

def get_model():
    inputs = tf.keras.layers.Input(shape=(config["N_PREV"], 3, config['N_PROB']), name='series')
    future_covs = tf.keras.layers.Input(shape=(config["N_FORWARD"], len(COVARIATE_COLUMNS)), name='future_dates')
    targets = tf.keras.layers.Input(shape=(config["N_FORWARD"], 1), name='targets')
    
    reshaper = tf.keras.layers.Reshape((1, 3*config['N_PROB']))
    lstm1 = tf.keras.layers.LSTM(config["LATENT_SIZE"], 
                                 return_sequences=True, 
                                 return_state=True, 
                                 recurrent_initializer='glorot_uniform', 
                                 activity_regularizer=L1L2(l1=config['L1'],l2=config['L2']))
    lstm2 = tf.keras.layers.LSTM(config["LATENT_SIZE"], 
                                 return_sequences=True, 
                                 return_state=True, 
                                 recurrent_initializer='glorot_uniform', 
                                 activity_regularizer=L1L2(l1=config['L1'],l2=config['L2']))
    lstm3 = tf.keras.layers.LSTM(config["LATENT_SIZE"], 
                                 return_sequences=False, 
                                 return_state=True, 
                                 recurrent_initializer='glorot_uniform', 
                                 activity_regularizer=L1L2(l1=config['L1'],l2=config['L2']))
    dense1 = tf.keras.layers.Dense(32)
    dense2 = tf.keras.layers.Dense(16)
    
    # create the mdn layers
    mu_layer = tf.keras.layers.Dense(config['N_PROB'], name="mu")
    sigma_layer = tf.keras.layers.Dense(config['N_PROB'], name="sigma", activation="softplus")
    alpha_layer = tf.keras.layers.Dense(config['N_PROB'], name="alpha", activation="softmax")
    loss_layer = tf.keras.layers.Lambda(lambda x: mdn_loss(x[0], x[1], x[2], x[3]), name="loss")
    
    forecasts = []
    losses = []
    
    x = tf.keras.layers.Reshape((config["N_PREV"], 3*config['N_PROB']))(inputs)
    x, hidden_state1, cell_state1 = lstm1(x)
    x, hidden_state2, cell_state2 = lstm2(x)
    x, hidden_state3, cell_state3 = lstm3(x)
    x = K.concatenate([x, future_covs[:, 0, :]])
    x = dense1(x)
    x = dense2(x)
    
    mu = mu_layer(x)
    sigma = sigma_layer(x)
    alpha = alpha_layer(x)
    
    stacked_prediction = tf.stack([mu, sigma, alpha], axis=1)
    
    forecasts.append(stacked_prediction)
    losses.append(loss_layer([targets[:, 0], mu, sigma, alpha]))
    
    prediction = reshaper(stacked_prediction)
    
    for idx in range(1, config['N_FORWARD']):
        x = prediction
        x, hidden_state1, cell_state1 = lstm1(x, initial_state=[hidden_state1, cell_state1])
        x, hidden_state2, cell_state2 = lstm2(x, initial_state=[hidden_state2, cell_state2])
        x, hidden_state3, cell_state3 = lstm3(x, initial_state=[hidden_state3, cell_state3])
        x = K.concatenate([x, future_covs[:, idx, :]])
        x = dense1(x)
        x = dense2(x)
        
        mu = mu_layer(x)
        sigma = sigma_layer(x)
        alpha = alpha_layer(x)
        
        stacked_prediction = tf.stack([mu, sigma, alpha], axis=1)
        forecasts.append(stacked_prediction)
        losses.append(loss_layer([targets[:, idx], mu, sigma, alpha]))
        
        prediction = reshaper(stacked_prediction)
        
#     forecasts_output = tf.keras.layers.Lambda(lambda x: K.concatenate(x, axis=-1))(forecasts)
    forecasts_output = tf.stack(forecasts, axis=1)
    loss_output = tf.stack(losses, axis=-1)
    # consider multiplying by a decay rate function so that more recent forecasts are of higher importance
    
    model = tf.keras.models.Model(inputs=[inputs,future_covs,targets], outputs=loss_output)
    forecaster = tf.keras.models.Model(inputs=[inputs,future_covs], outputs=forecasts_output)
    model.compile(loss=identity, metrics=["mae"], optimizer=tf.keras.optimizers.Adam(learning_rate=config["LR"]))
    return model, forecaster

# def get_model():
#     model = MyModel()
#     model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(config["LR"]), metrics=['mae'])
#     model.predict(tf.ones(shape=(config['BATCH_SIZE'],config['N_PREV'],1)))
#     return model
    
test_model, _ = get_model()
print(test_model.summary())
del test_model
gc.collect()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 series (InputLayer)            [(None, 120, 3, 5)]  0           []                               
                                                                                                  
 reshape_1 (Reshape)            (None, 120, 15)      0           ['series[0][0]']                 
                                                                                                  
 lstm (LSTM)                    multiple             2048        ['reshape_1[0][0]',              
                                                                  'reshape[0][0]',                
                                                                  'lstm[0][1]',                   
                                                                  'lstm[0][2]',               

                                                                  'reshape[25][0]',               
                                                                  'lstm[25][1]',                  
                                                                  'lstm[25][2]',                  
                                                                  'reshape[26][0]',               
                                                                  'lstm[26][1]',                  
                                                                  'lstm[26][2]',                  
                                                                  'reshape[27][0]',               
                                                                  'lstm[27][1]',                  
                                                                  'lstm[27][2]',                  
                                                                  'reshape[28][0]',               
          

                                                                  'lstm[6][0]',                   
                                                                  'lstm_1[5][1]',                 
                                                                  'lstm_1[5][2]',                 
                                                                  'lstm[7][0]',                   
                                                                  'lstm_1[6][1]',                 
                                                                  'lstm_1[6][2]',                 
                                                                  'lstm[8][0]',                   
                                                                  'lstm_1[7][1]',                 
                                                                  'lstm_1[7][2]',                 
                                                                  'lstm[9][0]',                   
          

                                                                  'lstm_1[32][2]',                
                                                                  'lstm[34][0]',                  
                                                                  'lstm_1[33][1]',                
                                                                  'lstm_1[33][2]',                
                                                                  'lstm[35][0]',                  
                                                                  'lstm_1[34][1]',                
                                                                  'lstm_1[34][2]',                
                                                                  'lstm[36][0]',                  
                                                                  'lstm_1[35][1]',                
                                                                  'lstm_1[35][2]',                
          

                                                                  'lstm_1[13][0]',                
                                                                  'lstm_2[12][1]',                
                                                                  'lstm_2[12][2]',                
                                                                  'lstm_1[14][0]',                
                                                                  'lstm_2[13][1]',                
                                                                  'lstm_2[13][2]',                
                                                                  'lstm_1[15][0]',                
                                                                  'lstm_2[14][1]',                
                                                                  'lstm_2[14][2]',                
                                                                  'lstm_1[16][0]',                
          

                                                                  'lstm_2[39][2]',                
                                                                  'lstm_1[41][0]',                
                                                                  'lstm_2[40][1]',                
                                                                  'lstm_2[40][2]',                
                                                                  'lstm_1[42][0]',                
                                                                  'lstm_2[41][1]',                
                                                                  'lstm_2[41][2]',                
                                                                  'lstm_1[43][0]',                
                                                                  'lstm_2[42][1]',                
                                                                  'lstm_2[42][2]',                
          

                                                                  'dense[4][0]',                  
                                                                  'dense[5][0]',                  
                                                                  'dense[6][0]',                  
                                                                  'dense[7][0]',                  
                                                                  'dense[8][0]',                  
                                                                  'dense[9][0]',                  
                                                                  'dense[10][0]',                 
                                                                  'dense[11][0]',                 
                                                                  'dense[12][0]',                 
                                                                  'dense[13][0]',                 
          

                                                                  'dense_1[38][0]',               
                                                                  'dense_1[39][0]',               
                                                                  'dense_1[40][0]',               
                                                                  'dense_1[41][0]',               
                                                                  'dense_1[42][0]',               
                                                                  'dense_1[43][0]',               
                                                                  'dense_1[44][0]',               
                                                                  'dense_1[45][0]',               
                                                                  'dense_1[46][0]',               
                                                                  'dense_1[47][0]']               
          

                                                                  'dense_1[23][0]',               
                                                                  'dense_1[24][0]',               
                                                                  'dense_1[25][0]',               
                                                                  'dense_1[26][0]',               
                                                                  'dense_1[27][0]',               
                                                                  'dense_1[28][0]',               
                                                                  'dense_1[29][0]',               
                                                                  'dense_1[30][0]',               
                                                                  'dense_1[31][0]',               
                                                                  'dense_1[32][0]',               
          

                                                                 ]']                              
                                                                                                  
 tf.stack_1 (TFOpLambda)        (None, 3, 5)         0           ['mu[1][0]',                     
                                                                  'sigma[1][0]',                  
                                                                  'alpha[1][0]']                  
                                                                                                  
 tf.__operators__.getitem_4 (Sl  (None, 18)          0           ['future_dates[0][0]']           
 icingOpLambda)                                                                                   
                                                                                                  
 tf.concat_2 (TFOpLambda)       (None, 34)           0           ['lstm_2[2][0]',                 
          

 tf.__operators__.getitem_18 (S  (None, 18)          0           ['future_dates[0][0]']           
 licingOpLambda)                                                                                  
                                                                                                  
 tf.concat_9 (TFOpLambda)       (None, 34)           0           ['lstm_2[9][0]',                 
                                                                  'tf.__operators__.getitem_18[0][
                                                                 0]']                             
                                                                                                  
 tf.stack_9 (TFOpLambda)        (None, 3, 5)         0           ['mu[9][0]',                     
                                                                  'sigma[9][0]',                  
                                                                  'alpha[9][0]']                  
          

                                                                                                  
 tf.stack_16 (TFOpLambda)       (None, 3, 5)         0           ['mu[16][0]',                    
                                                                  'sigma[16][0]',                 
                                                                  'alpha[16][0]']                 
                                                                                                  
 tf.__operators__.getitem_34 (S  (None, 18)          0           ['future_dates[0][0]']           
 licingOpLambda)                                                                                  
                                                                                                  
 tf.concat_17 (TFOpLambda)      (None, 34)           0           ['lstm_2[17][0]',                
                                                                  'tf.__operators__.getitem_34[0][
          

 licingOpLambda)                                                                                  
                                                                                                  
 tf.concat_24 (TFOpLambda)      (None, 34)           0           ['lstm_2[24][0]',                
                                                                  'tf.__operators__.getitem_48[0][
                                                                 0]']                             
                                                                                                  
 tf.stack_24 (TFOpLambda)       (None, 3, 5)         0           ['mu[24][0]',                    
                                                                  'sigma[24][0]',                 
                                                                  'alpha[24][0]']                 
                                                                                                  
 tf.__oper

 tf.stack_31 (TFOpLambda)       (None, 3, 5)         0           ['mu[31][0]',                    
                                                                  'sigma[31][0]',                 
                                                                  'alpha[31][0]']                 
                                                                                                  
 tf.__operators__.getitem_64 (S  (None, 18)          0           ['future_dates[0][0]']           
 licingOpLambda)                                                                                  
                                                                                                  
 tf.concat_32 (TFOpLambda)      (None, 34)           0           ['lstm_2[32][0]',                
                                                                  'tf.__operators__.getitem_64[0][
                                                                 0]']                             
          

                                                                                                  
 tf.concat_39 (TFOpLambda)      (None, 34)           0           ['lstm_2[39][0]',                
                                                                  'tf.__operators__.getitem_78[0][
                                                                 0]']                             
                                                                                                  
 tf.stack_39 (TFOpLambda)       (None, 3, 5)         0           ['mu[39][0]',                    
                                                                  'sigma[39][0]',                 
                                                                  'alpha[39][0]']                 
                                                                                                  
 tf.__operators__.getitem_80 (S  (None, 18)          0           ['future_dates[0][0]']           
 licingOpL

                                                                  'sigma[46][0]',                 
                                                                  'alpha[46][0]']                 
                                                                                                  
 tf.__operators__.getitem_94 (S  (None, 18)          0           ['future_dates[0][0]']           
 licingOpLambda)                                                                                  
                                                                                                  
 tf.concat_47 (TFOpLambda)      (None, 34)           0           ['lstm_2[47][0]',                
                                                                  'tf.__operators__.getitem_94[0][
                                                                 0]']                             
                                                                                                  
 targets (

                                                                                                  
 tf.__operators__.getitem_49 (S  (None, 1)           0           ['targets[0][0]']                
 licingOpLambda)                                                                                  
                                                                                                  
 tf.__operators__.getitem_51 (S  (None, 1)           0           ['targets[0][0]']                
 licingOpLambda)                                                                                  
                                                                                                  
 tf.__operators__.getitem_53 (S  (None, 1)           0           ['targets[0][0]']                
 licingOpLambda)                                                                                  
                                                                                                  
 tf.__oper

                                                                  'tf.__operators__.getitem_5[0][0
                                                                 ]',                              
                                                                  'mu[2][0]',                     
                                                                  'sigma[2][0]',                  
                                                                  'alpha[2][0]',                  
                                                                  'tf.__operators__.getitem_7[0][0
                                                                 ]',                              
                                                                  'mu[3][0]',                     
                                                                  'sigma[3][0]',                  
                                                                  'alpha[3][0]',                  
          

                                                                  'sigma[18][0]',                 
                                                                  'alpha[18][0]',                 
                                                                  'tf.__operators__.getitem_39[0][
                                                                 0]',                             
                                                                  'mu[19][0]',                    
                                                                  'sigma[19][0]',                 
                                                                  'alpha[19][0]',                 
                                                                  'tf.__operators__.getitem_41[0][
                                                                 0]',                             
                                                                  'mu[20][0]',                    
          

                                                                 0]',                             
                                                                  'mu[35][0]',                    
                                                                  'sigma[35][0]',                 
                                                                  'alpha[35][0]',                 
                                                                  'tf.__operators__.getitem_73[0][
                                                                 0]',                             
                                                                  'mu[36][0]',                    
                                                                  'sigma[36][0]',                 
                                                                  'alpha[36][0]',                 
                                                                  'tf.__operators__.getitem_75[0][
          

                                                                  'loss[18][0]',                  
                                                                  'loss[19][0]',                  
                                                                  'loss[20][0]',                  
                                                                  'loss[21][0]',                  
                                                                  'loss[22][0]',                  
                                                                  'loss[23][0]',                  
                                                                  'loss[24][0]',                  
                                                                  'loss[25][0]',                  
                                                                  'loss[26][0]',                  
                                                                  'loss[27][0]',                  
          

90

## **Train Loop**

In [6]:
def train_model(model, train_ds, val_ds):
    reducer = tf.keras.callbacks.ReduceLROnPlateau(monior='val_loss', factor=0.1, patience=2, mode='min', cooldown=1)
    stopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, mode='min', restore_best_weights=True)
    model.fit(train_ds,
              epochs=config["EPOCHS"], 
              callbacks=[reducer, stopper, WandbCallback()],
              validation_data=val_ds)
    return

def sample():
    return

def test_model(forecaster, X_test, y_test, future_test, normalizer):
    predictions = forecaster.predict({'series':X_test, 'future_dates':future_test})
    print(predictions.shape)
    inversed_predictions, inversed_actuals = [], []
    for pred, actual in zip(predictions, y_test):
        inversed_predictions.append(normalizer.inverse_transform(np.array(pred).reshape(-1,1)).reshape((-1)))
        inversed_actuals.append(normalizer.inverse_transform(np.array(actual).reshape(-1,1)).reshape((-1)))
    inversed_predictions = np.array(inversed_predictions)
    inversed_actuals = np.array(inversed_actuals)
    forward_error = np.mean(np.abs(inversed_actuals - inversed_predictions), axis=-1)
    error = np.mean(forward_error)
    return error, forward_error, inversed_predictions, inversed_actuals

def record_forward_error(forward_errors, error, run):
    run.log({'test/error': error})
    run.log({'test/forward_error': wandb.plot.line_series(
        xs=[i+1 for i in range(forward_errors.shape[0])],
        ys=[forward_errors],
        keys=['Forward Horizon Errors'],
        title=f'Forward Horizon Errors (MAE: {error})',
    )})
        
def graph_predictions(predictions, actuals, dates, run):
    for look_ahead in [1, 12, 24, 36, 48]:
        preds, true, time = predictions[:, look_ahead-1], actuals[:, look_ahead-1], dates[:, look_ahead-1]
        error = np.mean(np.abs(true - preds))
        run.log({f'test/forecast-{look_ahead}-hours-ahead': wandb.plot.line_series(
            xs=time,
            ys=[true, preds],
            keys=['Actuals', 'Predictions'],
            title=f'{look_ahead} Hour(s) Ahead Forecast (MAE: {error})',
            xname='week'
        )})

In [7]:
def train():
    for fold in range(1, 4):
        run = wandb.init(project="time-series-methods", entity="kmcguigan", group=f"{MODEL}-model", config=config, job_type="train")
        run.name = f'{MODEL}-fold-{fold}'
        (train_ds, val_ds, X_test, y_test, future_test, dates), normalizer = get_data(fold)
        model, forecaster = get_model()
        train_model(model, train_ds, val_ds)
        error, forward_errors, predictions, actuals = test_model(forecaster, X_test, y_test, future_test, normalizer)
        run.log({'test-error': error})
        record_forward_error(forward_errors, error, run)
        graph_predictions(predictions, actuals, dates, run)
        run.finish()
        del model
        del train_ds
        del val_ds
        gc.collect()
    return

In [8]:
train()

[34m[1mwandb[0m: Currently logged in as: [33mkmcguigan[0m. Use [1m`wandb login --relogin`[0m to force relogin


Data Shape: (159096, 23), Reduced Shape (1000,)
Train shape: (707, 120, 3, 5), (707, 48, 18), (707, 48, 1)
Val shape: (125, 120, 3, 5), (125, 48, 18), (125, 48, 1)
Data Shape: (43390, 23), Reduced Shape (1000,)
Test shape: (832, 120, 3, 5), (832, 48, 18), (832, 48, 1), (832, 48)




Epoch 1/2
Epoch 2/2
(832, 48, 3, 5)


ValueError: operands could not be broadcast together with shapes (832,48) (832,48,3,5) 