In [62]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


# NOTEBOOK DESCRIPTION
The goal of this notebook is to train the initial global model on the whole set of input time series. The extended N-BEATS architecture and the data generator (which creates batches of samples during training) are loaded from `/modules/tensorflow_helper_func.py`.

# LIBS

In [1]:
import pandas as pd
import numpy as np
import os
import pickle
import warnings
import random

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import *
from tensorflow.keras.regularizers import *

In [2]:
cwd = os.path.dirname(os.getcwd())

# run tensorflow_helper_func.py
tensorflow_helper_func_path =    '"{}/modules/tensorflow_helper_func.py"'.format(cwd)
%run $tensorflow_helper_func_path

# 0. INPUT DATA

In [5]:
# Parent of the current working directory; all project folders are resolved
# relative to it.
cwd = os.path.dirname(os.getcwd())
folder_mod_global = cwd + "/models/global N-BEATS-exog/"

folder_gen = cwd + "/generated_data/"
folder_data = cwd + "/generated_data/dataset_separated_by_ts/"

# Load the pre-generated train / validation objects.
# Context managers close the file handles as soon as loading finishes
# (the bare `pickle.load(open(...))` form leaves them open).
# NOTE: unpickling can execute arbitrary code -- only load files that were
# produced by this project.
with open(folder_gen + "train_dict.p", "rb") as train_file:
    train_dict = pickle.load(train_file)
with open(folder_gen + "val_dict.p", "rb") as val_file:
    val_dict = pickle.load(val_file)

# OTHER
freq = "30min"
periods = 48  # number of 30-minute steps in one day


def _half_open_range(start, end, **kwargs):
    """Return ``pd.date_range(start, end, **kwargs)`` without the end point.

    Gives the same result as the former ``closed="left"`` argument, but works
    on every pandas version: ``closed`` was removed from ``date_range`` in
    pandas 2.0 and its replacement ``inclusive`` only exists from pandas 1.4.
    """
    full_range = pd.date_range(start, end, **kwargs)
    # Drop the last stamp only when it really is the end point; slicing keeps
    # the index name and frequency.
    if len(full_range) and full_range[-1] == pd.Timestamp(end):
        return full_range[:-1]
    return full_range


idx = _half_open_range("2009-07-20", "2009-12-07", freq=freq)
# One column per step of the day: H_1 ... H_48.
y_cols = ["H_{}".format(i) for i in range(1, periods+1)]

# Train range is half-open: the end date is the first stamp of the validation range.
train_idx = _half_open_range("2009-07-27", "2010-07-12", freq=freq, name="timestamp")
# Validation and test each span periods*7*12 steps, i.e. 12 weeks of half-hourly stamps.
val_idx = pd.date_range("2010-07-12", freq=freq, periods=periods*7*12, name="timestamp")
test_idx = pd.date_range("2010-10-04", freq=freq, periods=periods*7*12, name="timestamp")

# 1. TRAINING
- The learning rate is initially set to `lr = 0.001` and is reduced by a factor of 10 each time the validation loss plateaus (three reductions in total).
- Training works well on a CPU instance (the model was trained on an AWS `ml.m4.4xlarge` instance).

In [6]:
# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Seed every random number generator in use so that reruns are repeatable:
# NumPy, the stdlib `random` module, and TensorFlow (which would otherwise
# draw different initial weights on each run).
np.random.seed(0)
random.seed(0)
tf.random.set_seed(0)

# TRAINING PARAMS
lr = 0.001
batch_size = 256

n_steps_per_epoch = 50
epochs = 10_000

# MODEL PARAMS
# "input_size" holds two sizes: 7*periods (one week of steps) and 12+7+48 = 67.
# NOTE(review): presumably the target-history length and the width of the
# exogenous feature vector -- confirm against NBeats_exog.
params_dict = {"input_size": [7*periods, 12+7+48],
               "output_size": periods,
               "block_layers": 3,
               "hidden_units": 512, 
               "n_blocks": 3,
               "block_sharing": False}

# INDICES for all ts
# Series ids 0..999; create_ts_idx returns an (index, ids) pair per split.
# The test split is built without a data dictionary argument.
ts_ids_list = list(range(1000))
train_idx_all, train_ids = create_ts_idx(train_idx, ts_ids_list, train_dict)
val_idx_all, val_ids = create_ts_idx(val_idx, ts_ids_list, val_dict)
test_idx_all, test_ids = create_ts_idx(test_idx, ts_ids_list)

# DATA GENERATORS
# Only the training generator is given a batch size and a number of steps
# per epoch; the validation and test generators use TSGenerator's defaults.
train_generator = TSGenerator(
    set_type="train",
    batch_size=batch_size,
    n_steps_per_epoch=n_steps_per_epoch,
    ts_ids_list=ts_ids_list,
)
val_generator = TSGenerator(
    set_type="val",
    ts_ids_list=ts_ids_list,
)
test_generator = TSGenerator(
    set_type="test",
    ts_ids_list=ts_ids_list,
)

# CREATE NN MODEL
model = NBeats_exog(params_dict)
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# in TF 2.x and rejected by newer Keras releases.
optimizer = Adam(learning_rate=lr)
# MAE is used both as the training loss and as the reported metric
# (the checkpoint callback monitors "val_mae").
model.compile(optimizer, loss="mae", metrics=["mae"])
              
# CALLBACKS
# Both callbacks write into folder_mod_global; create it up front so that
# training does not fail on a fresh checkout where the folder is missing.
os.makedirs(folder_mod_global, exist_ok=True)

# Per-epoch metrics log.
csvlogger = CSVLogger(folder_mod_global + 'temp_log.csv')
# Keep only the checkpoint with the best validation MAE seen so far.
save_val_weights = ModelCheckpoint(folder_mod_global + "val_best_weights-global.h5",
                                   monitor="val_mae", save_best_only=True)
# NOTE(review): the notebook text describes lowering the learning rate when
# the validation loss plateaus, but no ReduceLROnPlateau / scheduler callback
# is registered here -- confirm whether that was done manually between runs.
callbacks = [csvlogger, save_val_weights]

# FIT
# Train from the batch generator, validating against the validation
# generator; the registered callbacks log metrics and checkpoint the model.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=val_generator,
    callbacks=callbacks,
    verbose=1,
)

# 2. PREDICT & SAVE

In [10]:
def _predict_frame(generator, index, ids):
    """Run `model` on `generator` and return the ids joined with the forecasts.

    The forecasts are labelled with `index` (rows) and `y_cols` (columns),
    concatenated column-wise after `ids`, and the combined frame is clipped
    below at 0 (the clip therefore covers the id columns as well).
    """
    forecasts = pd.DataFrame(model.predict(generator), index=index, columns=y_cols)
    combined = pd.concat([ids, forecasts], axis=1)
    return combined.clip(0)


# VAL
y_val_pred = _predict_frame(val_generator, val_idx_all, val_ids)

# TEST
y_test_pred = _predict_frame(test_generator, test_idx_all, test_ids)

# SAVE
for file_name, frame in (("y_val_pred.p", y_val_pred),
                         ("y_test_pred.p", y_test_pred)):
    frame.to_pickle(folder_mod_global + file_name)