In [1]:
%cd ../..

/Users/mlevydaniel/Desktop/modern-time-series-forecasting-with-python


In [2]:
import os
import shutil

import joblib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"
from itertools import cycle


from pathlib import Path

from src.forecasting.ml_forecasting import (
    MissingValueConfig,
    calculate_metrics,
)
from src.utils import plotting_utils
from tqdm.autonotebook import tqdm
from src.forecasting.ml_forecasting import calculate_metrics
from src.utils import ts_utils

# %load_ext autoreload
# %autoreload 2
np.random.seed(42)
tqdm.pandas()

import os
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

  from tqdm.autonotebook import tqdm


In [3]:
os.makedirs("imgs/chapter_14", exist_ok=True)
os.makedirs("notebooks/Chapter14/saved_weights", exist_ok=True)

preprocessed = Path("data/london_smart_meters/preprocessed")
output = Path("data/london_smart_meters/output")
# Make True to select a subsample. Helps with faster training.
TRAIN_SUBSAMPLE = True

## Utility Functions

In [4]:
def format_plot(fig, legends=None, xlabel="Time", ylabel="Value", title="", font_size=15):
    if legends:
        names = cycle(legends)
        fig.for_each_trace(lambda t: t.update(name=next(names)))
    fig.update_layout(
        autosize=False,
        width=900,
        height=500,
        title_text=title,
        title={"x": 0.5, "xanchor": "center", "yanchor": "top"},
        titlefont={"size": 20},
        legend_title=None,
        legend=dict(
            font=dict(size=font_size),
            orientation="h",
            yanchor="bottom",
            y=0.98,
            xanchor="right",
            x=1,
        ),
        yaxis=dict(
            title_text=ylabel,
            titlefont=dict(size=font_size),
            tickfont=dict(size=font_size),
        ),
        xaxis=dict(
            title_text=xlabel,
            titlefont=dict(size=font_size),
            tickfont=dict(size=font_size),
        )
    )
    return fig

In [5]:
from itertools import cycle


def plot_forecast(pred_df, forecast_columns, forecast_display_names=None):
    if forecast_display_names is None:
        forecast_display_names = forecast_columns
    else:
        assert len(forecast_columns) == len(forecast_display_names)
    mask = ~pred_df[forecast_columns[0]].isnull()
    colors = [
        "rgba(" + ",".join([str(c) for c in plotting_utils.hex_to_rgb(c)]) + ",<alpha>)"
        for c in px.colors.qualitative.Plotly
    ]
    act_color = colors[0]
    colors = cycle(colors[1:])
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=pred_df[mask].index,
            y=pred_df[mask].energy_consumption,
            mode="lines",
            line=dict(color=act_color.replace("<alpha>", "0.9")),
            name="Actual Consumption",
        )
    )
    for col, display_col in zip(forecast_columns, forecast_display_names):
        fig.add_trace(
            go.Scatter(
                x=pred_df[mask].index,
                y=pred_df.loc[mask, col],
                mode="lines",
                line=dict(dash="dot", color=next(colors).replace("<alpha>", "1")),
                name=display_col,
            )
        )
    return fig

def highlight_abs_min(s, props=''):
    return np.where(s.abs() == np.nanmin(np.abs(s.values)), props, '')

def evaluate_forecast(pred_df, train_data, fc_column, name, target_name="energy_consumption"):
    metric_l = []
    for _id in tqdm(pred_df.index.get_level_values(0).unique(), desc="Calculating metrics..."):
        target = pred_df.xs(_id)[[target_name]]
        _y_pred = pred_df.xs(_id)[[fc_column]]
        history = train_data.xs(_id)[[target_name]]
        # display(history.tail())
        # display(_y_pred.head())
        # display(target.head())
        metric_l.append(
            calculate_metrics(target, _y_pred, name=name, y_train=history)
        )
    eval_metrics_df = pd.DataFrame(metric_l)
    agg_metrics = {
            "Algorithm": name,
            "MAE": np.nanmean(np.abs(pred_df[fc_column]-pred_df[target_name])),
            "MSE": np.nanmean(np.power(pred_df[fc_column]-pred_df[target_name], 2)),
            "meanMASE": eval_metrics_df.loc[:, "MASE"].mean(),
            "Forecast Bias": 100*(np.nansum(pred_df[fc_column])-np.nansum(pred_df[target_name]))/np.nansum(pred_df[target_name])
    }
    return agg_metrics, eval_metrics_df

# from pytorch_lightning.utilities.cloud_io import load as pl_load
import torch

def load_weights(model, weight_path):
    state_dict = torch.load(weight_path)
    model.load_state_dict(state_dict)

In [6]:
from collections import namedtuple

FeatureConfig = namedtuple(
    "FeatureConfig",
    [
        "target",
        "index_cols",
        "static_categoricals",
        "static_reals",
        "time_varying_known_categoricals",
        "time_varying_known_reals",
        "time_varying_unknown_reals",
        "group_ids"
    ],
)

## Reading the data

In [7]:
try:
    #Reading the missing value imputed and train test split data
    train_df = pd.read_parquet(preprocessed/"selected_blocks_train_missing_imputed_feature_engg.parquet")
    
    # Read in the Validation dataset as test_df so that we predict on it
    test_df = pd.read_parquet(preprocessed/"selected_blocks_val_missing_imputed_feature_engg.parquet")
    # test_df = pd.read_parquet(preprocessed/"selected_blocks_test_missing_imputed_feature_engg.parquet")
except FileNotFoundError:
    display(HTML("""
    <div class="alert alert-block alert-warning">
    <b>Warning!</b> File not found. Please make sure you have run 01-Feature Engineering.ipynb in Chapter06
    </div>
    """))

In [8]:
train_df['apparentTemperature'] = train_df['apparentTemperature'].interpolate(method='linear', limit_direction='both')
test_df['apparentTemperature'] = test_df['apparentTemperature'].interpolate(method='linear', limit_direction='both')

In [9]:
# To run on smaller set of data for faster iteration.
if TRAIN_SUBSAMPLE:
    print("sub sampling")
    SAMPLE = 10
    sampled_LCLids = pd.Series(train_df.LCLid.unique().remove_unused_categories().categories).sample(SAMPLE, random_state=99).tolist()
    train_df = train_df.loc[train_df.LCLid.isin(sampled_LCLids)]
    test_df = test_df.loc[test_df.LCLid.isin(sampled_LCLids)]

sub sampling


## Defining the different features

In [10]:
feat_config = FeatureConfig(
    target="energy_consumption",
    index_cols=["LCLid", "timestamp"],
    static_categoricals=[
        "LCLid",
        "stdorToU",
        "Acorn",
        "Acorn_grouped",
        "file",
    ],  # Categoricals which does not change with time
    static_reals=[],  # Reals which does not change with time
    time_varying_known_categoricals=[  # Categoricals which change with time
        "holidays",
        "timestamp_Dayofweek",
    ],
    time_varying_known_reals=[  # Reals which change with time
        "apparentTemperature",
    ],  
    time_varying_unknown_reals=[  # Reals which change with time, but we don't have the future. Like the target
        "energy_consumption"
    ],  
    group_ids=[  # Feature or list of features which uniquely identifies each entity
        "LCLid"
    ],  
)

### Creating a continuous time index for PyTorch Forecasting

In [11]:
# Combining train and test with a flag
train_df['train'] = True
test_df['train'] = False
data = pd.concat([train_df, test_df])
del train_df, test_df

# Adding the time index
data['time_idx'] = data.timestamp.apply(lambda x: x.value)
data["_min_time_idx"] = data.groupby("LCLid", observed=True)['time_idx'].transform("min")

diff = data.iloc[1]['time_idx'] - data.iloc[0]['time_idx']
data['time_idx'] = ((data['time_idx'] - data['_min_time_idx']) / diff).astype(int)
data.drop(columns="_min_time_idx", inplace=True)

# separating to train and test
train_df = data.loc[data.train]
test_df = data.loc[~data.train]
del data

### Converting the categoricals to `object` dtype

In [12]:
train_df[
    feat_config.static_categoricals + feat_config.time_varying_known_categoricals
] = train_df[
    feat_config.static_categoricals + feat_config.time_varying_known_categoricals
].astype(
    "object"
)

test_df[
    feat_config.static_categoricals + feat_config.time_varying_known_categoricals
] = test_df[
    feat_config.static_categoricals + feat_config.time_varying_known_categoricals
].astype(
    "object"
)

### Handling Missing Values

In [13]:
#Checking missing values
n = train_df.isna().any()
n[n]

visibility                                        True
windBearing                                       True
temperature                                       True
dewPoint                                          True
pressure                                          True
windSpeed                                         True
precipType                                        True
icon                                              True
humidity                                          True
summary                                           True
energy_consumption_lag_1                          True
energy_consumption_lag_2                          True
energy_consumption_lag_3                          True
energy_consumption_lag_4                          True
energy_consumption_lag_5                          True
energy_consumption_lag_46                         True
energy_consumption_lag_47                         True
energy_consumption_lag_48                         True
energy_con

# Training Global Models

In [14]:
import pytorch_lightning as pl
pl.seed_everything(42)
import torch

from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import RMSE, MAE

Global seed set to 42


In [15]:
# # Load the TensorBoard notebook extension
# %load_ext tensorboard
# os.makedirs("lightning_logs", exist_ok=True)
# %tensorboard --logdir lightning_logs/

# Or start the tensorboard in a separate command prompt/terminal using
# tensorboard --logdir lightning_logs/

### Config

In [16]:
max_prediction_length = 1
max_encoder_length = 48 * 2
batch_size = 512  # set this to a value which your GPU can handle
train_model = False # Set this to True to train model. Else will load saved models ! Warning! Training on full dataset takes 3-6 hours

In [17]:
metric_record = []
individual_metrics = dict()

### Creating dataframes for train, val and test

In [18]:
train_df.timestamp.max(), test_df.timestamp.min()

(Timestamp('2013-12-31 23:30:00'), Timestamp('2014-01-01 00:00:00'))

In [19]:
# Adding 2 days of history (48 * 2) to create the samples
history_cutoff = train_df.timestamp.max() - pd.Timedelta(2, "D")
hist_df = train_df[train_df.timestamp > history_cutoff]

print(f"History Min: {hist_df.timestamp.min()} | Max: {hist_df.timestamp.max()} | Length: {len(hist_df.timestamp.unique())}")

History Min: 2013-12-30 00:00:00 | Max: 2013-12-31 23:30:00 | Length: 96


In [20]:
# Keeping 1 days aside as a validation set
cutoff = train_df.timestamp.max() - pd.Timedelta(1, "D")

# Adding 2 days of history (48 * 2) to create the samples
history_cutoff = train_df.timestamp.max() - pd.Timedelta(3, "D")

val_df = train_df[train_df.timestamp > cutoff].reset_index(drop=True)
val_history = train_df[(train_df.timestamp >= history_cutoff) & (train_df.timestamp <= cutoff)].reset_index(drop=True)
train_df = train_df[train_df.timestamp <= cutoff].reset_index(drop=True)

print("Split Timestamps:")
print(f"Train Max: {train_df.timestamp.max()} | Val History Min and Max: {val_history.timestamp.min(), val_history.timestamp.max()} | Val Min and Max: {val_df.timestamp.min(), val_df.timestamp.max()}")
print(f"Val History Size: {len(val_history.timestamp.unique())} | Val Size: {len(val_df.timestamp.unique())}")

Split Timestamps:
Train Max: 2013-12-30 23:30:00 | Val History Min and Max: (Timestamp('2013-12-28 23:30:00'), Timestamp('2013-12-30 23:30:00')) | Val Min and Max: (Timestamp('2013-12-31 00:00:00'), Timestamp('2013-12-31 23:30:00'))
Val History Size: 97 | Val Size: 48


In [21]:
pred_df = test_df[feat_config.index_cols + [feat_config.target] + ['time_idx']].copy()
# pred_df.set_index(feat_config.index_cols, inplace=True)

In [22]:
cols = feat_config.index_cols + [feat_config.target]
full_df = pd.concat([train_df[cols], val_df[cols]]).set_index(feat_config.index_cols)

## Baseline

Using just the history of the time series

In [23]:
tag = "simple"

### Converting data into TimeSeriesDataset from PyTorch Forecasting

In [24]:
# Defining the training dataset
training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target=feat_config.target,
    group_ids=feat_config.group_ids,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    time_varying_unknown_reals=[
        "energy_consumption",
    ],
    target_normalizer=GroupNormalizer(
        groups=feat_config.group_ids, transformation=None
    )
)

# Defining the validation dataset with the same parameters as training
validation = TimeSeriesDataSet.from_dataset(training, pd.concat([val_history, val_df]).reset_index(drop=True), stop_randomization=True)

# Defining the test dataset with the same parameters as training
test = TimeSeriesDataSet.from_dataset(training, pd.concat([hist_df, test_df]).reset_index(drop=True), stop_randomization=True)

In [25]:
# Making the dataloaders
# num_workers can be increased in linux to speed-up training
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [26]:
# Testing the dataloader
x, y = next(iter(train_dataloader))
print("\nsizes of x =")

for key, value in x.items():
    print(f"\t{key} = {value.size()}")

print("\nsize of y =")
print(f"\ty = {y[0].size()}")


sizes of x =
	encoder_cat = torch.Size([512, 96, 0])
	encoder_cont = torch.Size([512, 96, 1])
	encoder_target = torch.Size([512, 96])
	encoder_lengths = torch.Size([512])
	decoder_cat = torch.Size([512, 1, 0])
	decoder_cont = torch.Size([512, 1, 1])
	decoder_target = torch.Size([512, 1])
	decoder_lengths = torch.Size([512])
	decoder_time_idx = torch.Size([512, 1])
	groups = torch.Size([512, 1])
	target_scale = torch.Size([512, 2])

size of y =
	y = torch.Size([512, 1])


### Creating the Model

In [27]:
# Importing the skeleton and helper models from src
from src.dl.ptf_models import SingleStepRNN, SingleStepRNNModel

**The helper model which is a custom PyTorch Forecasting model is as below:**

It is designed for simple RNN based forecasting, which is what we will be using to show different strategies of global modelling
```python
class SingleStepRNNModel(BaseModel):
    def __init__(self, network_callable: Callable, model_params: Dict, **kwargs):
        # saves arguments in signature to `.hparams` attribute, mandatory call - do not skip this
        self.save_hyperparameters(model_params)
        # pass additional arguments to BaseModel.__init__, mandatory call - do not skip this
        super().__init__(**kwargs)
        self.network = network_callable(**model_params)

    def forward(self, x: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        prediction = self.network(x)
        # rescale predictions into target space
        prediction = self.transform_output(prediction, target_scale=x["target_scale"])
        # We need to return a dictionary that at least contains the prediction
        # The parameter can be directly forwarded from the input.
        # The conversion to a named tuple can be directly achieved with the `to_network_output` function.
        return self.to_network_output(prediction=prediction)
```

### Defining the Forward function

In [28]:
from typing import Dict

class SimpleRNNModel(SingleStepRNN):
    def __init__(
        self,
        rnn_type: str,
        input_size: int,
        hidden_size: int,
        num_layers: int,
        bidirectional: bool,
    ):
        super().__init__(rnn_type, input_size, hidden_size, num_layers, bidirectional)

    def forward(self, x: Dict):
        # Using the encoder continuous which has the history window
        x = x["encoder_cont"] # x --> (batch_size, seq_len, input_size)
        # Processing through the RNN
        x, _ = self.rnn(x)  # --> (batch_size, seq_len, hidden_size)
        # Using a FC layer on last hidden state
        x = self.fc(x[:, -1, :])  # --> (batch_size, seq_len, 1)
        return x

In [29]:
model_params = dict(
    rnn_type="LSTM",
    input_size=len(training.reals),
    hidden_size=256, #128
    num_layers=2,
    bidirectional=False,
)

other_params = dict(
    learning_rate=5e-5,
    optimizer="adam",
    loss=RMSE(),
    logging_metrics=[RMSE(), MAE()],
)

In [30]:
model = SingleStepRNNModel.from_dataset(
    training,
    network_callable=SimpleRNNModel,
    model_params=model_params,
    **other_params
)
#Testing out the model
x, y = next(iter(train_dataloader))
_ = model(x)
type(_), _.prediction.shape

/Users/mlevydaniel/miniforge3/envs/pytorch_m1/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.


(pytorch_forecasting.utils.TupleOutputMixIn.to_network_output.<locals>.Output,
 torch.Size([512, 1]))

### Training the model

In [31]:
saved_model_sampled = 'notebooks/Chapter15/saved_weights/baseline_sampled.wt'
saved_model_full = 'notebooks/Chapter15/saved_weights/baseline.wt'

In [32]:
if train_model:
    trainer = pl.Trainer(
        auto_select_gpus=True,
        gpus=-1,
        min_epochs=1,
        max_epochs=20,
        callbacks=[
            pl.callbacks.EarlyStopping(monitor="val_loss", patience=3 if TRAIN_SUBSAMPLE else 4*3),
            pl.callbacks.ModelCheckpoint(
                monitor="val_loss", save_last=True, mode="min", auto_insert_metric_name=True
            ),
        ],
        val_check_interval=1.0 if TRAIN_SUBSAMPLE else 2000,
        log_every_n_steps=50 if TRAIN_SUBSAMPLE else 2000,
    )
    trainer.fit(
        model,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )
    #Loading the best model
    best_model_path = trainer.checkpoint_callback.best_model_path
    best_model = SingleStepRNNModel.load_from_checkpoint(best_model_path)
    print(f"Loading the best model from: {best_model_path}")
    shutil.copy(best_model_path, saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full)
else:
    best_model_path = saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full
    load_weights(model, best_model_path)
    best_model =  model
    print ("Skipping Training and loading the model from {best_model_path}")

Skipping Training and loading the model from {best_model_path}


In [33]:
# Predicting on the test dataset and storing in a df
# pred, index = best_model.predict(test, return_index=True)

pred = best_model.predict(test, return_index=True)
index = pred.index
pred = pred.output.cpu().numpy()

index[tag] = pred
pred_df = pred_df.reset_index().merge(index, on=["time_idx", "LCLid"], how='left').set_index(feat_config.index_cols)

# Evaluating the forecast
agg_metrics, eval_metrics_df = evaluate_forecast(
    pred_df = pred_df,
    train_data = full_df,
    fc_column=tag,
    name=tag,
)
metric_record.append(agg_metrics)
individual_metrics[tag] = eval_metrics_df


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/mlevydaniel/miniforge3/envs/pytorch_m1/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
  if (x["decoder_lengths"] < x["decoder_lengths"].max()).any():


Calculating metrics...:   0%|          | 0/10 [00:00<?, ?it/s]

In [34]:
pd.DataFrame(metric_record)

Unnamed: 0,Algorithm,MAE,MSE,meanMASE,Forecast Bias
0,simple,0.07867,0.02501,0.931383,-3.655122


## Baseline + Time-varying Information

Using the history of the time series, static, and time varying features

In [35]:
tag = "simple+time_varying"

### Converting data into TimeSeriesDataset from PyTorch Forecasting

In [36]:
# Defining the training dataset
training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target=feat_config.target,
    group_ids=feat_config.group_ids,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    time_varying_known_reals=feat_config.time_varying_known_reals,
    time_varying_unknown_reals=[
        "energy_consumption",
    ],
    target_normalizer=GroupNormalizer(
        groups=feat_config.group_ids, transformation=None
    )
)

# Defining the validation dataset with the same parameters as training
validation = TimeSeriesDataSet.from_dataset(training, pd.concat([val_history,val_df]).reset_index(drop=True), stop_randomization=True)

# Defining the test dataset with the same parameters as training
test = TimeSeriesDataSet.from_dataset(training, pd.concat([hist_df, test_df]).reset_index(drop=True), stop_randomization=True)

In [37]:
# Making the dataloaders
# num_workers can be increased in linux to speed-up training
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [38]:
# Testing the dataloader
x, y = next(iter(train_dataloader))
print("\nsizes of x =")

for key, value in x.items():
    print(f"\t{key} = {value.size()}")

print("\nsize of y =")
print(f"\ty = {y[0].size()}")


sizes of x =
	encoder_cat = torch.Size([512, 96, 0])
	encoder_cont = torch.Size([512, 96, 2])
	encoder_target = torch.Size([512, 96])
	encoder_lengths = torch.Size([512])
	decoder_cat = torch.Size([512, 1, 0])
	decoder_cont = torch.Size([512, 1, 2])
	decoder_target = torch.Size([512, 1])
	decoder_lengths = torch.Size([512])
	decoder_time_idx = torch.Size([512, 1])
	groups = torch.Size([512, 1])
	target_scale = torch.Size([512, 2])

size of y =
	y = torch.Size([512, 1])


### Creating the Model

We have to make a few tweaks to our previous model to include time-warying information

### [Additional] Rolling the input and formatting the input

In [39]:
# Concatenating the encoder and decoder series: (encoder(t), decoder(t-1))
x_cont = torch.cat([x["encoder_cont"], x["decoder_cont"]], dim=1)
x_cont.shape

torch.Size([512, 97, 2])

In [40]:
# Let's see what the first batch target is
x_cont[0, :, -1]

tensor([-7.1601e-01, -2.9184e-01, -9.0544e-03,  2.0303e-01,  2.4865e+00,
         9.1706e-01, -3.2718e-01, -3.9788e-01, -5.3927e-01, -7.2308e-01,
        -7.1601e-01, -7.1601e-01,  4.2926e-01, -4.0495e-01, -7.2308e-01,
        -7.1601e-01, -5.1472e-02, -3.9788e-01, -6.0997e-01, -7.1601e-01,
        -4.1202e-01,  4.2926e-01,  5.4944e-01,  1.0514e+00, -3.7333e-02,
        -3.5546e-01,  1.9224e-02,  6.5548e-01,  3.8684e-01,  8.8171e-01,
         6.3427e-01,  5.4944e-01, -3.0598e-01,  1.2705e+00,  2.9494e-01,
        -4.2616e-01, -7.1601e-01, -6.9480e-01, -3.9081e-01, -5.8876e-01,
        -7.1601e-01, -7.2308e-01, -5.1099e-01, -4.4737e-01, -7.1601e-01,
        -7.2308e-01, -7.0894e-01, -1.6124e-02, -5.6755e-01, -2.4235e-01,
         2.1717e-01,  1.8889e-01,  1.3554e+00,  1.4756e+00, -3.5546e-01,
        -4.2616e-01, -4.4030e-01, -7.2308e-01,  1.2527e-01,  1.1362e+00,
        -3.9081e-01, -7.2308e-01, -7.1601e-01, -5.2513e-01,  6.8711e-02,
         5.6358e-01, -1.9848e-03, -4.1202e-01, -1.9

In [41]:
# Rolling the target by one step
x_cont[:, :, -1] = torch.roll(x_cont[:, :, -1], 1, dims=1)

In [42]:
# Let's see the first batch target again.
x_cont[0, :, -1]
# We can see that it has shifted to the right by one step

tensor([-3.2011e-01, -7.1601e-01, -2.9184e-01, -9.0544e-03,  2.0303e-01,
         2.4865e+00,  9.1706e-01, -3.2718e-01, -3.9788e-01, -5.3927e-01,
        -7.2308e-01, -7.1601e-01, -7.1601e-01,  4.2926e-01, -4.0495e-01,
        -7.2308e-01, -7.1601e-01, -5.1472e-02, -3.9788e-01, -6.0997e-01,
        -7.1601e-01, -4.1202e-01,  4.2926e-01,  5.4944e-01,  1.0514e+00,
        -3.7333e-02, -3.5546e-01,  1.9224e-02,  6.5548e-01,  3.8684e-01,
         8.8171e-01,  6.3427e-01,  5.4944e-01, -3.0598e-01,  1.2705e+00,
         2.9494e-01, -4.2616e-01, -7.1601e-01, -6.9480e-01, -3.9081e-01,
        -5.8876e-01, -7.1601e-01, -7.2308e-01, -5.1099e-01, -4.4737e-01,
        -7.1601e-01, -7.2308e-01, -7.0894e-01, -1.6124e-02, -5.6755e-01,
        -2.4235e-01,  2.1717e-01,  1.8889e-01,  1.3554e+00,  1.4756e+00,
        -3.5546e-01, -4.2616e-01, -4.4030e-01, -7.2308e-01,  1.2527e-01,
         1.1362e+00, -3.9081e-01, -7.2308e-01, -7.1601e-01, -5.2513e-01,
         6.8711e-02,  5.6358e-01, -1.9848e-03, -4.1

In [43]:
x_cont.shape

torch.Size([512, 97, 2])

In [44]:
# Dropping the first timestep
x_cont[0, 1:, :].shape

torch.Size([96, 2])

### Defining the Forward function

In [45]:
from typing import Dict

class DynamicFeatureRNNModel(SingleStepRNN):
    def __init__(
        self,
        rnn_type: str,
        input_size: int,
        hidden_size: int,
        num_layers: int,
        bidirectional: bool,
    ):
        super().__init__(rnn_type, input_size, hidden_size, num_layers, bidirectional)

    def forward(self, x: Dict):
        # Using the encoder and decoder sequence (explanation in the book)
        x_cont = torch.cat([x["encoder_cont"],x["decoder_cont"]], dim=1)
        # Roll target by 1 (explanation in the book)
        x_cont[:, :, -1] = torch.roll(x_cont[:, :, -1], 1, dims=1)
        x = x_cont
        # dropping first timestep (explanation in the book)
        x = x[:, 1:, :] # x --> (batch_size, seq_len, input_size)
        # Processing through the RNN
        x, _ = self.rnn(x)  # --> (batch_size, seq_len, hidden_size)
        # Using a FC layer on last hidden state
        x = self.fc(x[:, -1, :])  # --> (batch_size, seq_len, 1)
        return x

In [46]:
model_params = dict(
    rnn_type="LSTM",
    input_size=len(training.reals),
    hidden_size=256, #128
    num_layers=2,
    bidirectional=False,
)

other_params = dict(
    learning_rate=5e-5,
    optimizer="adam",
    loss=RMSE(),
    logging_metrics=[RMSE(), MAE()],
)

In [47]:
model = SingleStepRNNModel.from_dataset(
    training,
    network_callable=DynamicFeatureRNNModel,
    model_params=model_params,
    **other_params
)

#Testing out the model
x, y = next(iter(train_dataloader))
_ = model(x)

type(_), _.prediction.shape

/Users/mlevydaniel/miniforge3/envs/pytorch_m1/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.


(pytorch_forecasting.utils.TupleOutputMixIn.to_network_output.<locals>.Output,
 torch.Size([512, 1]))

### Training the model

In [48]:
saved_model_sampled = 'notebooks/Chapter15/saved_weights/baseline_time_varying_sampled.wt'
saved_model_full = 'notebooks/Chapter15/saved_weights/baseline_time_varying.wt'

In [49]:
if train_model:
    trainer = pl.Trainer(
        accelerator='cpu',
        min_epochs=1,
        max_epochs=20,
        callbacks=[
            pl.callbacks.EarlyStopping(monitor="val_loss", patience=3 if TRAIN_SUBSAMPLE else 4*3),
            pl.callbacks.ModelCheckpoint(
                monitor="val_loss", save_last=True, mode="min", auto_insert_metric_name=True
            ),
        ],
        val_check_interval=1.0 if TRAIN_SUBSAMPLE else 2000,
        log_every_n_steps=50 if TRAIN_SUBSAMPLE else 2000,
        # fast_dev_run=True
        # precision = 16
    )
    trainer.fit(
        model,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )
    #Loading the best model
    best_model_path = trainer.checkpoint_callback.best_model_path
    best_model = SingleStepRNNModel.load_from_checkpoint(best_model_path)
    print(f"Loading the best model from: {best_model_path}")
    shutil.copy(best_model_path, saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full)
else:
    best_model_path = saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full
    load_weights(model, best_model_path)
    best_model =  model
    best_model = best_model.cpu()  # Mover a CPU
    print ("Skipping Training and loading the model from {best_model_path}")

Skipping Training and loading the model from {best_model_path}


In [50]:
# Predicting on the test dataset and storing in a df
pred = best_model.predict(test, return_index=True)
index = pred.index
pred = pred.output.cpu().numpy()

index[tag] = pred
pred_df = pred_df.reset_index().merge(index, on=["time_idx", "LCLid"], how='left').set_index(feat_config.index_cols)

# Evaluating the forecast
agg_metrics, eval_metrics_df = evaluate_forecast(
    pred_df = pred_df,
    train_data = full_df,
    fc_column=tag,
    name=tag,
)
metric_record.append(agg_metrics)
individual_metrics[tag]=eval_metrics_df

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/mlevydaniel/miniforge3/envs/pytorch_m1/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
  x_cont[:, :, -1] = torch.roll(x_cont[:, :, -1], 1, dims=1)


Calculating metrics...:   0%|          | 0/10 [00:00<?, ?it/s]

In [52]:
pd.DataFrame(metric_record).style.format(
    {"MAE": "{:.4f}", "MSE": "{:.4f}", "meanMASE": "{:.4f}", "Forecast Bias": "{:.2f}%"}
).highlight_min(color="lightgreen", subset=["MAE", "MSE", "meanMASE"]).apply(
    highlight_abs_min,
    props="color:black;background-color:lightgreen",
    axis=0,
    subset=["Forecast Bias"],
)

Unnamed: 0,Algorithm,MAE,MSE,meanMASE,Forecast Bias
0,simple,0.0787,0.025,0.9314,-3.66%
1,simple+time_varying,0.0796,0.0243,0.9419,-1.83%


## Baseline + Static + Time-varying Information

Using the history of the time series, static, and time varying features

In [53]:
tag = "simple+static+time_varying"

### Converting data into TimeSeriesDataset from PyTorch Forecasting

In [54]:
# Defining the training dataset
training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target=feat_config.target,
    group_ids=feat_config.group_ids,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=feat_config.static_categoricals,
    static_reals=feat_config.static_reals,
    time_varying_known_categoricals=feat_config.time_varying_known_categoricals,
    time_varying_known_reals=feat_config.time_varying_known_reals,
    time_varying_unknown_reals=[
        "energy_consumption",
    ],
    target_normalizer=GroupNormalizer(
        groups=feat_config.group_ids, transformation=None
    )
)

# Defining the validation dataset with the same parameters as training
validation = TimeSeriesDataSet.from_dataset(training, pd.concat([val_history,val_df]).reset_index(drop=True), stop_randomization=True)

# Defining the test dataset with the same parameters as training
test = TimeSeriesDataSet.from_dataset(training, pd.concat([hist_df, test_df]).reset_index(drop=True), stop_randomization=True)

In [55]:
# Making the dataloaders
# num_workers can be increased in linux to speed-up training
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [57]:
# Testing the dataloader
x, y = next(iter(train_dataloader))
print("\nsizes of x =")

for key, value in x.items():
    print(f"\t{key} = {value.size()}")

print("\nsize of y =")
print(f"\ty = {y[0].size()}")


sizes of x =
	encoder_cat = torch.Size([512, 96, 7])
	encoder_cont = torch.Size([512, 96, 2])
	encoder_target = torch.Size([512, 96])
	encoder_lengths = torch.Size([512])
	decoder_cat = torch.Size([512, 1, 7])
	decoder_cont = torch.Size([512, 1, 2])
	decoder_target = torch.Size([512, 1])
	decoder_lengths = torch.Size([512])
	decoder_time_idx = torch.Size([512, 1])
	groups = torch.Size([512, 1])
	target_scale = torch.Size([512, 2])

size of y =
	y = torch.Size([512, 1])


### Creating the Model

We need to make some more tweaks to include static variables

### Defining the Forward function

#### Defining the embedding sizes for each categorical variable

In [58]:
# Using a thumbrule to calculate the embedding sizes
# Finding the cardinality using the categorical encoders in the dataset
cardinality = [len(training.categorical_encoders[c].classes_) for c in training.categoricals]

# using the cardinality list to create embedding sizes
embedding_sizes = [
    (x, min(50, (x + 1) // 2))
    for x in cardinality
]

In [59]:
from typing import Dict

class StaticDynamicFeatureRNNModel(SingleStepRNN):
    def __init__(
        self,
        rnn_type: str,
        input_size: int,
        hidden_size: int,
        num_layers: int,
        bidirectional: bool,
        embedding_sizes = []
    ):
        super().__init__(rnn_type, input_size, hidden_size, num_layers, bidirectional)
        self.embeddings = torch.nn.ModuleList(
            [torch.nn.Embedding(card, size) for card, size in embedding_sizes]
        )

    def forward(self, x: Dict):
        # Using the encoder and decoder sequence (explanation in the book)
        x_cont = torch.cat([x["encoder_cont"], x["decoder_cont"]], dim=1)

        # Roll target by 1 (explanation in the book)
        x_cont[:, :, -1] = torch.roll(x_cont[:, :, -1], 1, dims=1)

        # Combine the encoder and decoder categoricals (explanation in the book)
        cat = torch.cat([x["encoder_cat"], x["decoder_cat"]], dim=1)

        # if there are categorical features
        if cat.size(-1) > 0:
            # concatenating all the embedding vectors
            x_cat = torch.cat([emb(cat[:, :, i]) for i, emb in enumerate(self.embeddings)], dim=-1)

            # concatenating continuous and categorical
            x = torch.cat([x_cont, x_cat], dim=-1)
        else:
            x = x_cont

        # dropping first timestep (explanation in the book)
        x = x[:, 1:, :] # x --> (batch_size, seq_len, input_size)

        # Processing through the RNN
        x, _ = self.rnn(x)  # --> (batch_size, seq_len, hidden_size)

        # Using a FC layer on last hidden state
        x = self.fc(x[:, -1, :])  # --> (batch_size, seq_len, 1)
        return x

In [71]:
model_params = dict(
    rnn_type="LSTM",
    input_size=len(training.reals)+sum([s for _, s in embedding_sizes]),
    hidden_size=256, #128
    num_layers=2,
    bidirectional=False,
    embedding_sizes=embedding_sizes,
)

other_params = dict(
    learning_rate=5e-5,
    optimizer="adam",
    loss=RMSE(),
    logging_metrics=[RMSE(), MAE()],
)

In [61]:
model = SingleStepRNNModel.from_dataset(
    training,
    network_callable=StaticDynamicFeatureRNNModel,
    model_params=model_params,
    **other_params
)
#Testing out the model
x, y = next(iter(train_dataloader))
_ = model(x)

type(_), _.prediction.shape

/Users/mlevydaniel/miniforge3/envs/pytorch_m1/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.


(pytorch_forecasting.utils.TupleOutputMixIn.to_network_output.<locals>.Output,
 torch.Size([512, 1]))

### Training the model

In [62]:
saved_model_sampled = 'notebooks/Chapter15/saved_weights/baseline_time_varying_static_sampled.wt'
saved_model_full = 'notebooks/Chapter15/saved_weights/baseline_time_varying_static.wt'

In [63]:
if train_model:
    trainer = pl.Trainer(
        auto_select_gpus=True,
        gpus=-1,
        min_epochs=1,
        max_epochs=20,
        callbacks=[
            pl.callbacks.EarlyStopping(monitor="val_loss", patience=3 if TRAIN_SUBSAMPLE else 4*3),
            pl.callbacks.ModelCheckpoint(
                monitor="val_loss", save_last=True, mode="min", auto_insert_metric_name=True
            ),
        ],
        val_check_interval=1.0 if TRAIN_SUBSAMPLE else 2000,
        log_every_n_steps=50 if TRAIN_SUBSAMPLE else 2000,
        # fast_dev_run=True
        # precision = 16
    )
    trainer.fit(
        model,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )
    #Loading the best model
    best_model_path = trainer.checkpoint_callback.best_model_path
    best_model = SingleStepRNNModel.load_from_checkpoint(best_model_path)
    print(f"Loading the best model from: {best_model_path}")
    shutil.copy(best_model_path, saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full)
else:
    best_model_path = saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full
    load_weights(model, best_model_path)
    best_model =  model
    print ("Skipping Training and loading the model from {best_model_path}")

Skipping Training and loading the model from {best_model_path}


In [65]:
# Predicting on the test dataset and storing in a df
pred = best_model.predict(test, return_index=True)
index = pred.index
pred = pred.output.cpu().numpy()

index[tag] = pred
pred_df = pred_df.reset_index().merge(index, on=["time_idx", "LCLid"], how='left').set_index(feat_config.index_cols)


# Evaluating the forecast
agg_metrics, eval_metrics_df = evaluate_forecast(
    pred_df = pred_df,
    train_data = full_df,
    fc_column=tag,
    name=tag,
)
metric_record.append(agg_metrics)
individual_metrics[tag]=eval_metrics_df

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Calculating metrics...:   0%|          | 0/10 [00:00<?, ?it/s]

In [66]:
pd.DataFrame(metric_record).style.format(
    {"MAE": "{:.4f}", "MSE": "{:.4f}", "meanMASE": "{:.4f}", "Forecast Bias": "{:.2f}%"}
).highlight_min(color="lightgreen", subset=["MAE", "MSE", "meanMASE"]).apply(
    highlight_abs_min,
    props="color:black;background-color:lightgreen",
    axis=0,
    subset=["Forecast Bias"],
)

Unnamed: 0,Algorithm,MAE,MSE,meanMASE,Forecast Bias
0,simple,0.0787,0.025,0.9314,-3.66%
1,simple+time_varying,0.0796,0.0243,0.9419,-1.83%
2,simple+static+time_varying,0.076,0.0227,0.904,-5.45%


## Baseline + Static + Time-varying Information + Scale

Using the history of the time series, static, time varying features and scale

In [72]:
tag = "simple+static+time_varying+scale"

### Converting data into TimeSeriesDataset from PyTorch Forecasting

In [73]:
# Defining the training dataset
training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target=feat_config.target,
    group_ids=feat_config.group_ids,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=feat_config.static_categoricals,
    static_reals=feat_config.static_reals,
    time_varying_known_categoricals=feat_config.time_varying_known_categoricals,
    time_varying_known_reals=feat_config.time_varying_known_reals,
    time_varying_unknown_reals=[
        "energy_consumption",
    ],
    target_normalizer=GroupNormalizer(
        groups=feat_config.group_ids, transformation=None
    ),
    add_target_scales=True  # ----> New!
)

# Defining the validation dataset with the same parameters as training
validation = TimeSeriesDataSet.from_dataset(training, pd.concat([val_history,val_df]).reset_index(drop=True), stop_randomization=True)

# Defining the test dataset with the same parameters as training
test = TimeSeriesDataSet.from_dataset(training, pd.concat([hist_df, test_df]).reset_index(drop=True), stop_randomization=True)

In [74]:
# Making the dataloaders
# num_workers can be increased in linux to speed-up training
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [75]:
# Testing the dataloader
x, y = next(iter(train_dataloader))
print("\nsizes of x =")

for key, value in x.items():
    print(f"\t{key} = {value.size()}")

print("\nsize of y =")
print(f"\ty = {y[0].size()}")


sizes of x =
	encoder_cat = torch.Size([512, 96, 7])
	encoder_cont = torch.Size([512, 96, 4])
	encoder_target = torch.Size([512, 96])
	encoder_lengths = torch.Size([512])
	decoder_cat = torch.Size([512, 1, 7])
	decoder_cont = torch.Size([512, 1, 4])
	decoder_target = torch.Size([512, 1])
	decoder_lengths = torch.Size([512])
	decoder_time_idx = torch.Size([512, 1])
	groups = torch.Size([512, 1])
	target_scale = torch.Size([512, 2])

size of y =
	y = torch.Size([512, 1])


### Creating the Model

### Defining the Forward function

In [76]:
cardinality = [len(training.categorical_encoders[c].classes_) for c in training.categoricals]
embedding_sizes = [
    (x, min(50, (x + 1) // 2))
    for x in cardinality
]

In [77]:
model_params = dict(
    rnn_type="LSTM",
    input_size=len(training.reals)+sum([s for _, s in embedding_sizes]),
    hidden_size=256, #128
    num_layers=2,
    bidirectional=False,
    embedding_sizes=embedding_sizes,
)

other_params = dict(
    learning_rate=5e-5,
    optimizer="adam",
    loss=RMSE(),
    logging_metrics=[RMSE(), MAE()],
)

In [78]:
model = SingleStepRNNModel.from_dataset(
    training,
    network_callable=StaticDynamicFeatureRNNModel,
    model_params = model_params,
    **other_params
)

#Testing out the model
x, y = next(iter(train_dataloader))
_ = model(x)
type(_), _.prediction.shape

/Users/mlevydaniel/miniforge3/envs/pytorch_m1/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.


(pytorch_forecasting.utils.TupleOutputMixIn.to_network_output.<locals>.Output,
 torch.Size([512, 1]))

### Training the model

In [79]:
saved_model_sampled = 'notebooks/Chapter15/saved_weights/baseline_time_varying_static_scale_sampled.wt'
saved_model_full = 'notebooks/Chapter15/saved_weights/baseline_time_varying_static_scale.wt'

In [80]:
if train_model:
    trainer = pl.Trainer(
        auto_select_gpus=True,
        gpus=-1,
        min_epochs=1,
        max_epochs=20,
        callbacks=[
            pl.callbacks.EarlyStopping(monitor="val_loss", patience=3 if TRAIN_SUBSAMPLE else 4*3),
            pl.callbacks.ModelCheckpoint(
                monitor="val_loss", save_last=True, mode="min", auto_insert_metric_name=True
            ),
        ],
        val_check_interval=1.0 if TRAIN_SUBSAMPLE else 2000,
        log_every_n_steps=50 if TRAIN_SUBSAMPLE else 2000,
        # fast_dev_run=True
        # precision = 16
    )
    trainer.fit(
        model,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )
    #Loading the best model
    best_model_path = trainer.checkpoint_callback.best_model_path
    best_model = SingleStepRNNModel.load_from_checkpoint(best_model_path)
    print(f"Loading the best model from: {best_model_path}")
    shutil.copy(best_model_path, saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full)
else:
    best_model_path = saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full
    load_weights(model, best_model_path)
    best_model =  model
    print ("Skipping Training and loading the model from {best_model_path}")


Skipping Training and loading the model from {best_model_path}


In [81]:
# Predicting on the test dataset and storing in a df
pred = best_model.predict(test, return_index=True)
index = pred.index
pred = pred.output.cpu().numpy()

index[tag] = pred
pred_df = pred_df.reset_index().merge(index, on=["time_idx", "LCLid"], how='left').set_index(feat_config.index_cols)

# Evaluating the forecast
agg_metrics, eval_metrics_df = evaluate_forecast(
    pred_df = pred_df,
    train_data = full_df,
    fc_column=tag,
    name=tag,
)
metric_record.append(agg_metrics)
individual_metrics[tag]=eval_metrics_df

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/mlevydaniel/miniforge3/envs/pytorch_m1/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Calculating metrics...:   0%|          | 0/10 [00:00<?, ?it/s]

In [83]:
pd.DataFrame(metric_record).style.format(
    {"MAE": "{:.4f}", "MSE": "{:.4f}", "meanMASE": "{:.4f}", "Forecast Bias": "{:.2f}%"}
).highlight_min(color="lightgreen", subset=["MAE", "MSE", "meanMASE"]).apply(
    highlight_abs_min,
    props="color:black;background-color:lightgreen",
    axis=0,
    subset=["Forecast Bias"],
)

Unnamed: 0,Algorithm,MAE,MSE,meanMASE,Forecast Bias
0,simple,0.0787,0.025,0.9314,-3.66%
1,simple+time_varying,0.0796,0.0243,0.9419,-1.83%
2,simple+static+time_varying,0.076,0.0227,0.904,-5.45%
3,simple+static+time_varying+scale,0.0775,0.0242,0.9202,-5.40%


## Baseline + Static + Time-varying Information + Samplers

Using the history of the time series, static, and time varying features

In [154]:
tag = "simple+static+time_varying+num_sampler"

### Converting data into TimeSeriesDataset from PyTorch Forecasting

In [155]:
# Defining the training dataset
training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target=feat_config.target,
    group_ids=feat_config.group_ids,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=feat_config.static_categoricals,
    static_reals=feat_config.static_reals,
    time_varying_known_categoricals=feat_config.time_varying_known_categoricals,
    time_varying_known_reals=feat_config.time_varying_known_reals,
    time_varying_unknown_reals=[
        "energy_consumption",
    ],
    target_normalizer=GroupNormalizer(
        groups=feat_config.group_ids, transformation=None
    )
)

# Defining the validation dataset with the same parameters as training
validation = TimeSeriesDataSet.from_dataset(training, pd.concat([val_history,val_df]).reset_index(drop=True), stop_randomization=True)

# Defining the test dataset with the same parameters as training
test = TimeSeriesDataSet.from_dataset(training, pd.concat([hist_df, test_df]).reset_index(drop=True), stop_randomization=True)

### With and Without Batch Samplers (Visualization)

In [156]:
from tqdm.auto import tqdm
import plotly.graph_objects as go
from collections import defaultdict

In [157]:
n_bins= 10
enc = training.categorical_encoders["__group_id__LCLid"]

# Calculating the length of each LCLid
counts = train_df.groupby("LCLid")['timestamp'].count()

# Binning the counts and renaming
out, bins = pd.cut(counts, bins=n_bins, retbins=True)
out = out.cat.rename_categories({
    c:f"bin_{i}" for i, c in enumerate(out.cat.categories) 
})

# TimeSeriesDataset stores a df as the index over which it samples
df = training.index.copy()

# Adding a bin column to it to represent the bins we have created
df['bins'] = [f"bin_{i}" for i in np.digitize(df["count"].values, bins)]

In [158]:
fig = px.histogram(out).update_xaxes(categoryorder="category ascending")
fig.update_layout(
        autosize=False,
        width=900,
        height=500,
    # title_text="",
        yaxis=dict(
            title_text="# of LCLids",
            titlefont=dict(size=14),
            tickfont=dict(size=14),
        ),
        xaxis=dict(
            title_text="Length Bins",
            titlefont=dict(size=14),
            tickfont=dict(size=14),
        ),
    showlegend=False
    )
fig.write_image("imgs/chapter_14/length_bin_dist.png")
fig.show()

In [159]:
def get_batch_count(train_dataloader, n=50):
    all_batches=defaultdict(list)
    for i,(x,y) in tqdm(enumerate(train_dataloader), total=n):
        
        # finding the LCLid using inverse transform
        grps = enc.inverse_transform(x["groups"])
        
        # getting the bins from the bins we create earlier
        bins = out[grps.ravel()]
        
        # value count and convert to dictionary
        curr = bins.value_counts().to_dict()
        
        # Adding the counts to all_batches
        for key in curr.keys():
            all_batches[key].append(curr.get(key))
        if i >= n:
            break

    return all_batches

def plot_batches(all_batches):

    # making a list for batch index
    x = [str(x) for x in range(len(all_batches[out[0]]))]
    data_append = []

    # For each bin adding a bar
    for bin_name in reversed(out.cat.categories):
        data_append.append(go.Bar(
                                   x=x,
                                   y=all_batches[bin_name],
                                   textposition='auto',name=bin_name))
    fig = go.Figure(data=data_append)
    return fig

#### Without Batch Samplers

In [160]:
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
all_batches = get_batch_count(train_dataloader, n=50)

  0%|          | 0/50 [00:00<?, ?it/s]

In [161]:
fig = plot_batches(all_batches)
fig.update_layout(
        autosize=False,
        width=900,
        height=500,
    # title_text="",
        yaxis=dict(
            title_text="Bin Frequency",
            titlefont=dict(size=14),
            tickfont=dict(size=14),
        ),
        xaxis=dict(
            title_text="Batch Number",
            titlefont=dict(size=14),
            tickfont=dict(size=14),
        ),
    barmode="stack",
    )
fig.write_image("imgs/chapter_14/wo_batchsamplers.png")
fig.show()


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



#### With Batch Samplers

We define a probability which makes the batch balanced across different batches.

We can use the bins of the length of time series and initializing the weights as 1/frequency of each bin. This makes the sampling focus more on low frequency bins and make single batch equal

In [180]:
# Calculate Weights as inverse counts of the bins
weights = 1 / df['bins'].value_counts(normalize=True)

# Assigning the weights back to the df so that we have an array of 
# weights in the same shape as the index over which we are going to sample
weights = weights.reset_index().rename(columns={"proportion": "weight"})
df = df.merge(weights, on='bins', how='left')
probabilities = df.weight.values

In [181]:
from torch.utils.data import WeightedRandomSampler

sampler = WeightedRandomSampler(probabilities, len(probabilities))

In [184]:
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0, sampler=sampler, shuffle=False)
all_batches = get_batch_count(train_dataloader, n=50)

  0%|          | 0/50 [00:00<?, ?it/s]

In [200]:
fig = plot_batches(all_batches)
fig.update_layout(
        autosize=False,
        width=900,
        height=500,
    # title_text="",
        yaxis=dict(
            title_text="Bin Frequency",
            titlefont=dict(size=14),
            tickfont=dict(size=14),
        ),
        xaxis=dict(
            title_text="Batch Number",
            titlefont=dict(size=14),
            tickfont=dict(size=14),
        ),
    barmode="stack",
    )
fig.write_image("imgs/chapter_14/with_batchsamplers.png")
fig.show()


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



#### Training with the custom sampler

In [204]:
# Making the dataloaders
# num_workers can be increased in linux to speed-up training
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0, sampler=sampler, shuffle=False)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [205]:
# Testing the dataloader
x, y = next(iter(train_dataloader))
print("\nsizes of x =")

for key, value in x.items():
    print(f"\t{key} = {value.size()}")

print("\nsize of y =")
print(f"\ty = {y[0].size()}")


sizes of x =
	encoder_cat = torch.Size([512, 96, 7])
	encoder_cont = torch.Size([512, 96, 2])
	encoder_target = torch.Size([512, 96])
	encoder_lengths = torch.Size([512])
	decoder_cat = torch.Size([512, 1, 7])
	decoder_cont = torch.Size([512, 1, 2])
	decoder_target = torch.Size([512, 1])
	decoder_lengths = torch.Size([512])
	decoder_time_idx = torch.Size([512, 1])
	groups = torch.Size([512, 1])
	target_scale = torch.Size([512, 2])

size of y =
	y = torch.Size([512, 1])


### Creating the Model

### Defining the Forward function

In [206]:
cardinality = [len(training.categorical_encoders[c].classes_) for c in training.categoricals]
embedding_sizes = [
    (x, min(50, (x + 1) // 2))
    for x in cardinality
]

In [207]:
model_params = dict(
    rnn_type="LSTM",
    input_size=len(training.reals)+sum([s for _, s in embedding_sizes]),
    hidden_size=256, #128
    num_layers=2,
    bidirectional=False,
    embedding_sizes=embedding_sizes,
)

other_params = dict(
    learning_rate=5e-5,
    optimizer="adam",
    loss=RMSE(),
    logging_metrics=[RMSE(), MAE()],
)

In [208]:
model = SingleStepRNNModel.from_dataset(
    training,
    network_callable=StaticDynamicFeatureRNNModel,
    model_params = model_params,
    **other_params
)
#Testing out the model
x, y = next(iter(train_dataloader))
_ = model(x)
type(_), _.prediction.shape


Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.



(pytorch_forecasting.utils.TupleOutputMixIn.to_network_output.<locals>.Output,
 torch.Size([512, 1]))

### Training the model

In [103]:
# best_model = SingleStepRNNModel.load_from_checkpoint(saved_model_sampled)

# torch.save(best_model.state_dict(),saved_model_sampled.replace("saved_models","saved_weights").replace("ckpt","wt"))

In [209]:
saved_model_sampled ='notebooks/Chapter15/saved_weights/baseline_time_varying_static_scale_weighted_sampler_sampled.wt'
saved_model_full ='notebooks/Chapter15/saved_weights/baseline_time_varying_static_scale_weighted_sampler.wt'

In [210]:
if train_model:
    trainer = pl.Trainer(
        auto_select_gpus=True,
        gpus=-1,
        min_epochs=1,
        max_epochs=20,
        callbacks=[
            pl.callbacks.EarlyStopping(monitor="val_loss", patience=3 if TRAIN_SUBSAMPLE else 4*3),
            pl.callbacks.ModelCheckpoint(
                monitor="val_loss", save_last=True, mode="min", auto_insert_metric_name=True
            ),
        ],
        val_check_interval=1.0 if TRAIN_SUBSAMPLE else 2000,
        log_every_n_steps=50 if TRAIN_SUBSAMPLE else 2000,
        # fast_dev_run=True
        # precision = 16
    )
    trainer.fit(
        model,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )
    #Loading the best model
    best_model_path = trainer.checkpoint_callback.best_model_path
    best_model = SingleStepRNNModel.load_from_checkpoint(best_model_path)
    print(f"Loading the best model from: {best_model_path}")
    shutil.copy(best_model_path, saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full)
else:
    best_model_path = saved_model_sampled if TRAIN_SUBSAMPLE else saved_model_full
    load_weights(model, best_model_path)
    best_model =  model
    print ("Skipping Training and loading the model from {best_model_path}")

Skipping Training and loading the model from {best_model_path}


In [211]:
# Predicting on the test dataset and storing in a df
pred = best_model.predict(test, return_index=True)
index = pred.index
pred = pred.output.cpu().numpy()

index[tag] = pred
pred_df = pred_df.reset_index().merge(index, on=["time_idx", "LCLid"], how='left').set_index(feat_config.index_cols)

# Evaluating the forecast
agg_metrics, eval_metrics_df = evaluate_forecast(
    pred_df = pred_df,
    train_data = full_df,
    fc_column=tag,
    name=tag,
)
metric_record.append(agg_metrics)
individual_metrics[tag]=eval_metrics_df

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.



Calculating metrics...:   0%|          | 0/10 [00:00<?, ?it/s]

In [212]:
pd.DataFrame(metric_record).style.format(
    {"MAE": "{:.4f}", "MSE": "{:.4f}", "meanMASE": "{:.4f}", "Forecast Bias": "{:.2f}%"}
).highlight_min(color="lightgreen", subset=["MAE", "MSE", "meanMASE"]).apply(
    highlight_abs_min,
    props="color:black;background-color:lightgreen",
    axis=0,
    subset=["Forecast Bias"],
)

Unnamed: 0,Algorithm,MAE,MSE,meanMASE,Forecast Bias
0,simple,0.0787,0.025,0.9314,-3.66%
1,simple+time_varying,0.0796,0.0243,0.9419,-1.83%
2,simple+static+time_varying,0.076,0.0227,0.904,-5.45%
3,simple+static+time_varying+scale,0.0775,0.0242,0.9202,-5.40%
4,simple+static+time_varying+num_sampler,0.0786,0.0234,0.9316,-2.84%
