In [2]:
# Identifier for this experiment: reused below as the W&B run name, as the Darts
# `model_name` (for saving and loading checkpoints) and as the `model_name`
# passed to the plotting helper.
MODEL_NAME = "n-beats-hpam-v1"

In [3]:
# Reload imported modules automatically before each cell runs, so edits to local
# modules such as `helper` are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

from multiprocessing.dummy import freeze_support
import os
import sys
# Put the parent of the current working directory on the import path (once),
# so that modules living one level above the notebook can be imported.
nb_dir = os.path.dirname(os.getcwd())
if sys.path.count(nb_dir) == 0:
    sys.path.append(nb_dir)


from dotenv import load_dotenv
# Pull variables from a .env file into the process environment.
load_dotenv()
os.environ['WANDB_NOTEBOOK_NAME'] = 'n-beats.ipynb'

# The Weights & Biases logger created further down needs an API key.
# Re-assigning os.environ[...] = os.getenv(...) was a no-op when the key was set
# and raised an opaque "TypeError: str expected, not NoneType" when it was not,
# so check explicitly and fail with an actionable message instead.
if not os.getenv('WANDB_API_KEY'):
    raise RuntimeError(
        "WANDB_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch

from darts import TimeSeries
from darts.models import NBEATSModel
from darts.dataprocessing.transformers import Scaler, MissingValuesFiller
from darts.metrics import mape, r2_score, rmse

from darts import TimeSeries

from darts.datasets import EnergyDataset

import helper
import glob
import progressbar

from pytorch_lightning.loggers import WandbLogger


# Hardware visible to this kernel; AVAILABLE_CPUS is later used as the number
# of data-loader workers when fitting the model.
AVAILABLE_CPUS = os.cpu_count()
AVAILABLE_GPUS = torch.cuda.device_count()

print(
    f"Available GPUs: {AVAILABLE_GPUS}",
    f"Available CPUs: {AVAILABLE_CPUS}",
    sep="\n",
)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Available GPUs: 2
Available CPUs: 32


# Data

Taking the first 2,000 households (the `[:2000]` slice in the loading cell) from the London dataset and converting each of them to a Darts `TimeSeries`.

In [4]:
# Build one float32 Darts TimeSeries per household from the first 2000 CSV
# files (in sorted path order), showing a progress bar while loading.
household_csvs = sorted(glob.glob("../../../Data/london_clean/*.csv"))[:2000]

my_time_series_dataset = []
for csv_path in progressbar.progressbar(household_csvs):
    household_df = pd.read_csv(csv_path)
    household_df["DateTime"] = pd.to_datetime(household_df["DateTime"])
    # Optional daily down-sampling, currently disabled:
    #household_df = household_df.groupby(pd.Grouper(key='DateTime', freq='1D')).max("KWHhh").round(3).reset_index()
    series = TimeSeries.from_dataframe(
        household_df,
        time_col="DateTime",
        value_cols="KWHhh",
    )
    my_time_series_dataset.append(series.astype(np.float32))


  0% (0 of 1000) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--
  1% (10 of 1000) |                      | Elapsed Time: 0:00:00 ETA:   0:00:10
  2% (20 of 1000) |                      | Elapsed Time: 0:00:00 ETA:   0:00:10
  2% (26 of 1000) |                      | Elapsed Time: 0:00:00 ETA:   0:00:09
  3% (36 of 1000) |                      | Elapsed Time: 0:00:00 ETA:   0:00:09
  4% (46 of 1000) |#                     | Elapsed Time: 0:00:00 ETA:   0:00:09
  5% (51 of 1000) |#                     | Elapsed Time: 0:00:00 ETA:   0:00:09
  6% (61 of 1000) |#                     | Elapsed Time: 0:00:00 ETA:   0:00:09
  7% (71 of 1000) |#                     | Elapsed Time: 0:00:00 ETA:   0:00:09
  7% (76 of 1000) |#                     | Elapsed Time: 0:00:00 ETA:   0:00:09
  8% (86 of 1000) |#                     | Elapsed Time: 0:00:00 ETA:   0:00:09
  9% (96 of 1000) |##                    | Elapsed Time: 0:00:00 ETA:   0:00:09
 10% (102 of 1000) |##                  

In [None]:
## sets
# Chronological split of every household series: the part up to the 90 % point
# is used for training, the remainder for validation.
split_pairs = [ts.split_after(0.90) for ts in my_time_series_dataset]
training_sets = [train_part for train_part, _ in split_pairs]
validation_sets = [val_part for _, val_part in split_pairs]

# Model

We create an N-BEATS model that uses the GPU, a Weights & Biases logger, and an early stopping callback.

## Early stopping

An early stopping callback is used to stop the training if the validation loss does not improve after a certain number of epochs.


In [None]:
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# Stop training when the monitored validation loss has not decreased by at
# least `min_delta` for `patience` consecutive validation checks.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=0.01,
    patience=4,
    verbose=True,
)

In [None]:
# Covariate-encoder configuration for Darts' `add_encoders` option.
# NOTE: the model cell currently has `add_encoders=encoders` commented out,
# so this dictionary is defined but not used.
encoders = dict(
    # "datetime_attribute": {"future": ["DateTime"], "past": ["DateTime"]},
    position={"past": ["absolute"], "future": ["relative"]},
    transformer=Scaler(),
)

In [None]:
# Experiment tracking: metrics and model checkpoints are sent to Weights & Biases.
wandb_logger = WandbLogger(project="Digital-Energy", name=MODEL_NAME, log_model=True)

# Options forwarded to the PyTorch Lightning trainer that Darts creates.
trainer_kwargs = {
    "enable_progress_bar": True,
    "enable_model_summary": True,
    "accelerator": "gpu",
    # NOTE(review): hard-coded to the GPU with index 1; this presumably fails on
    # machines with fewer than two GPUs - confirm before running elsewhere.
    "devices": [1],
    "logger": wandb_logger,
    "callbacks": [early_stop_callback],
}

# input chunk  = the length of the input sequence fed to the model
# output chunk = the length of the output sequence predicted by the model
model_nbeats = NBEATSModel(
    model_name=MODEL_NAME,
    work_dir="../../../Models",
    save_checkpoints=True,
    input_chunk_length=96 * 3,
    output_chunk_length=96,
    # Interpretable N-BEATS variant instead of the generic one.
    generic_architecture=False,
    #num_stacks=10,
    num_blocks=3,
    num_layers=5,
    layer_widths=512,
    batch_size=2048,
    n_epochs=15,
    # Run validation after every epoch.
    nr_epochs_val_period=1,
    pl_trainer_kwargs=trainer_kwargs,
    # loss_fn=torch.nn.CrossEntropyLoss() # custom loss function
    # optimizer_cls=torch.optim.Adam,
    # add_encoders=encoders,
    log_tensorboard=True,
)

In [None]:
# Train on all household training splits and validate on the matching
# validation splits; at most 2000 samples are drawn from each series.
#wandb_logger.watch(model_nbeats) # sadly this feature does not work for Darts models
model_nbeats.fit(
    series=training_sets,
    val_series=validation_sets,
    max_samples_per_ts=2000,
    num_loader_workers=AVAILABLE_CPUS,
)

In [None]:
START = 3000
# Evaluate on ten household files starting at index START of the sorted file
# list, i.e. beyond the first 2000 files that were loaded for training.
held_out_csvs = sorted(glob.glob("../../../Data/london_clean/*.csv"))[START:START + 10]

for fig_idx, csv_path in enumerate(held_out_csvs):
    household_df = pd.read_csv(csv_path)
    household_df["DateTime"] = pd.to_datetime(household_df["DateTime"])

    # Regular 30-minute grid (gaps are filled in), restricted to the last 600 steps.
    full_series = TimeSeries.from_dataframe(
        household_df,
        time_col="DateTime",
        value_cols=["KWHhh"],
        fill_missing_dates=True,
        freq="30min",
    )
    series = full_series.astype(np.float32)[-600:]

    # Rolling forecasts with the already-trained model (no retraining),
    # advancing one step at a time with a 5-step horizon.
    pred_series = model_nbeats.historical_forecasts(
        series,
        forecast_horizon=5,
        stride=1,
        retrain=False,
        verbose=True,
    )

    print(f"rmse: {rmse(series, pred_series)}.")
    print(f"R2 score: {r2_score(series, pred_series)}.")

    helper.display_forecast(
        pred_series,
        series,
        "1 day",
        save=True,
        fig_name=f"{fig_idx}-test",
        model_name=MODEL_NAME,
        fig_size=(20, 10),
    )


# Loading checkpoints of the model

Load the best checkpoint of the model so that its results can be compared with those of the model evaluated above.

In [None]:
# Load the best checkpoint saved during training.
# `work_dir` must be the directory the model was created with
# ("../../../Models"); the previous "../../Models/" pointed one level too low,
# so the checkpoint written during training would not be found.
model_nbeats = NBEATSModel.load_from_checkpoint(work_dir="../../../Models", model_name=MODEL_NAME, best=True)

In [None]:
START = 3000
# Forecast length used for the hold-out evaluation: one day at the 30-minute
# resolution of the series built below (48 half-hourly steps).
FORECAST_STEPS = 48

# Same data location as the loading and evaluation cells above
# ("../../../Data"); the previous "../../Data" pattern did not match them.
for i, x in enumerate(sorted(glob.glob("../../../Data/london_clean/*.csv"))[START:START+10]):

    df = pd.read_csv(x)
    df["DateTime"] = pd.to_datetime(df['DateTime'])
    series = TimeSeries.from_dataframe(df, value_cols=['KWHhh'], time_col="DateTime", fill_missing_dates=True, freq="30min").astype(np.float32)
    series = series[-600:]

    # Hold out the last FORECAST_STEPS points as ground truth and forecast them
    # from the preceding history. Predicting *after* the end of `series` (as the
    # earlier version did) yields a forecast with no overlapping actual values,
    # so RMSE / R2 against `series` could not be computed.
    history, actual = series[:-FORECAST_STEPS], series[-FORECAST_STEPS:]

    pred_series = model_nbeats.predict(
        n=FORECAST_STEPS,
        series=history,
    )

    print(f"rmse: {rmse(actual, pred_series)}.")
    print(f"R2 score: {r2_score(actual, pred_series)}.")

    helper.display_forecast(pred_series, series, "1 day", save=True, fig_name=f"{i}-test", model_name=f"{MODEL_NAME}", fig_size=(20,10))

In [None]:
# Show the forecast for the household processed last by the preceding loop
# (`pred_series` / `series` still hold that iteration's values).
# save=False: presumably displays the figure without writing it to disk - confirm in `helper`.
helper.display_forecast(
    pred_series,
    series,
    "1 day",
    save=False,
    fig_name="test",
    model_name=MODEL_NAME,
    fig_size=(20, 10),
)