In [1]:
import os, sys

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import pytorch_lightning as pl
from pytorch_lightning import LightningModule
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, Callback
from torch.optim.lr_scheduler import ReduceLROnPlateau
from pytorch_lightning.loggers import WandbLogger
from scipy.stats import boxcox
from PIL import Image
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from io import BytesIO

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import plotly.express as px


In [2]:
import pandas as pd
import os
# Set working directory
repo_name = 'net-load-forecasting'
# Only step up one level when we are not already at the repo root, so that
# re-running this cell does not keep walking up the directory tree.
if not os.getcwd().endswith(repo_name):
    os.chdir(r"..") # should be the git repo root directory
print("Current working directory: " + os.getcwd())
# Same suffix check as before, but driven by `repo_name` instead of a second hard-coded copy.
assert os.getcwd().endswith(repo_name), "Working directory is not the git repo root directory"


from utils.utils import *

Current working directory: c:\Users\nik\Desktop\Berkeley_Projects\net-load-forecasting


# Patch Transformer

For 1-minute resolution data, it makes sense to patch the data into 15-minute chunks (note that the example configuration below uses 60-minute patches).

In [77]:
from sklearn.preprocessing import MinMaxScaler

from darts import TimeSeries
from darts.utils.missing_values import extract_subseries


class PatchedTimeseriesDataset(Dataset):
    """Sliding-window dataset that serves a load time series as fixed-length patches.

    Every item is an ``(input_chunk, output_chunk)`` pair of 2-D arrays. Each row
    is one patch: ``patch_len`` consecutive (scaled) load values followed by the
    three datetime encodings (hour, day, month) of the patch's first timestamp.

    The continuous subseries found in the data are split 80/10/10 (by position in
    the list of subseries) into train/val/test, and only the longest subseries of
    the selected split is used.

    Args:
        data_dir: Path of the HDF5 file; the table is read from key '1min/netload'.
        hours_ahead: Forecast horizon in hours.
        hours_lookback: Length of the history window in hours.
        patch_n_minutes: Patch length in minutes.
        stage: One of 'train', 'val', 'test'.
        scaler: Already fitted scaler (exposing ``transform``) for 'val'/'test'.
            For 'train' it is ignored: a fresh ``MinMaxScaler`` is fitted and
            stored in ``self.scaler``.

    Raises:
        AssertionError: If the stage is unknown or the horizons are not multiples
            of the patch size.
        ValueError: If a scaler is missing for 'val'/'test', if the patch length
            does not fit the data frequency, or if the selected split is empty.
    """

    def __init__(self, data_dir, hours_ahead, hours_lookback, patch_n_minutes, stage = 'train', scaler = None ) -> None:
        super().__init__()

        # Validate the cheap arguments first so a bad call fails before the file is read.
        assert hours_ahead * 60 % patch_n_minutes == 0, "hours ahead must be a multiple of patch size"
        assert hours_lookback * 60 % patch_n_minutes == 0, "hours lookback must be a multiple of patch size"
        assert stage in ['train', 'val', 'test'], "stage must be one of 'train', 'val', 'test'"
        if stage != 'train' and scaler is None:
            raise ValueError(
                f"stage '{stage}' needs a fitted scaler; pass the scaler of the training dataset"
            )

        self.data_dir = data_dir
        self.freq = None
        self.stage = stage
        self.scaler = scaler
        self.df = self._load_data()
        # timeseries interval conversions:
        # NOTE(review): the arithmetic below treats the result of infer_frequency
        # as the sampling interval in minutes - confirm against utils.
        self.freq = infer_frequency(self.df)
        self.output_chunk_len = int(hours_ahead * 60 // self.freq)
        self.input_chunk_len = int(hours_lookback * 60 // self.freq)
        self.patch_len = int(patch_n_minutes // self.freq)

        # Without this check a too-small patch would only surface later as a
        # ZeroDivisionError / reshape error inside __getitem__.
        if (
            self.patch_len < 1
            or self.input_chunk_len % self.patch_len != 0
            or self.output_chunk_len % self.patch_len != 0
        ):
            raise ValueError(
                f"patch_n_minutes={patch_n_minutes} does not tile the chunks at the "
                f"inferred data frequency ({self.freq})"
            )

        self.load_series, self.ts_index = self._setup_data()


    def __len__(self):
        """Number of complete (input, output) windows; never negative."""
        return max(0, len(self.load_series) - self.input_chunk_len - self.output_chunk_len + 1)


    def __getitem__(self, idx):
        """Return the ``(input_chunk, output_chunk)`` pair whose input starts at ``idx``.

        Negative indices count from the end. Raises ``IndexError`` when ``idx``
        is out of range, which also lets plain ``for`` iteration terminate.
        """
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        if not 0 <= idx < n_windows:
            raise IndexError(f"index out of range for dataset with {n_windows} windows")

        boundary = idx + self.input_chunk_len
        input_chunk = self._make_patches(idx, boundary)
        output_chunk = self._make_patches(boundary, boundary + self.output_chunk_len)

        return input_chunk, output_chunk


    def _make_patches(self, start, stop):
        """Build the patch matrix for samples ``start:stop`` (values + datetime encodings)."""
        n_patches = (stop - start) // self.patch_len
        # keep one encoding per patch: the one belonging to the patch's first sample
        encodings = self._timenc(self.ts_index[start:stop])[::self.patch_len, :]
        patches = self.load_series[start:stop].reshape(n_patches, self.patch_len)
        return np.concatenate([patches, encodings], axis=1)


    def _timenc(self, ts_index):
        """Encode a datetime index as an ``(n, 3)`` array of hour/24, day/31 and month/12."""

        # TODO: add option for trigonometric encoding

        hour = ts_index.hour / 24
        day = ts_index.day / 31
        month = ts_index.month / 12
        datetime_encodings = np.vstack([hour, day, month]).T

        return datetime_encodings

    def _load_data(self):
        """Read the net-load table stored under key '1min/netload' of the HDF5 file."""
        df = pd.read_hdf(self.data_dir, key='1min/netload')
        return df

    def _setup_data(self):
        """Select the split for ``self.stage``, scale it and return ``(values, index)``.

        Only the longest continuous subseries of the split is used. For 'train'
        a new ``MinMaxScaler`` is fitted on it; the other stages reuse
        ``self.scaler`` unchanged.
        """
        #make sure that the time series is continuous, i.e. no missing values
        # we only want to use the longest continuous subseries
        ts = TimeSeries.from_dataframe(self.df, freq = self.freq)
        ts_subseries = extract_subseries(ts)
        ts_subseries_reviewed = review_subseries(ts_subseries, min_length=(self.input_chunk_len + self.output_chunk_len))[0]

        n_subseries = len(ts_subseries_reviewed)
        train_end = int(n_subseries*0.8)
        val_end = int(n_subseries*0.9)
        stage_slices = {
            'train': slice(None, train_end),
            'val': slice(train_end, val_end),
            'test': slice(val_end, None),
        }

        stage_subseries = ts_subseries_reviewed[stage_slices[self.stage]]
        if len(stage_subseries) == 0:
            raise ValueError(
                f"no continuous subseries available for stage '{self.stage}' "
                f"({n_subseries} usable subseries in total)"
            )

        ts = stage_subseries[get_longest_subseries_idx(stage_subseries)]
        frame = ts.pd_dataframe()
        values = frame.values

        if self.stage == 'train':
            # the scaler is fitted on training data only and handed to val/test by the caller
            self.scaler = MinMaxScaler()
            values = self.scaler.fit_transform(values)
        else:
            values = self.scaler.transform(values)

        return values, frame.index
        

In [78]:
# Cleaned HDF5 store, resolved against the current working directory.
data_path = os.path.join(os.getcwd(), 'data', 'clean_data', 'data_net_load_forecasting.h5')

# Windowing configuration
patch_minutes = 60   # minutes of data per patch
hours_ahead = 4      # forecast horizon
hours_lookback = 5   # history window


ds_train = PatchedTimeseriesDataset(
    data_dir=data_path,
    hours_ahead=hours_ahead,
    hours_lookback=hours_lookback,
    patch_n_minutes=patch_minutes,
    stage='train',
)
# Validation and test reuse the scaler held by the training dataset.
ds_val = PatchedTimeseriesDataset(
    data_dir=data_path,
    hours_ahead=hours_ahead,
    hours_lookback=hours_lookback,
    patch_n_minutes=patch_minutes,
    stage='val',
    scaler=ds_train.scaler,
)
ds_test = PatchedTimeseriesDataset(
    data_dir=data_path,
    hours_ahead=hours_ahead,
    hours_lookback=hours_lookback,
    patch_n_minutes=patch_minutes,
    stage='test',
    scaler=ds_train.scaler,
)

In [79]:
# Number of samples per patch: patch_n_minutes // inferred data frequency.
ds_train.patch_len

60

In [80]:

def worker_init_fn(worker_id, base_seed=42):
    """Seed NumPy's global RNG with ``base_seed + worker_id``.

    Meant to be passed as ``worker_init_fn`` to a DataLoader so that each worker
    gets its own reproducible NumPy seed.

    Args:
        worker_id: Integer id of the worker being initialised.
        base_seed: Seed offset shared by all workers; defaults to 42, the value
            that used to be hard-coded.
    """
    np.random.seed(base_seed + worker_id)

class PatchedTimeseriesDataLoader(DataLoader):
    """DataLoader that batches ``(input_chunk, output_chunk)`` array pairs into float tensors."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Installed after the parent constructor has run, so this collate
        # function replaces whatever the caller may have passed.
        self.collate_fn = self.collate_fn_

    def collate_fn_(self, batch):
        """Stack the samples of ``batch`` and return ``(input_tensor, output_tensor)``."""
        # transpose the list of pairs into (all inputs, all outputs) and stack each
        stacked_columns = (np.stack(column) for column in zip(*batch))
        input_tensor, output_tensor = (torch.FloatTensor(array) for array in stacked_columns)
        return input_tensor, output_tensor
    

In [81]:
# The three loaders share every setting except shuffling, which is enabled for training only.
dl_train, dl_val, dl_test = (
    PatchedTimeseriesDataLoader(
        dataset,
        batch_size=128,
        shuffle=do_shuffle,
        num_workers=0,
        worker_init_fn=worker_init_fn,
    )
    for dataset, do_shuffle in ((ds_train, True), (ds_val, False), (ds_test, False))
)

In [82]:
# Pull a single batch to check the tensor shapes; `x` and `y` stay defined
# afterwards and are reused by later cells (e.g. `hidden_size = x.shape[2]`).
for x, y in dl_train:
    print(x.shape, y.shape)
    break

torch.Size([128, 5, 63]) torch.Size([128, 4, 63])


In [13]:
# Use the first CUDA device when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')





In [100]:
import wandb

class EncoderTransformer(pl.LightningModule):
    """Encoder-only transformer that emits one scalar per input patch.

    Pipeline: linear projection of each patch to ``d_model`` -> transformer
    encoder over the patch sequence -> linear head with a single output.
    Inputs are expected batch-first, i.e. ``(batch, n_patches, hidden_size)``.

    Args:
        input_size: Stored on the module; not used by any layer.
        output_size: Stored on the module; not used by any layer.
        hidden_size: Number of features per patch (input width of the projection).
        d_model: Embedding width used inside the transformer.
        nhead: Number of attention heads.
        num_layers: Number of stacked encoder layers.
        dropout: Dropout probability of the encoder layers.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning the loss.
        lr: Learning rate for Adam.

    NOTE(review): the head yields one row per *input* patch, while the targets
    have one row per *output* patch, so the loss only lines up when lookback and
    horizon contain the same number of patches - confirm the intended head design.
    """

    def __init__(self, input_size, output_size, hidden_size, d_model, nhead, num_layers, dropout, loss_fn, lr=0.001):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.d_model = d_model
        self.nhead = nhead
        self.num_layers = num_layers
        self.dropout = dropout
        self.loss_fn = loss_fn
        self.lr = lr

        self.linear = nn.Linear(hidden_size, d_model)
        # batch_first=True: batches arrive as (batch, seq, feature). With the
        # default (False) attention would run across the samples of a batch
        # instead of across the patches of one sample.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, src, trg=None):
        """Return predictions of shape ``(batch, n_patches, 1)``; ``trg`` is accepted but unused."""
        src = self.linear(src)
        output = self.transformer_encoder(src)
        output = self.decoder(output)
        return output

    def _shared_step(self, batch):
        """Run the model on a batch and return ``(output, target, loss)``.

        The target is the first feature of every output patch, identically for
        training, validation and test.
        """
        src, trg = batch
        trg = trg[:, :, :1]
        output = self(src)
        loss = self.loss_fn(output, trg)
        return output, trg, loss

    def _log_prediction_plots(self, key, output, trg):
        """Plot a few sequences and log them to the active wandb run as one image under ``key``."""
        # Without an active run wandb.log would raise; skip the (costly) plotting too.
        if wandb.run is None:
            return
        buffers = self._plot_predictions(output, trg)
        if not buffers:
            return
        # Combine the image buffers into a single image
        images = [np.array(Image.open(buffer)) for buffer in buffers]
        combined_image = np.concatenate(images, axis=1)
        # Log the combined image to WandB
        wandb.log({key: wandb.Image(combined_image)})

    def training_step(self, batch, batch_idx=None):
        """Compute and log the training loss for one batch."""
        _, _, loss = self._shared_step(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss; plot predictions for the first batch."""
        output, trg, loss = self._shared_step(batch)
        self.log('val_loss', loss)
        if batch_idx == 0:
            self._log_prediction_plots("predictions_val_dataset", output, trg)
        return loss

    def test_step(self, batch, batch_idx):
        """Compute and log the test loss; plot predictions for the first batch."""
        output, trg, loss = self._shared_step(batch)
        self.log('test_loss', loss)
        if batch_idx == 0:
            self._log_prediction_plots("predictions_test_dataset", output, trg)
        return loss

    def _plot_predictions(self, preds, actuals, n_sequences=5):
        """Render predictions vs. actuals for up to ``n_sequences`` batch entries.

        Returns a list of ``BytesIO`` buffers, each holding one PNG figure.
        """
        preds = preds.detach().cpu().numpy()
        actuals = actuals.detach().cpu().numpy()
        buffers = []
        # never index past the end of a batch that is smaller than n_sequences
        for i in range(min(n_sequences, len(preds))):
            fig, ax = plt.subplots(1, 1, figsize=(20, 10))
            # plotting the i-th sequence in the batch
            ax.plot(preds[i, :, 0], label='Predictions')
            ax.plot(actuals[i, :, 0], label='Actuals')
            ax.legend()
            # Convert the figure to an image buffer
            canvas = FigureCanvas(fig)
            buffer = BytesIO()
            canvas.print_figure(buffer, format='png')
            buffer.seek(0)
            # Close the figure to save memory
            plt.close(fig)
            # Append the image buffer to the list of buffers
            buffers.append(buffer)
        # Return the list of image buffers
        return buffers

    def configure_optimizers(self):
        """Optimise all parameters with Adam at learning rate ``self.lr``."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)
    

In [101]:
# Sequence lengths (in samples) come from the training dataset; the per-patch
# feature width is read off the batch `x` fetched earlier.
input_size = ds_train.input_chunk_len
output_size = ds_train.output_chunk_len
hidden_size = x.shape[2]

# Transformer hyper-parameters
d_model = 64
nhead = 4
num_layers = 2

dropout = 0.1
loss_fn = nn.MSELoss()

model = EncoderTransformer(
    input_size=input_size,
    output_size=output_size,
    hidden_size=hidden_size,
    d_model=d_model,
    nhead=nhead,
    num_layers=num_layers,
    dropout=dropout,
    loss_fn=loss_fn,
)

63

In [96]:
# Stand-alone linear layer with the same in/out widths as the model's input projection.
fc = nn.Linear(hidden_size, d_model)

# Project the batch `x` (fetched in an earlier cell) from hidden_size to d_model features.
q = fc(x)