In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import pandas as pd
import numpy as np
from tqdm import trange, tqdm

from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile

from pandas import read_csv
from scipy import stats
from sklearn.preprocessing import MinMaxScaler

import torch
from torch.utils.data import Dataset
import torch.nn as nn

from torch.utils.data import DataLoader
from tqdm import tqdm, trange


In [3]:
name = 'LD2011_2014.txt'
save_name = 'elect'
num_covariates = 3
train_start = '2011-01-01 00:00:00'
train_end = '2014-08-31 23:00:00'
test_start = '2014-08-24 00:00:00' #need additional 7 days as given info
test_end = '2014-09-07 23:00:00'

save_path = os.path.join('data', save_name)
if not os.path.exists(save_path):
    os.makedirs(save_path)
csv_path = os.path.join(save_path, name)
if not os.path.exists(csv_path):
    zipurl = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip'
    with urlopen(zipurl) as zipresp:
        with ZipFile(BytesIO(zipresp.read())) as zfile:
            zfile.extractall(save_path)

In [4]:
# Read the time series data from the CSV file, using the first column as the index and parsing dates.
target_df = pd.read_csv(csv_path, sep=";", index_col=0, parse_dates=True, decimal=',')

# Resample the data to 12-hour intervals, summing values within each period, and define the label and closure of intervals.
target_df = target_df.resample('12H', label='left', closed='right').sum()[train_start:test_end]

# Replace any missing values with zeros to avoid gaps in the data.
target_df.fillna(0, inplace=True)

# Split the dataset into training and test sets based on the defined date ranges.
train_target_df = target_df[train_start:train_end]
test_target_df = target_df[test_start:test_end]

# Determine the horizon size from the test dataset's duration.
horizon_size = test_target_df.shape[0]

# Initialize the MinMaxScaler to scale the data between 0 and 1.
scaler = MinMaxScaler()

# Fit the scaler on the training data to learn the scaling parameters.
scaler.fit(train_target_df)

# Transform the training data using the learned scaling parameters and convert it back to a DataFrame.
# Preserve the original index and column names.
train_target_df = pd.DataFrame(scaler.transform(train_target_df), index=train_target_df.index, columns=train_target_df.columns)

# Transform the test data using the same scaling parameters and convert it back to a DataFrame.
# Preserve the original index and column names.
test_target_df = pd.DataFrame(scaler.transform(test_target_df), index=test_target_df.index, columns=test_target_df.columns)

# Create a DataFrame to hold covariate features with the same index as the target DataFrame.
covariate_df = pd.DataFrame(index=target_df.index,
                            data={
                                'hour': target_df.index.hour,  # Extract the hour from the index.
                                'dayofweek': target_df.index.dayofweek,  # Extract the day of the week from the index.
                                'month': target_df.index.month  # Extract the month from the index.
                            })

# Standardize each covariate column to have a mean of 0 and standard deviation of 1.
for col in covariate_df.columns:
    covariate_df[col] = (covariate_df[col] - np.mean(covariate_df[col])) / np.std(covariate_df[col])

# Slice the covariate DataFrame to create a training covariate DataFrame corresponding to the training period.
train_covariate_df = covariate_df[train_start:train_end]

# Slice the covariate DataFrame to create a testing covariate DataFrame corresponding to the testing period.
test_covariate_df = covariate_df[test_start:test_end]


##  Multi-Horizon Quantile Recurrent Forecaster

In [5]:

# Define the quantiles for which predictions are desired.
desired_quantiles = [0.25, 0.5, 0.75, 0.95]

# Create a class for the MQRNN dataset, inheriting from PyTorch's Dataset class.
class MQRNN_dataset(Dataset):

    # Initialize the dataset with time series data, covariates, and other parameters.
    def __init__(self, series_df: pd.DataFrame, covariate_df: pd.DataFrame, horizon_size: int=horizon_size, quantile_size: int=len(desired_quantiles)):
        # Store the series data and covariates.
        self.series_df = series_df
        self.covariate_df = covariate_df
        self.horizon_size = horizon_size
        self.quantile_size = quantile_size

        # Prepare and store future covariate data.
        full_covariate = []
        covariate_size = self.covariate_df.shape[1]
        for i in range(1, self.covariate_df.shape[0] - horizon_size + 1):
            cur_covariate = []
            cur_covariate.append(self.covariate_df.iloc[i:i+horizon_size, :].to_numpy())
            full_covariate.append(cur_covariate)
        full_covariate = np.array(full_covariate)
        full_covariate = full_covariate.reshape(-1, horizon_size * covariate_size)
        self.next_covariate = full_covariate

    # Define the method to get the length of the dataset.
    def __len__(self):
        return self.series_df.shape[1]

    # Define the method to retrieve an item from the dataset.
    def __getitem__(self, idx):
        # Extract the current time series and covariates.
        cur_series = np.array(self.series_df.iloc[: -self.horizon_size, idx])
        cur_covariate = np.array(self.covariate_df.iloc[:-self.horizon_size, :])

        # Convert covariate size to a variable for readability.
        covariate_size = self.covariate_df.shape[1]

        # Prepare real values for the loss calculation.
        real_vals_list = []
        for i in range(1, self.horizon_size + 1):
            real_vals_list.append(np.array(self.series_df.iloc[i: self.series_df.shape[0] - self.horizon_size + i, idx]))
        real_vals_array = np.array(real_vals_list)  # [horizon_size, seq_len]
        real_vals_array = real_vals_array.T  # [seq_len, horizon_size]

        # Convert current series and covariates to tensors.
        cur_series_tensor = torch.tensor(cur_series)
        cur_series_tensor = torch.unsqueeze(cur_series_tensor, dim=1)  # [seq_len, 1]
        cur_covariate_tensor = torch.tensor(cur_covariate)  # [seq_len, covariate_size]
        cur_series_covariate_tensor = torch.cat([cur_series_tensor, cur_covariate_tensor], dim=1)
        next_covariate_tensor = torch.tensor(self.next_covariate)  # [seq_len, horizon_size * covariate_size]

        # Convert real values to a tensor.
        cur_real_vals_tensor = torch.tensor(real_vals_array)

        # Return the combined current series-covariate tensor, next covariate tensor, and real values tensor.
        return cur_series_covariate_tensor, next_covariate_tensor, cur_real_vals_tensor




# Define the MQRNN model class, inheriting from PyTorch's nn.Module.
class MQRNN(nn.Module):
    # Initialize the model with given parameters.
    def __init__(self,
                horizon_size:int=horizon_size,  # Define the forecast horizon size.
                hidden_size:int=100,  # Set the size of hidden layers.
                quantiles:list=desired_quantiles,  # Specify the quantiles for prediction.
                dropout:float=0.3,  # Set dropout rate for regularization.
                layer_size:int=3,  # Define the number of layers in the LSTM.
                context_size:int=50,  # Set the size of the context vectors.
                covariate_size:int=num_covariates,  # Define the number of covariates.
                bidirectional=False,  # Set bidirectional LSTM if True.
                device=torch.device('cuda')):  # Specify the device (GPU or CPU) for computation.
        super(MQRNN, self).__init__()  # Initialize the superclass (nn.Module).

        # Store the provided attributes in the instance.
        self.quantiles = desired_quantiles
        self.quantile_size = len(quantiles)
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.horizon_size = horizon_size
        self.device = device
        self.covariate_size = covariate_size

        # Define the encoder as an LSTM network.
        self.encoder = nn.LSTM(input_size=covariate_size+1,
                            hidden_size=hidden_size,
                            num_layers=layer_size,
                            dropout=dropout,
                            bidirectional=bidirectional)

        # Define the global decoder as a series of linear layers with ReLU activations.
        self.global_decoder = nn.Sequential(nn.Linear(in_features=hidden_size + covariate_size*horizon_size, out_features=horizon_size*hidden_size*3),
                                           nn.ReLU(),
                                           nn.Linear(in_features=horizon_size*hidden_size*3, out_features=horizon_size*hidden_size*2),
                                           nn.ReLU(),
                                           nn.Linear(in_features=horizon_size*hidden_size*2, out_features=(horizon_size+1)*context_size),
                                           nn.ReLU())

        # Define the local decoder similarly with linear layers and ReLU activations.
        self.local_decoder = nn.Sequential(nn.Linear(in_features=horizon_size*context_size + horizon_size*covariate_size + context_size, out_features=horizon_size*context_size),
                                           nn.ReLU(),
                                           nn.Linear(in_features=horizon_size*context_size, out_features=horizon_size*self.quantile_size),
                                           nn.ReLU())

        # Set the data type to double for all layers and move them to the specified device (GPU/CPU).
        self.encoder.double().to(self.device)
        self.global_decoder.double().to(self.device)
        self.local_decoder.double().to(self.device)

    # Define the forward pass of the model.
    def forward(self, cur_series_covariate_tensor, next_covariate_tensor):
        # Extract sequence length and batch size from the input tensor.
        seq_len, batch_size = cur_series_covariate_tensor.shape[0], cur_series_covariate_tensor.shape[1]

        # Determine the direction size based on whether the LSTM is bidirectional.
        direction_size = 2 if self.bidirectional else 1

        # Pass the input through the encoder LSTM.
        outputs, _ = self.encoder(cur_series_covariate_tensor)

        # Reshape the outputs to separate the direction and hidden size.
        outputs_reshape = outputs.view(seq_len, batch_size, direction_size, self.hidden_size)[:,:,-1,:]
        encoder_outputs = outputs_reshape.view(seq_len, batch_size, self.hidden_size)

        # If not training, use only the last encoder output for all future time steps.
        if not self.training:
            encoder_outputs = torch.unsqueeze(encoder_outputs[-1], dim=0)  # [1, 1, hidden_size]

        # Pass the encoder outputs and next covariates through the global decoder.
        global_decoder_output = self.global_decoder(torch.cat([encoder_outputs, next_covariate_tensor], dim=2))

        # Pass the global decoder output and next covariates through the local decoder.
        local_decoder_output = self.local_decoder(torch.cat([global_decoder_output, next_covariate_tensor], dim=2))

        # Extract sequence length and batch size from the local decoder output.
        seq_len = local_decoder_output.shape[0]
        batch_size = local_decoder_output.shape[1]

        # Reshape the local decoder output for the specified quantiles.
        return local_decoder_output.view(seq_len, batch_size, self.horizon_size, self.quantile_size)




# Define the training function for the MQRNN model.
def train(model, train_dataset, desired_quantiles=desired_quantiles, batch_size=1, num_epochs=1, lr=1e-3, device=torch.device("cuda")):
    # Initialize the optimizer with the Adam algorithm.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Create a DataLoader to iterate over the training dataset.
    data_iter = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

    # Set the model to training mode.
    model.train()
    # Iterate over the specified number of epochs.
    for epoch in range(num_epochs):
        epoch_loss_sum = 0.0  # Initialize the sum of loss for this epoch.
        total_sample = 0  # Initialize the total number of samples processed.
        pbar = tqdm(data_iter)  # Initialize a progress bar for the data iterator.

        # Iterate over batches of data.
        for (cur_series_tensor, cur_covariate_tensor, cur_real_vals_tensor) in pbar:
            # Extract batch size, sequence length, and horizon size from the tensors.
            batch_size, seq_len, horizon_size = cur_series_tensor.shape[0], cur_series_tensor.shape[1], cur_covariate_tensor.shape[-1]
            total_sample += batch_size * seq_len * horizon_size  # Update total sample count.
            optimizer.zero_grad()  # Reset gradients to zero before starting backpropagation.

            # Prepare tensors for input to the model and move them to the specified device.
            cur_series_covariate_tensor = cur_series_tensor.double().permute(1, 0, 2).to(device)  # Rearrange and convert to double.
            next_covariate_tensor = cur_covariate_tensor.double().permute(1, 0, 2).to(device)  # Rearrange and convert to double.
            cur_real_vals_tensor = cur_real_vals_tensor.double().permute(1, 0, 2).to(device)  # Rearrange and convert to double.

            # Pass tensors through the model to get the output.
            model_output = model(cur_series_covariate_tensor, next_covariate_tensor)

            # Calculate losses for each quantile.
            losses = []
            for i, p in enumerate(desired_quantiles):
                errors = cur_real_vals_tensor - model_output[:, :, :, i]  # Calculate errors for each quantile.
                losses.append(torch.max((p - 1) * errors, p * errors))  # Apply quantile loss formula.
            total_loss = torch.mean(torch.sum(torch.cat(losses, dim=1), dim=1)).to(device)  # Calculate mean of total loss.

            # Update the progress bar with the current loss.
            pbar.set_description(f"Loss: {total_loss.item()}")
            total_loss.backward()  # Perform backpropagation.
            optimizer.step()  # Update model parameters.
            epoch_loss_sum += total_loss.item()  # Accumulate loss for the epoch.

        # Calculate and print the average loss for the epoch.
        epoch_loss_mean = epoch_loss_sum / total_sample
        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch + 1}/{num_epochs}, Current Loss: {epoch_loss_mean}")

### evaluate Function

# Define the evaluate function for the MQRNN model.
def evaluate(model, device=torch.device('cuda'), covariate_size=3):
    # Convert training covariate DataFrame to tensor and move to the specified device.
    full_covariate_tensor = torch.tensor(train_covariate_df.to_numpy()).to(device)
    # Convert test covariate DataFrame to tensor, reshape, unsqueeze, and move to the specified device.
    next_covariate_tensor = torch.tensor(test_covariate_df.to_numpy().reshape(-1, horizon_size * covariate_size)).unsqueeze(dim=0).to(device)
    results = []  # Initialize a list to store results.

    # Set the model to evaluation mode.
    model.eval()
    # Disable gradient calculations.
    with torch.no_grad():
        # Iterate over columns in the training target DataFrame.
        for colname in tqdm(train_target_df.columns):
            # Convert the column to tensor and move to the specified device.
            input_target_tensor = torch.tensor(train_target_df[[colname]].to_numpy()).to(device)
            # Combine target tensor with covariate tensor, unsqueeze, and move to the specified device.
            input_target_covariate_tensor = torch.unsqueeze(torch.cat([input_target_tensor, full_covariate_tensor], dim=1), dim=0).to(device)
            # Rearrange the tensor dimensions.
            input_target_covariate_tensor = input_target_covariate_tensor.permute(1, 0, 2).to(device)


In [6]:
model = MQRNN()
train_dataset = MQRNN_dataset(train_target_df, train_covariate_df)
train(model, train_dataset, batch_size=16, num_epochs=15, lr=0.0001)
# Save the model state
model_save_path = '/content/drive/MyDrive/Colab_Notebooks/ads_506/mqrnn2.pth'
torch.save(model.state_dict(), model_save_path)

Loss:0.8355442649751902: 100%|██████████| 24/24 [01:14<00:00,  3.11s/it]
Loss:0.7289806183880713: 100%|██████████| 24/24 [01:12<00:00,  3.04s/it]
Loss:0.45107804422925: 100%|██████████| 24/24 [01:11<00:00,  3.00s/it]
Loss:0.650795937595124: 100%|██████████| 24/24 [01:12<00:00,  3.01s/it]
Loss:0.29195850184528627: 100%|██████████| 24/24 [01:11<00:00,  2.99s/it]


epoch_num 5, current loss is: 1.0988836889952198e-06


Loss:0.2491680562709323: 100%|██████████| 24/24 [01:11<00:00,  2.99s/it]
Loss:0.2601942242261246: 100%|██████████| 24/24 [01:11<00:00,  3.00s/it]
Loss:0.24870597028499428: 100%|██████████| 24/24 [01:12<00:00,  3.02s/it]
Loss:0.1801676619723639: 100%|██████████| 24/24 [01:13<00:00,  3.05s/it]
Loss:0.177961324489008: 100%|██████████| 24/24 [01:13<00:00,  3.06s/it]


epoch_num 10, current loss is: 4.23120875475864e-07


Loss:0.177108150255517: 100%|██████████| 24/24 [01:12<00:00,  3.01s/it]
Loss:0.21538066589021831: 100%|██████████| 24/24 [01:12<00:00,  3.03s/it]
Loss:0.20035888852840025: 100%|██████████| 24/24 [01:12<00:00,  3.02s/it]
Loss:0.21567029552323666: 100%|██████████| 24/24 [01:12<00:00,  3.01s/it]
Loss:0.3538721460684165: 100%|██████████| 24/24 [01:12<00:00,  3.02s/it]


epoch_num 15, current loss is: 4.1205895706314956e-07


In [7]:
test_rmse = evaluate(model)
print(test_rmse)


100%|██████████| 370/370 [00:27<00:00, 13.57it/s]

0.5520811276694852





In [8]:
print(test_rmse)


0.5520811276694852
