In [24]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
import os
import pandas as pd
import numpy as np
from tqdm import trange, tqdm

from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile

from pandas import read_csv
from scipy import stats

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import RandomSampler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torch.optim as optim
from tqdm import trange, tqdm

In [11]:
window_size = 192
stride_size = 24
target_window_size = 24
num_covariates = 3
train_start = '2011-01-01 00:00:00'
train_end = '2014-08-31 23:00:00'
test_start = '2014-08-25 00:00:00' #need additional 7 days as given info
test_end = '2014-09-07 23:00:00'


In [12]:
def prep_data(data, covariates, data_start, train = True):
    # Calculate the length of time series data.
    time_len = data.shape[0]
    # Determine input size based on window and stride sizes.
    input_size = window_size - stride_size
    # Calculate the number of windows per series based on the data length, input size, and target window size.
    windows_per_series = np.full((num_series), (time_len - input_size - target_window_size) // stride_size)
    # If training, adjust the number of windows per series based on the data start index.
    if train: windows_per_series -= (data_start + stride_size - 1) // stride_size
    # Calculate the total number of windows across all series.
    total_windows = np.sum(windows_per_series)
    # Initialize arrays for input data, labels, and additional inputs.
    x_input = np.zeros((total_windows, window_size, 1 + num_covariates), dtype='float32')
    label = np.zeros((total_windows, target_window_size, 1 + num_covariates), dtype='float32')
    v_input = np.zeros((total_windows, 2), dtype='float32')
    # Initialize a counter for tracking the current window.
    count = 0
    # Iterate over each time series to process data.
    for series in trange(num_series):
        # Iterate over each window in the current series.
        for i in range(windows_per_series[series]):
            # Calculate the start index of the window based on whether it's training or testing data.
            if train:
                window_start = stride_size * i + data_start[series]
            else:
                window_start = stride_size * i
            # Calculate the end index of the input window and the target window.
            window_end = window_start + window_size
            target_window_end = window_end + target_window_size
            # Assign data and covariates to the input array.
            x_input[count, :, 0] = data[window_start:window_end, series]
            x_input[count, :, 1:1 + num_covariates] = covariates[window_start:window_end, :]
            # Assign target data and covariates to the label array.
            label[count, :, 0] = data[window_end:target_window_end, series]
            label[count, :, 1:1 + num_covariates] = covariates[window_end:target_window_end, :]
            # Calculate the sum of non-zero elements in the current window.
            nonzero_sum = (x_input[count, 1:input_size, 0] != 0).sum()
            # Handle cases where the sum of non-zero elements is zero.
            if nonzero_sum == 0:
                v_input[count, 0] = 0
            else:
                # Calculate the scaling factor for normalization and apply it to the input and label data.
                v_input[count, 0] = np.true_divide(x_input[count, :input_size, 0].sum(), nonzero_sum) + 1
                x_input[count, :, 0] = x_input[count, :, 0] / v_input[count, 0]
                label[count, :, 0] = label[count, :, 0] / v_input[count, 0]
            # Increment the window count.
            count += 1
    # Return the prepared input data, additional input, and labels.
    return x_input, v_input, label


In [13]:
def gen_covariates(times, num_covariates):
    covariates = np.zeros((times.shape[0], num_covariates))
    for i, input_time in enumerate(times):
        covariates[i, 0] = input_time.weekday()
        covariates[i, 1] = input_time.hour
        covariates[i, 2] = input_time.month
    return covariates[:, :num_covariates]

In [14]:
name = 'LD2011_2014.txt'
save_name = 'elect'
save_path = os.path.join('data', save_name)

if not os.path.exists(save_path):
    os.makedirs(save_path)
csv_path = os.path.join(save_path, name)
if not os.path.exists(csv_path):
    zipurl = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip'
    with urlopen(zipurl) as zipresp:
        with ZipFile(BytesIO(zipresp.read())) as zfile:
            zfile.extractall(save_path)

data_frame = pd.read_csv(csv_path, sep=";", index_col=0, parse_dates=True, decimal=',')
data_frame = data_frame.resample('1H',label = 'left',closed = 'right').sum()[train_start:test_end]
data_frame.fillna(0, inplace=True) # (32304, 370)
# generate covariates (has both train and test limits)
covariates = gen_covariates(data_frame[train_start:test_end].index, num_covariates) # (32304, 3)

In [15]:
# Import MinMaxScaler from scikit-learn for data normalization.
from sklearn.preprocessing import MinMaxScaler

# Determine the number of unique values in each covariate.
cov_dims = pd.DataFrame(covariates).nunique().tolist()

# Select the training data from the data frame based on specified start and end indices.
train_data = data_frame[train_start:train_end]

# Select the test data from the data frame based on specified start and end indices.
test_data = data_frame[test_start:test_end]

# Initialize the MinMaxScaler for normalization.
scaler = MinMaxScaler()

# Fit the scaler to the training data to learn the scaling parameters.
scaler.fit(train_data)

# Transform and normalize the training data using the fitted scaler, preserving the index and column names.
train_target_df = pd.DataFrame(scaler.transform(train_data), index=train_data.index, columns=train_data.columns)

# Transform and normalize the test data using the same scaler, preserving the index and column names.
test_target_df = pd.DataFrame(scaler.transform(test_data), index=test_data.index, columns=test_data.columns)

# Convert the normalized training data DataFrame to a NumPy array for processing.
train_data = train_target_df.values

# Convert the normalized test data DataFrame to a NumPy array for processing.
test_data = test_target_df.values

# Find the first non-zero value in each time series of the training data.
data_start = (train_data != 0).argmax(axis=0)

# Store the total number of time points in the dataset.
total_time = data_frame.shape[0] # 32304

# Store the number of individual time series in the dataset.
num_series = data_frame.shape[1] # 370

# Prepare the training data using the prep_data function, providing train_data, covariates, and data_start.
X_train, v_train, y_train = prep_data(train_data, covariates, data_start)

# Prepare the test data using the prep_data function, specifying that it is test data (train=False).
X_test, v_test, y_test = prep_data(test_data, covariates, data_start, train=False)


100%|██████████| 370/370 [00:10<00:00, 36.43it/s]
100%|██████████| 370/370 [00:00<00:00, 8454.88it/s]


`class TrainDataset`: it defines a custom dataset class for training data compatible with PyTorch.

`class TestDataset`: it defines a custom dataset class for test data compatible with PyTorch

In [16]:
class TrainDataset(Dataset):
    def __init__(self, data, label):
        # Initialize dataset with data and labels.
        self.data = data
        self.label = label
        # Store the number of samples in the dataset.
        self.train_len = self.data.shape[0]

    # Return the length of the dataset.
    def __len__(self):
        return self.train_len

    # Define method to get a specific item from the dataset by index.
    def __getitem__(self, index):
        # Return a tuple of current time series sequence, current covariates, label sequence, and future covariates.
        return (self.data[index, :, 0], self.data[index, :, 1:1+num_covariates], self.label[index, :, 0], self.label[index, :, 1:1+num_covariates])


class TestDataset(Dataset):
    def __init__(self, data, v, label):
        # Initialize dataset with data, normalizing stats, and labels.
        self.data = data
        self.v = v
        self.label = label
        # Store the number of samples in the dataset.
        self.test_len = self.data.shape[0]

    # Return the length of the dataset.
    def __len__(self):
        return self.test_len

    # Define method to get a specific item from the dataset by index.
    def __getitem__(self, index):
        # Return a tuple of current time series sequence, current covariates, normalizing stats, label sequence, and future covariates.
        return (self.data[index, :, 0], self.data[index, :, 1:1+num_covariates], self.v[index], self.label[index, :, 0], self.label[index, :, 1:1+num_covariates])


In [21]:
train_batch_size = 8

train_set = TrainDataset(X_train, y_train)
test_set = TestDataset(X_test, v_test, y_test)
train_loader = DataLoader(train_set, batch_size=train_batch_size, drop_last=True)
test_loader = DataLoader(test_set, batch_size=len(test_set), sampler=RandomSampler(test_set))

In [17]:
# Define a class for a residual block in a neural network.
class ResidualBlock(nn.Module):
    # Initialize the residual block with specified parameters.
    def __init__(self, input_dim, d, stride=1, num_filters=35, p=0.2, k=2, weight_norm=True):
        super(ResidualBlock, self).__init__()
        # Store kernel size, dilation, and dropout function.
        self.k, self.d, self.dropout_fn = k, d, nn.Dropout(p)

        # Define two 1D convolutional layers.
        self.conv1 = nn.Conv1d(input_dim, num_filters, kernel_size=k, dilation=d)
        self.conv2 = nn.Conv1d(num_filters, num_filters, kernel_size=k, dilation=d)
        # Apply weight normalization if enabled.
        if weight_norm:
            self.conv1, self.conv2 = nn.utils.weight_norm(self.conv1), nn.utils.weight_norm(self.conv2)

        # Define a downsampling layer if input and output dimensions differ.
        self.downsample = nn.Conv1d(input_dim, num_filters, 1) if input_dim != num_filters else None

    # Define the forward pass for the block.
    def forward(self, x):
        # Apply dropout and ReLU activation after first convolutional layer.
        out = self.dropout_fn(F.relu(self.conv1(x.float())))
        # Apply dropout and ReLU activation after second convolutional layer.
        out = self.dropout_fn(F.relu(self.conv2(out)))

        # Calculate the residual connection.
        residual = x if self.downsample is None else self.downsample(x)
        # Return the ReLU activation of the sum of the convolutional output and residual.
        return F.relu(out + residual[:, :, -out.shape[2]:])

# Define a class for processing future residuals.
class FutureResidual(nn.Module):
    # Initialize the FutureResidual class with the input feature size.
    def __init__(self, in_features):
        super(FutureResidual, self).__init__()
        # Define a sequential model with two linear layers and ReLU activation.
        self.net = nn.Sequential(nn.Linear(in_features=in_features, out_features=in_features),
                                 nn.ReLU(),
                                 nn.Linear(in_features=in_features, out_features=in_features))

    # Define the forward pass.
    def forward(self, lag_x, x):
        # Pass the input through the sequential model and get the output.
        out = self.net(x.squeeze())
        # Concatenate the output with the lagged input and apply ReLU activation.
        return F.relu(torch.cat((lag_x, out), dim=2))


In [18]:
# Define the DeepTCN class as a subclass of nn.Module.
class DeepTCN(nn.Module):
    # Initialize the DeepTCN with specified parameters.
    def __init__(self, cov_dims=cov_dims, num_class=num_series, embedding_dim=20, dilations=[1,2,4,8,16,24,32], p=0.25, device=torch.device('cuda')):
        super(DeepTCN, self).__init__()
        # Set input dimensions, covariate dimensions, and device for computation.
        self.input_dim, self.cov_dims, self.embeddings, self.device = 1+(len(cov_dims)*embedding_dim), cov_dims, [], device
        # Create embeddings for each covariate dimension.
        for cov in cov_dims:
            self.embeddings.append(nn.Embedding(num_class, embedding_dim, device=device))

        # Initialize the encoder as a list of residual blocks.
        self.encoder = nn.ModuleList()
        for d in dilations:
            self.encoder.append(ResidualBlock(input_dim=self.input_dim, num_filters=self.input_dim, d=d))
        # Initialize the decoder using the FutureResidual module.
        self.decoder = FutureResidual(in_features=self.input_dim-1)
        # Define a multi-layer perceptron (MLP) for processing the output.
        self.mlp = nn.Sequential(nn.Linear(1158, 8), nn.BatchNorm1d(8), nn.SiLU(), nn.Dropout(p), nn.Linear(8,1), nn.ReLU())

    # Define the forward pass of the model.
    def forward(self, x, current_cov, next_cov):
        # Initialize lists to store embeddings for current and next covariates.
        current_cov_embeddings, next_cov_embeddings = [], []
        # Generate embeddings for each current and next covariate.
        for cov_idx, cov_dim in enumerate(self.cov_dims):
            current_cov_embeddings.append(self.embeddings[cov_idx](current_cov[:,:,cov_idx].to(self.device).long()))
            next_cov_embeddings.append(self.embeddings[cov_idx](next_cov[:,:,cov_idx].to(self.device).long()))
        # Concatenate all current covariate embeddings.
        embed_concat = torch.cat(current_cov_embeddings, dim=2).to(self.device)
        # Concatenate all next covariate embeddings.
        next_cov_concat = torch.cat(next_cov_embeddings, dim=2).to(self.device)

        # Combine time series data with current covariate embeddings.
        encoder_input = torch.cat((x.unsqueeze(2), embed_concat), dim=2)
        # Permute the encoder input to match expected dimensions.
        encoder_input = encoder_input.permute(0, 2, 1)

        # Pass the input through each layer of the encoder.
        for layer in self.encoder:
            encoder_input = layer(encoder_input)
        # Permute the encoder output and reshape it.
        encoder_output = encoder_input.permute(0, 2, 1)
        encoder_output = torch.reshape(encoder_output, (encoder_output.shape[0], 1, -1))
        # Repeat encoder output to match dimensions of next covariates.
        encoder_output = torch.repeat_interleave(encoder_output, next_cov_concat.shape[1], dim=1)

        # Process the encoder output and next covariates through the decoder.
        decoder_output = self.decoder(lag_x=encoder_output, x=next_cov_concat)
        # Obtain dimensions for reshaping the decoder output.
        t, n = decoder_output.size(0), decoder_output.size(1)
        # Reshape and pass the decoder output through the MLP.
        decoder_output = decoder_output.view(t * n, -1)
        output = self.mlp(decoder_output.float())
        # Reshape the final output to match the target dimensions.
        output = output.view(t, n, -1)

        # Return the squeezed output tensor.
        return output.squeeze()


In [19]:
# Define a function to train the model.
def train(model, device=torch.device('cuda'), num_epochs = 1, learning_rate = 1e-3):
    # Get the length of the training data loader.
    train_len = len(train_loader)
    # Initialize the optimizer with model parameters and learning rate.
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Create an array to store loss values for each epoch.
    loss_summary = np.zeros((train_len * num_epochs))
    # Define the loss function as mean squared error.
    loss_fn = F.mse_loss

    # Iterate over the specified number of epochs.
    for epoch in range(num_epochs):
        # Set the model to training mode.
        model.train()
        # Create an array to store loss for each batch in an epoch.
        loss_epoch = np.zeros(len(train_loader))

        # Iterate over the training data loader.
        pbar = tqdm(train_loader)
        for (ts_data_batch, current_covs_batch, labels_batch, next_covs_batch) in pbar:
            # Reset gradients to zero before starting backpropagation.
            optimizer.zero_grad()

            # Initialize a tensor for storing loss.
            loss = torch.zeros(1, device=device, dtype=torch.float32)
            # Pass the data through the model and get output.
            out = model(ts_data_batch.to(device), current_covs_batch.to(device), next_covs_batch.to(device))
            # Calculate the loss between the output and the labels.
            loss = loss_fn(out.float(), labels_batch.squeeze().to(device).float())

            # Display the current loss in the progress bar.
            pbar.set_description(f"Loss:{loss.item()}")
            # Perform backpropagation.
            loss.backward()
            # Update model parameters.
            optimizer.step()

        # Store the loss for the current epoch in the loss summary.
        loss_summary[epoch * train_len:(epoch + 1) * train_len] = loss.cpu().detach()

    # Return the loss summary and optimizer for further use.
    return loss_summary, optimizer

# Define a function to evaluate the model.
def evaluate(model, optimizer, device=torch.device('cuda')):
    # Initialize a list to store results.
    results = []

    # Disable gradient calculations for evaluation.
    with torch.no_grad():
        # Set the model to evaluation mode.
        model.eval()
        # Initialize an array to store loss for each batch in an epoch.
        loss_epoch = np.zeros(len(train_loader))

        # Iterate over the test data loader.
        pbar = tqdm(test_loader)
        for (ts_data_batch, current_covs_batch, v_batch, labels_batch, next_covs_batch) in pbar:
            # Reset gradients to zero before starting backpropagation.
            optimizer.zero_grad()

            # Pass the data through the model and get output.
            out = model(ts_data_batch.to(device), current_covs_batch.to(device), next_covs_batch.to(device))
            # Append the squeezed output to the results list.
            results.append(out.squeeze(0).cpu())

    # Concatenate all results to form a single predictions tensor.
    predictions = torch.cat(results)
    # Define the criterion as mean squared error loss.
    criterion = nn.MSELoss()
    # Calculate the root mean squared error (RMSE) between predictions and labels.
    test_rmse = torch.sqrt(criterion(predictions, labels_batch)).item()
    # Return the calculated RMSE value.
    return test_rmse


In [None]:
model = DeepTCN(device=torch.device('cuda')).cuda()
loss, optimizer = train(model, num_epochs=2)



In [25]:
model_save_path = '/content/drive/MyDrive/Colab_Notebooks/ads_506/deeptcn.pth'
torch.save(model.state_dict(), model_save_path)

In [26]:
evaluate(model, optimizer, device=torch.device('cuda'))

100%|██████████| 1/1 [00:00<00:00,  7.70it/s]


0.15276597440242767