# Model Playground

Sources:
- Time-series Transformer guide: <https://towardsdatascience.com/the-time-series-transformer-2a521a0efad3>
- Time2Vec embedding: <https://arxiv.org/pdf/1907.05321.pdf>

In [113]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

## Load Datasets

In [114]:
# Detect whether the notebook is running inside Google Colab.
# Only a failed import means "not in Colab"; any other exception
# (including KeyboardInterrupt) should propagate instead of being swallowed.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

In [115]:
# Choose where the datasets live: a path relative to the repository when
# running locally, or the mounted Google Drive folder when running in Colab.
if not IN_COLAB:
    dataset_root = "../datasets/"
else:
    from google.colab import drive

    # The Drive must be mounted before anything below it can be read.
    drive.mount("/content/gdrive")
    dataset_root = "/content/gdrive/My Drive/Virginia Tech/graduate/research/makassar/repos/makassar-ml/datasets/"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


### Dataset: Beijing PM2.5

In [116]:
import torch.utils.data

In [117]:
class BeijingPM25Dataset(torch.utils.data.Dataset):
    """Map-style dataset built from the Beijing PM2.5 CSV file.

    Each item is a ``(src, tgt)`` pair of NumPy rows:

    * ``src`` holds the timestamp and weather columns
      (``year, month, day, hour, DEWP, TEMP, PRES, Is, Ir``).
    * ``tgt`` holds one health score per plant
      (``tomato, sunflower, cucumber``).

    The health scores are not read from the file: they are random
    placeholders drawn uniformly from ``[0, 1)`` with ``np.random.uniform``
    each time the dataset is constructed.

    Args:
        path: Location of the CSV file to read.
    """

    def __init__(self, path: str):
        # Columns taken from the CSV file; all other columns are ignored.
        weather_columns = ['year', 'month', 'day', 'hour', 'DEWP', 'TEMP', 'PRES', 'Is', 'Ir']
        # Synthetic target columns appended to the frame.
        plant_columns = ['tomato', 'sunflower', 'cucumber']

        frame = pd.read_csv(path, usecols=weather_columns)

        # One random score per row for every plant, normalized to [0, 1).
        n_rows = frame.shape[0]
        health_scores = {}
        for plant in plant_columns:
            health_scores[plant] = np.random.uniform(0.0, 1.0, size=n_rows)
        self.df = frame.assign(**health_scores)

        # Split the frame into model input (source) and output (target) arrays.
        self.src = self.df[weather_columns].to_numpy()
        self.tgt = self.df[plant_columns].to_numpy()

    def __len__(self):
        """Return the number of rows (records) in the dataset."""
        return len(self.df)

    def __getitem__(self, index):
        """Return the ``(src, tgt)`` rows stored at ``index``."""
        return self.src[index], self.tgt[index]

In [118]:
# Build the path to the Beijing PM2.5 CSV file and load it as a dataset.
csvfile = os.path.join(
    dataset_root,
    "beijing_pm2.5",
    "PRSA_data_2010.1.1-2014.12.31.csv",
)
dataset = BeijingPM25Dataset(path=csvfile)

In [119]:
# Wrap the dataset in a loader that yields fixed-size batches.
# Shuffling is off, so records are served in file order.
batch_size = 32
loader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=False,
)

## Model Definition

In [120]:
import torch.nn

#### Transformer for Time-Series Forecasting

In [121]:
def create_attn_mask(length: int, device: "str | torch.device | None" = None):
    """Generate the causal mask used by attention mechanisms.

    The mask is a square float matrix whose lower triangle (diagonal
    included) is ``0.0`` and whose strictly-upper triangle is ``-inf``.
    Added to attention scores, it stops position ``i`` from attending to
    any position ``j > i``.

    Args:
        length (int): Length of square-matrix dimension.
        device (str or torch.device, optional): Device on which the mask is
            allocated. ``None`` uses the current default device.

    Returns:
        torch.Tensor: Mask of shape ``(length, length)``.

    Examples:

        >>> create_attn_mask(3)
        tensor([[0., -inf, -inf],
                [0., 0., -inf],
                [0., 0., 0.]])
    """
    # Start from a matrix filled with "-inf" ...
    blocked = torch.full((length, length), float("-inf"), device=device)

    # ... and keep only the entries strictly above the diagonal; `triu`
    # sets everything on and below the diagonal to 0.0.
    return torch.triu(blocked, diagonal=1)

In [122]:
class TimeSeriesTransformer(torch.nn.Module):
    """Encoder-decoder transformer for time-series forecasting.

    Source and target features are first projected linearly into a
    ``d_model``-wide latent space, passed through stacked transformer
    encoder/decoder layers, and finally mapped to a single output value per
    target position.

    Args:
        n_encoder_inputs: Number of features per source (encoder) position.
        n_decoder_inputs: Number of features per target (decoder) position.
        d_model: Width of the latent space used inside the transformer.
        dropout: Dropout probability of every encoder/decoder layer.
        batch_first: Passed to the transformer layers. When ``True`` the
            target sequence length is read from dimension 1 of ``tgt``,
            otherwise from dimension 0.
        nhead: Number of attention heads in every layer.
        num_layers: Number of stacked layers in the encoder and in the decoder.
    """

    def __init__(self,
        n_encoder_inputs: int,
        n_decoder_inputs: int,
        d_model: int = 512,
        dropout: float = 0.1,
        batch_first: bool = False,
        nhead: int = 8,
        num_layers: int = 8,
        ):
        super().__init__()

        self.batch_first = batch_first

        # Linear transformation from input-feature space into arbitrary n-dimension space.
        # This is similar to a word embedding used in NLP tasks.
        self.encoder_projection = torch.nn.Linear(in_features=n_encoder_inputs, out_features=d_model)
        self.decoder_projection = torch.nn.Linear(in_features=n_decoder_inputs, out_features=d_model)

        # Transformer encoder/decoder layers.
        encoder_layer = torch.nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead, # Number of multihead-attention models.
            dropout=dropout,
            dim_feedforward=4*d_model,
            batch_first=batch_first,
        )
        decoder_layer = torch.nn.TransformerDecoderLayer(
            d_model=d_model,
            nhead=nhead, # Number of multihead-attention models.
            dropout=dropout,
            dim_feedforward=4*d_model,
            batch_first=batch_first,
        )
        self.encoder = torch.nn.TransformerEncoder(encoder_layer=encoder_layer, num_layers=num_layers)
        self.decoder = torch.nn.TransformerDecoder(decoder_layer=decoder_layer, num_layers=num_layers)

        # Linear output layer.
        # We only predict a single data point at a time, so output features is 1.
        self.linear = torch.nn.Linear(in_features=d_model, out_features=1)


    def encode(self, src: torch.Tensor) -> torch.Tensor:
        """Project ``src`` into the latent space and run the encoder stack.

        Args:
            src: Source tensor whose last dimension is ``n_encoder_inputs``.

        Returns:
            Encoder memory with last dimension ``d_model``.
        """
        # Transform source into arbitrary feature space.
        x = self.encoder_projection(src)

        # Pass the linear transformation through the encoder layers.
        x = self.encoder(x)

        return x


    def decode(self, tgt: torch.Tensor, memory: torch.Tensor) -> torch.Tensor:
        """Run the decoder stack on ``tgt`` while attending to ``memory``.

        Args:
            tgt: Target tensor whose last dimension is ``n_decoder_inputs``.
            memory: Output of :meth:`encode`.

        Returns:
            Prediction tensor with last dimension 1.
        """
        # Transform target into arbitrary feature space.
        x = self.decoder_projection(tgt)

        # Create target attention mask sized by the sequence dimension,
        # whose position depends on the configured layout.
        seq_dim = 1 if self.batch_first else 0
        tgt_mask = create_attn_mask(length=tgt.size(seq_dim), device=tgt.device)

        # Pass the linear transformation through the decoder layers.
        x = self.decoder(tgt=x, memory=memory, tgt_mask=tgt_mask)

        # Pass the output of the decoder through the linear prediction layer.
        x = self.linear(x)

        return x


    def forward(self, x):
        """Encode the source and decode the target.

        Args:
            x: Pair ``(src, tgt)`` of source and target tensors.

        Returns:
            Output of :meth:`decode` for the given pair.
        """
        src, tgt = x
        memory = self.encode(src)
        return self.decode(tgt=tgt, memory=memory)

In [123]:
# Prediction problem setup.
#
# Intent: given 24 hours of data points, predict the next 1 hour of data points.
#
# NOTE(review): the model uses both values below as *feature* counts (the
# `in_features` of its input projections), not as sequence lengths — confirm
# that 24 and 1 are the intended feature widths.
n_encoder_inputs = 24 # Number of features per encoder input position.
n_decoder_inputs = 1 # Number of features per decoder input position.

d_model = 512 # Latent dimension.
dropout = 0.1

# Create new model.
# The tensors fed to the model are laid out as (batch, records, features),
# so the layers must be told that the batch dimension comes first; otherwise
# attention and the causal mask would run across the batch dimension.
model = TimeSeriesTransformer(
    n_encoder_inputs,
    n_decoder_inputs,
    d_model,
    dropout,
    batch_first=True,
)

In [124]:
# Smoke-test the forward pass with random inputs and report the output size.
n_records = 10
src = torch.rand(batch_size, n_records, n_encoder_inputs)
target_in = torch.rand(batch_size, n_records, n_decoder_inputs)

model_inputs = (src, target_in)
pred = model(model_inputs)
print(pred.shape)

torch.Size([32, 10, 1])
