# Model Playground

Sources:
- Time-series Transformer guide: <https://towardsdatascience.com/the-time-series-transformer-2a521a0efad3>
- Time2Vec embedding: <https://arxiv.org/pdf/1907.05321.pdf>

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

## Load Datasets

In [2]:
# Detect whether this notebook is running on Google Colab by probing for the
# `google.colab` package. Only a failed import is treated as "not on Colab";
# any other exception (e.g. KeyboardInterrupt) is allowed to propagate.
try:
    import google.colab  # noqa: F401 -- imported only to test availability.
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

In [3]:
# Root directory of the datasets. Default to the repository-relative folder and
# switch to the mounted Google Drive copy when running on Colab.
dataset_root = "../datasets/"
if IN_COLAB:
    from google.colab import drive

    # Mount Google Drive so the dataset files stored there become readable.
    drive.mount("/content/gdrive")
    dataset_root = "/content/gdrive/My Drive/Virginia Tech/graduate/research/makassar/repos/makassar-ml/datasets/"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


### Dataset: Beijing PM2.5

In [4]:
# Load only the columns needed from the Beijing PM2.5 CSV file.
fields = [
    'year', 'month', 'day', 'hour',
    'DEWP', 'TEMP', 'PRES', 'Is', 'Ir',
]
csvfile = os.path.join(
    dataset_root,
    "beijing_pm2.5",
    "PRSA_data_2010.1.1-2014.12.31.csv",
)
df = pd.read_csv(csvfile, usecols=fields)

# Print column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43824 entries, 0 to 43823
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   year    43824 non-null  int64  
 1   month   43824 non-null  int64  
 2   day     43824 non-null  int64  
 3   hour    43824 non-null  int64  
 4   DEWP    43824 non-null  int64  
 5   TEMP    43824 non-null  float64
 6   PRES    43824 non-null  float64
 7   Is      43824 non-null  int64  
 8   Ir      43824 non-null  int64  
dtypes: float64(2), int64(7)
memory usage: 3.0 MB


In [5]:
# Combine the separate year/month/day/hour columns into one datetime column
# named `date`.
timestamp_parts = ['year', 'month', 'day', 'hour']
df = df.assign(date=lambda frame: pd.to_datetime(frame[timestamp_parts]))
df.head()

Unnamed: 0,year,month,day,hour,DEWP,TEMP,PRES,Is,Ir,date
0,2010,1,1,0,-21,-11.0,1021.0,0,0,2010-01-01 00:00:00
1,2010,1,1,1,-21,-12.0,1020.0,0,0,2010-01-01 01:00:00
2,2010,1,1,2,-21,-11.0,1019.0,0,0,2010-01-01 02:00:00
3,2010,1,1,3,-21,-14.0,1019.0,0,0,2010-01-01 03:00:00
4,2010,1,1,4,-20,-12.0,1018.0,0,0,2010-01-01 04:00:00


In [6]:
# Add synthetic health scores to the dataset for specific plants.
# Each score is drawn independently from a uniform distribution over [0, 1).
# A fixed seed is used so that re-running the notebook from a fresh kernel
# reproduces exactly the same scores.
features = ['tomato', 'sunflower', 'cucumber']
score_rng = np.random.default_rng(seed=42)
df = df.assign(**{feat: score_rng.uniform(0.0, 1.0, size=df.shape[0]) for feat in features})
df.head()

Unnamed: 0,year,month,day,hour,DEWP,TEMP,PRES,Is,Ir,date,tomato,sunflower,cucumber
0,2010,1,1,0,-21,-11.0,1021.0,0,0,2010-01-01 00:00:00,0.01644,0.971716,0.430654
1,2010,1,1,1,-21,-12.0,1020.0,0,0,2010-01-01 01:00:00,0.73973,0.838695,0.563574
2,2010,1,1,2,-21,-11.0,1019.0,0,0,2010-01-01 02:00:00,0.300257,0.810685,0.262789
3,2010,1,1,3,-21,-14.0,1019.0,0,0,2010-01-01 03:00:00,0.668707,0.566183,0.416542
4,2010,1,1,4,-20,-12.0,1018.0,0,0,2010-01-01 04:00:00,0.289482,0.65211,0.521938


In [7]:
# Split the dataset column-wise: `df_src` holds the model inputs and `df_tgt`
# holds the values to be predicted.
src_columns = ['date', 'DEWP', 'TEMP', 'PRES', 'Is', 'Ir']
tgt_columns = ['tomato', 'sunflower', 'cucumber']
df_src = df[src_columns]
df_tgt = df[tgt_columns]

## Model Definition

In [8]:
import torch.nn

### Transformer for Time-Series Forecasting

In [9]:
class TimeSeriesTransformer(torch.nn.Module):
    """Encoder-decoder transformer for time-series forecasting.

    Source and target sequences are linearly projected into a `d_model`-wide
    latent space, passed through a stack of transformer encoder/decoder
    layers, and mapped to the output features by a final linear layer.

    The transformer layers are created with PyTorch's default layout
    (`batch_first=False`), so sequences are expected with shape
    `(sequence_length, batch_size, n_features)`.

    Args:
        n_encoder_inputs: Number of features per time step of the source
            sequence (`in_features` of the encoder projection).
        n_decoder_inputs: Number of features per time step of the target
            sequence (`in_features` of the decoder projection).
        d_model: Width of the latent space. Must be divisible by `nhead`.
        dropout: Dropout probability used inside the transformer layers.
        n_outputs: Number of features predicted per time step.
        nhead: Number of attention heads in every transformer layer.
        num_layers: Number of stacked layers in both the encoder and the
            decoder.
    """

    def __init__(self,
        n_encoder_inputs: int,
        n_decoder_inputs: int,
        d_model: int = 512,
        dropout: float = 0.1,
        n_outputs: int = 1,
        nhead: int = 8,
        num_layers: int = 8,
        ):
        super().__init__()

        # Linear transformation from input-feature space into arbitrary n-dimension space.
        # This is similar to a word embedding used in NLP tasks.
        self.encoder_projection = torch.nn.Linear(in_features=n_encoder_inputs, out_features=d_model)
        self.decoder_projection = torch.nn.Linear(in_features=n_decoder_inputs, out_features=d_model)

        # Transformer encoder/decoder layers.
        encoder_layer = torch.nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead, # Number of attention heads.
            dropout=dropout,
            dim_feedforward=4*d_model,
        )
        decoder_layer = torch.nn.TransformerDecoderLayer(
            d_model=d_model,
            nhead=nhead, # Number of attention heads.
            dropout=dropout,
            dim_feedforward=4*d_model,
        )
        self.encoder = torch.nn.TransformerEncoder(encoder_layer=encoder_layer, num_layers=num_layers)
        self.decoder = torch.nn.TransformerDecoder(decoder_layer=decoder_layer, num_layers=num_layers)

        # Linear output layer mapping the latent space to the predicted features.
        # By default a single value is predicted per time step.
        self.linear = torch.nn.Linear(in_features=d_model, out_features=n_outputs)


    def encode(self, src):
        """Project the source sequence and run it through the encoder stack.

        Args:
            src: Tensor of shape `(src_len, batch_size, n_encoder_inputs)`.

        Returns:
            Encoder memory of shape `(src_len, batch_size, d_model)`.
        """
        x = self.encoder_projection(src)
        return self.encoder(x)


    def decode(self, tgt, memory):
        """Decode the target sequence against the encoder memory.

        A causal mask is applied so that each target position can only attend
        to itself and to earlier target positions.

        Args:
            tgt: Tensor of shape `(tgt_len, batch_size, n_decoder_inputs)`.
            memory: Encoder output of shape `(src_len, batch_size, d_model)`.

        Returns:
            Predictions of shape `(tgt_len, batch_size, n_outputs)`.
        """
        x = self.decoder_projection(tgt)

        # Additive attention mask: -inf strictly above the diagonal blocks
        # attention to future positions, zeros elsewhere leave it unchanged.
        n_steps = x.size(0)
        causal_mask = torch.triu(
            torch.full((n_steps, n_steps), float("-inf"), dtype=x.dtype, device=x.device),
            diagonal=1,
        )

        x = self.decoder(tgt=x, memory=memory, tgt_mask=causal_mask)
        return self.linear(x)


    def forward(self, src, tgt):
        """Run the full encoder-decoder pass.

        Args:
            src: Source sequence, see `encode`.
            tgt: Target (decoder input) sequence, see `decode`.

        Returns:
            Predictions of shape `(tgt_len, batch_size, n_outputs)`.
        """
        memory = self.encode(src)
        return self.decode(tgt=tgt, memory=memory)

In [10]:
# Prediction problem setup: a window of 24 hourly data points is used to
# predict the single hourly data point that follows it.
#
# NOTE(review): TimeSeriesTransformer passes these two values to
# torch.nn.Linear(in_features=...), i.e. it treats them as the number of
# features per time step rather than as sequence lengths -- confirm which
# meaning is intended before training.
n_encoder_inputs = 24 # Number of data points in input sequence.
n_decoder_inputs = 1 # Number of data points in output sequence.

# Model hyperparameters: latent dimension and dropout probability.
d_model, dropout = 512, 0.1

# Instantiate the model, passing every setting by keyword.
model = TimeSeriesTransformer(
    n_encoder_inputs=n_encoder_inputs,
    n_decoder_inputs=n_decoder_inputs,
    d_model=d_model,
    dropout=dropout,
)

In [11]:
# Show the model's repr, which lists its nested sub-modules.
model

TimeSeriesTransformer(
  (encoder_projection): Linear(in_features=24, out_features=512, bias=True)
  (decoder_projection): Linear(in_features=1, out_features=512, bias=True)
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_f