### Temporal Convolutional Network (TCN) for time series forecasting

TCN model original code:
- https://github.com/locuslab/TCN

In [None]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import defaultdict

from torch import nn
from torch.nn.utils import weight_norm
from torch.utils.data import Dataset, DataLoader

### Settings

In [None]:
# Device the model is placed on via `.to(device)`; this notebook runs on CPU.
device = "cpu"

### Data

In [None]:
# Load the AirPassengers CSV; the first column becomes a parsed date index
df = pd.read_csv(
    "https://raw.githubusercontent.com/AileenNielsen/"
    "TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv",
    index_col=0,
    parse_dates=True,
)
print(f"# of samples: {len(df)}")

# Relative change versus the previous row. The first row has no predecessor
# and comes out as NaN; for simplicity it is replaced by 0. The right
# strategy depends on the project, target and dataset.
df["%Chg Passengers"] = df["#Passengers"].pct_change(1).fillna(0)

# One panel per series: raw counts on top, relative change below
fig, ax = plt.subplots(2, 1, figsize=(6, 4))
for axis, column in zip(ax, ["#Passengers", "%Chg Passengers"]):
    _ = df[[column]].plot(ax=axis)
_ = plt.tight_layout()

In [None]:
# Target series as float32 (the default PyTorch floating point type)
y = df["%Chg Passengers"].values.astype(np.float32)

# Chronological split: the first 80% of the samples are used for training,
# the remainder is held out
pct_train = 0.8
n_train_samples = int(len(y) * pct_train)

y_train, y_val = y[:n_train_samples], y[n_train_samples:]

In [None]:
class PassengerDataset(Dataset):
    """Sliding-window view over a 1-D series for one-step-ahead forecasting.

    Item ``idx`` is the pair ``(x, y)`` where ``x`` holds the ``timesteps``
    consecutive values starting at position ``idx`` with shape
    ``(timesteps, 1)``, and ``y`` is the single value that follows the
    window, with shape ``(1,)``.

    Args:
        X: 1-D NumPy array with the series values.
        timesteps: number of consecutive values in each input window.
    """

    def __init__(self, X, timesteps):
        self.X = X
        self.timesteps = timesteps

    def __len__(self):
        # Window `idx` needs X[idx + timesteps] as its target, so the last
        # usable start is len(X) - timesteps - 1, which gives
        # len(X) - timesteps windows. Series that are too short yield 0.
        return max(len(self.X) - self.timesteps, 0)

    def __getitem__(self, idx):
        st = idx
        ed = idx + self.timesteps

        # Trailing axis of size 1 is added to the window
        x = self.X[st:ed, np.newaxis]
        # List index keeps the target as a 1-element array instead of a scalar
        y = self.X[[ed]]

        return x, y

### Model

In [None]:
class Chomp1d(nn.Module):
    """Remove the last ``chomp_size`` positions along the final axis.

    Args:
        chomp_size: number of trailing positions to drop; ``0`` leaves the
            input untouched.
    """

    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        # `x[:, :, :-0]` would be an empty slice, so zero is handled explicitly
        if self.chomp_size == 0:
            return x
        return x[:, :, : -self.chomp_size]


class SquezeeChannels(nn.Module):
    """Drop the trailing axis of the input when that axis has size 1.

    Inputs whose last axis is larger than 1 are returned with their shape
    unchanged.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x.squeeze(-1)


class CausalBlock(nn.Module):
    """Residual block built around one dilated causal 1-D convolution.

    The main path is convolution -> chomp -> ReLU -> dropout. Its output is
    added to the (optionally 1x1-convolved) input and passed through a final
    ReLU.

    Args:
        n_inputs: number of input channels.
        n_outputs: number of output channels.
        kernel_size: convolution kernel size.
        dilation: convolution dilation.
        dropout: dropout probability applied after the activation.
    """

    def __init__(self, n_inputs, n_outputs, kernel_size, dilation, dropout=0.2):
        super().__init__()

        # Conv1d pads both ends by this amount; the extra elements on the
        # right are discarded afterwards to make the convolution causal.
        pad = dilation * (kernel_size - 1)

        self.conv1d = nn.Conv1d(
            n_inputs, n_outputs, kernel_size, padding=pad, dilation=dilation
        )
        self.comp1d = Chomp1d(pad)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1d, self.comp1d, self.relu, self.dropout)

        # The skip path needs a 1x1 convolution only when the channel counts
        # differ; otherwise the input is added as-is.
        if n_inputs == n_outputs:
            self.upperdownsample = None
        else:
            self.upperdownsample = nn.Conv1d(n_inputs, n_outputs, 1)

        self.init_weights()

    def init_weights(self):
        """Re-draw the main convolution weights from N(0, 0.01)."""
        self.conv1d.weight.data.normal_(mean=0, std=0.01)

    def forward(self, x):
        out = self.net(x)
        skip = x if self.upperdownsample is None else self.upperdownsample(x)
        return self.relu(out + skip)


class CausalConvNet(nn.Module):
    """Stack of ``CausalBlock`` levels with dilation doubling at each level.

    Args:
        num_inputs: number of channels of the network input.
        num_channels: output channels of each level; its length sets the
            number of levels.
        kernel_size: kernel size shared by all levels.
        dropout: dropout probability shared by all levels.
    """

    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super().__init__()

        # Input width per level: the raw input for the first level, then the
        # output width of the preceding level.
        widths_in = [num_inputs, *num_channels[:-1]]

        # Level `depth` uses dilation 2**depth (1, 2, 4, ...)
        blocks = [
            CausalBlock(
                width_in,
                width_out,
                kernel_size,
                dilation=2**depth,
                dropout=dropout,
            )
            for depth, (width_in, width_out) in enumerate(zip(widths_in, num_channels))
        ]

        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        return self.network(x)

In [None]:
class TCN(nn.Module):
    """Causal convolutional network followed by a single-output linear head.

    The pipeline is ``CausalConvNet -> dropout -> squeeze -> linear``. The
    trailing axis is squeezed before the linear layer, which maps
    ``num_channels[-1]`` features to one value, so the convolutional output
    is expected to have size 1 on its last axis.

    Args:
        input_size: number of input channels of the convolutional stack.
        num_channels: output channels of each convolutional level.
        kernel_size: kernel size used by every level.
        dropout: dropout probability, used both inside the stack and before
            the linear head.
    """

    def __init__(self, input_size, num_channels, kernel_size=2, dropout=0.2):
        super().__init__()

        # Width of the last convolutional level = input width of the head
        out_channels = num_channels[-1]

        # Network architecture
        self.tcn = CausalConvNet(
            input_size, num_channels, kernel_size=kernel_size, dropout=dropout
        )
        self.dropout = nn.Dropout(dropout)
        self.squeeze = SquezeeChannels()
        self.linear = nn.Linear(out_channels, 1)

        self.network = torch.nn.Sequential(
            self.tcn, self.dropout, self.squeeze, self.linear
        )

    def forward(self, x):
        return self.network(x)

#### Training

In [None]:
def train_loop(epoch, dataloader, model, loss_fn, optimizer, history=None):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        epoch: epoch index, used only in the progress messages.
        dataloader: iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: module to train; it is switched to train mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer stepped once per batch.
        history: optional dict (for example ``defaultdict(list)``). When
            given, the mean loss over all batches of this epoch is appended
            to ``history["loss"]``.
    """
    # Set train mode
    model.train()

    size = len(dataloader.dataset)

    # Every batch loss is kept so the epoch average covers the whole epoch,
    # not only the batches that happen to be printed.
    batch_losses = []

    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

        # Progress line every 500 batches, overwritten in place
        if batch % 500 == 0:
            current = (batch + 1) * len(X)
            print(
                f"Epoch:{epoch} loss: {batch_losses[-1]:>7f}  [{current:>5d}/{size:>5d}]",
                end="\r",
            )

    # Nothing was trained on: there is no loss to report or record
    if not batch_losses:
        print(f"Epoch:{epoch} no training batches")
        return

    # End of epoch: report the loss of the last batch
    print(f"Epoch:{epoch} loss: {batch_losses[-1]:>7f}  [{size:>5d}/{size:>5d}]")

    # Save loss
    if isinstance(history, dict):
        train_loss = sum(batch_losses) / len(batch_losses)
        history.setdefault("loss", []).append(train_loss)


def test_loop(epoch, dataloader, model, loss_fn, history=None):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    val_loss, correct = 0, 0

    # Set evaluation mode
    model.eval()

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)

            val_loss += loss_fn(pred, y).item()

    val_loss /= num_batches
    print(f"Epoch:{epoch} Avg loss: {val_loss:>8f} \n")

    if isinstance(history, defaultdict):
        history["val_loss"].append(val_loss)

In [None]:
# Length of each input window and number of passes over the training data
timesteps = 5
num_epochs = 50

# Keep track of model metrics (lists of per-epoch values keyed by metric name)
history = defaultdict(list)

# Training hyperparameters
batch_size = 128
learning_rate = 0.001

# Build DataLoader
# shuffle=False: batches are served in the original order of the series
train_dataset = PassengerDataset(y_train, timesteps=timesteps)
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

val_dataset = PassengerDataset(y_val, timesteps=timesteps)
val_dl = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Initialize model
# Each of the `timesteps` values of a window is fed as one input channel
model = TCN(input_size=timesteps, num_channels=[12, 24], kernel_size=2).to(device)

# Initialize the loss function
loss_fn = nn.MSELoss()

# Initialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
for epoch in range(num_epochs):
    train_loop(epoch, train_dl, model, loss_fn, optimizer, history)
    # Validate on the held-out split, not on the data just trained on
    test_loop(epoch, val_dl, model, loss_fn, history)

In [None]:
# Training vs. validation loss per epoch
fig, ax = plt.subplots(1, 1, figsize=(8, 4))

_ = ax.plot(history["loss"], label="loss")
_ = ax.plot(history["val_loss"], label="val_loss")
# The labels above are only shown once a legend is drawn
_ = ax.legend()

### Evaluation

In [None]:
y_pred = []
y_true = []

# Predict with dropout disabled and without tracking gradients, regardless
# of the mode the model was left in by earlier cells.
model.eval()
with torch.no_grad():
    for batch in val_dl:
        y_pred_batch = model(batch[0])
        y_true_batch = batch[1]

        # Column 0 holds the single predicted / target value of each sample
        y_pred.extend(y_pred_batch[:, 0].detach().cpu().numpy())
        y_true.extend(y_true_batch[:, 0].detach().cpu().numpy())

y_pred = np.array(y_pred)
y_true = np.array(y_true)

In [None]:
# Mean squared error of the model predictions against the true values
errors = y_true - y_pred
mse = np.mean(errors**2)
print(f"MSE: {mse:.4}")

In [None]:
# Reference point: MSE of a naive forecast that repeats the previous true
# value, i.e. the mean squared difference between consecutive values
mse = np.mean(np.diff(y_true) ** 2)
print(f"MSE: {mse:.4}")

Visualize prediction

In [None]:
# Predicted vs. true values on the validation windows
fig, ax = plt.subplots(1, 1, figsize=(8, 4))

_ = ax.plot(y_pred, label="pred")
_ = ax.plot(y_true, label="true")
# The labels above are only shown once a legend is drawn
_ = ax.legend()