### RNN in PyTorch

References:
- https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
- https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

from collections import defaultdict

from torch import nn
from torch.utils.data import Dataset, DataLoader

### Settings

In [None]:
# Device string handed to `model.to(...)` when the training model is built
device = "cpu"

### Dataset

In [None]:
# Integer sample positions 0 .. num_steps - 1
num_steps = 1000
x = np.arange(num_steps)

# Evaluate the sine on the integer positions first (the division promotes to
# float64), then store both arrays as float32, PyTorch's default float dtype
y = np.sin(x / 20).astype(np.float32)
x = x.astype(np.float32)

# Plot the generated series; title and axis labels let the figure stand alone
fig, ax = plt.subplots(1, 1, figsize=(6, 2))
_ = ax.plot(x, y)
_ = ax.set(title="Sine wave dataset", xlabel="step", ylabel="sin(step / 20)")

In [None]:
class SinDataset(Dataset):
    """Sliding-window dataset for one-step-ahead prediction of a series.

    Sample ``idx`` pairs the window ``X[idx : idx + timesteps]`` with the value
    that immediately follows it, ``X[idx + timesteps]``.

    Args:
        X: The series to window. Expected to be a 1-D NumPy array (the
            indexing in ``__getitem__`` relies on NumPy semantics).
        timesteps: Number of consecutive values in each input window.
    """

    def __init__(self, X, timesteps):
        self.X = X
        self.timesteps = timesteps

    def __len__(self):
        """Return the number of (window, target) pairs the series can provide."""
        # A window starting at idx needs X[idx + timesteps] as its target, so
        # the valid starts are 0 .. len(X) - timesteps - 1, which is
        # len(X) - timesteps samples. The window size is read from the
        # instance rather than from a notebook-level variable, and the result
        # is clamped because __len__ must never be negative.
        return max(0, len(self.X) - self.timesteps)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for the window that starts at position ``idx``.

        Returns:
            x: Array of shape ``(timesteps, 1)`` with the input window.
            y: Array of shape ``(1,)`` with the value following the window.
        """
        st = idx
        ed = idx + self.timesteps

        # Append a feature axis: (timesteps,) -> (timesteps, 1)
        x = self.X[st:ed, np.newaxis]
        # Index with a list so the target stays a length-1 array, not a scalar
        y = self.X[[ed]]

        return x, y

### Model

In [None]:
class RNN(nn.Module):
    """Single-layer RNN followed by a linear head that predicts one value.

    Args:
        timesteps: Length of the input windows. Stored for reference only; the
            forward pass works with any sequence length.
        num_ft: Number of features per time step.
        hidden_units: Size of the recurrent hidden state.
    """

    def __init__(self, timesteps, num_ft, hidden_units):
        super().__init__()

        self.num_ft = num_ft
        self.timesteps = timesteps
        self.hidden_units = hidden_units

        # batch_first=True makes the layer read inputs as
        # (batch, timesteps, num_ft). With the default layout the first axis
        # is time, so a batched input would be unrolled across samples
        # instead of across time steps.
        self.rnn = nn.RNN(num_ft, hidden_units, 1, batch_first=True)
        self.linear = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Predict the next value for each sequence in the batch.

        Defined as ``forward`` (not ``__call__``) so that ``model(x)`` goes
        through ``nn.Module.__call__`` and registered hooks keep working.

        Args:
            x: Tensor of shape ``(batch, timesteps, num_ft)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        # No explicit initial state: nn.RNN then starts from zeros, created
        # with the input's device and dtype. A random initial state would make
        # every forward pass (including evaluation) non-deterministic.
        out, _ = self.rnn(x)

        # Hidden state at the last time step: (batch, hidden_units)
        out = out[:, -1]

        # Linear read-out with no squashing: the targets are sine values in
        # [-1, 1], which a sigmoid (range (0, 1)) could never reach.
        return self.linear(out)

Check model

Building the model and running a single batch through it helps catch bugs before training.

In [None]:
# Build a small, unshuffled DataLoader for a quick smoke test
timesteps = 5

sin_ds = SinDataset(y, timesteps=timesteps)
train_dl = DataLoader(sin_ds, batch_size=8, shuffle=False)

# Grab one batch: inputs are (batch, timesteps, 1), targets are (batch, 1)
batch = next(iter(train_dl))
print(batch[0].shape, batch[1].shape)

# Run the batch through a throwaway model and verify that the predictions
# line up with the targets, so shape problems surface here and not in training
model = RNN(timesteps, num_ft=1, hidden_units=12)
with torch.no_grad():
    check_pred = model(batch[0])

assert check_pred.shape == batch[1].shape, (
    f"prediction shape {tuple(check_pred.shape)} does not match "
    f"target shape {tuple(batch[1].shape)}"
)

#### Training

In [None]:
def train_loop(epoch, dataloader, model, loss_fn, optimizer, history=None):
    """Train ``model`` for one epoch and print a running progress line.

    Args:
        epoch: Epoch number; only used in the printed progress line.
        dataloader: Iterable of ``(X, y)`` batches that exposes ``.dataset``.
        model: Module to train.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        history: Optional dict (a ``defaultdict`` works too). The mean batch
            loss of the epoch is appended to ``history["loss"]``. Anything
            that is not a dict is ignored.

    Returns:
        The mean of the per-batch losses for this epoch.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    # Set train mode
    model.train()

    # Batches are moved to wherever the model's parameters live, so the loop
    # keeps working when the model has been placed on another device
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    train_loss_batch = []
    size = len(dataloader.dataset)

    for batch, (X, y) in enumerate(dataloader):
        if model_device is not None:
            if torch.is_tensor(X):
                X = X.to(model_device)
            if torch.is_tensor(y):
                y = y.to(model_device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # "\r" rewrites the same console line for every batch
        loss, current = loss.item(), (batch + 1) * len(X)
        print(f"Epoch:{epoch} loss: {loss:>7f}  [{current:>5d}/{size:>5d}]", end="\r")

        train_loss_batch.append(loss)

    if not train_loss_batch:
        raise ValueError("dataloader produced no batches")

    # End of epoch: `loss` still holds the value from the last batch
    print(f"Epoch:{epoch} loss: {loss:>7f}  [{size:>5d}/{size:>5d}]")

    # Save loss
    train_loss = sum(train_loss_batch) / len(train_loss_batch)
    if isinstance(history, dict):
        history.setdefault("loss", []).append(train_loss)

    return train_loss


def test_loop(epoch, dataloader, model, loss_fn, history=None):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    val_loss, correct = 0, 0

    # Set evaluation mode
    model.eval()

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)

            val_loss += loss_fn(pred, y).item()

    val_loss /= num_batches
    print(
        f"Epoch:{epoch} Avg loss: {val_loss:>8f} \n"
    )

    if isinstance(history, defaultdict):
        history["val_loss"].append(val_loss)

In [None]:
timesteps = 5
num_epochs = 100

# Keep track of model metrics
history = defaultdict(list)

# Model hyperparameters
batch_size = 128
learning_rate = 0.001

# Seed PyTorch's global RNG so weight initialisation and batch shuffling are
# the same on every run of this cell
torch.manual_seed(0)

# Build DataLoader
train_dataset = SinDataset(y, timesteps=timesteps)
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize model
model = RNN(timesteps, num_ft=1, hidden_units=12).to(device)

# Initialize the loss function
loss_fn = nn.MSELoss()

# Initialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

For simplicity, we evaluate on the same dataset that was used for training.

In practice, you should always evaluate model performance on a separate holdout set.

In [None]:
# Train the model for `num_epochs` epochs. After each epoch the loss is
# measured again in eval mode on the same DataLoader and stored as "val_loss".
for epoch in range(num_epochs):
    train_loop(epoch, train_dl, model, loss_fn, optimizer, history)
    test_loop(epoch, train_dl, model, loss_fn, history)

In [None]:
# Per-epoch training loss next to the loss measured in eval mode
fig, ax = plt.subplots(1, 1, figsize=(8, 4))

_ = ax.plot(history["loss"], label="loss")
_ = ax.plot(history["val_loss"], label="val_loss")
_ = ax.set(title="Loss per epoch", xlabel="epoch", ylabel="MSE loss")
# Use the Axes method so the legend is attached to this figure explicitly
_ = ax.legend()

#### Evaluation

In [None]:
# Collect predictions and targets batch by batch. `train_dl` shuffles, so the
# samples come in random order, but each prediction stays aligned with its target.
pred_batches = []
true_batches = []

# Inference only: eval mode, and no autograd graph is built
model.eval()
with torch.no_grad():
    for inputs, targets in train_dl:
        outputs = model(inputs.to(device))

        # Drop the trailing axis: (batch, 1) -> (batch,)
        pred_batches.append(outputs[:, 0].cpu().numpy())
        true_batches.append(targets[:, 0].cpu().numpy())

y_pred = np.concatenate(pred_batches)
y_true = np.concatenate(true_batches)

In [None]:
# Mean squared error between the collected targets and predictions
squared_errors = np.square(y_true - y_pred)
mse = squared_errors.mean()
print(f'MSE: {mse:.4}')