In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset

# RNNs & LSTMs

## Data Loading

In [None]:
# Counter location analysed throughout the notebook.
LOCATION = "Nelson St"

# Read the raw counts; `time` is parsed to datetimes while loading.
cycle_counts = pd.read_csv("cycle_counts.csv", parse_dates=["time"])

# Keep only the rows for the chosen location. The explicit copy makes the
# result an independent frame, so later column assignments do not write into
# a slice of the raw data (which raises pandas' SettingWithCopyWarning).
cycle_counts = cycle_counts.loc[cycle_counts["location"] == LOCATION].copy()

In [None]:
# Quick visual check of the raw counts for the selected location.
fig, ax = plt.subplots()
times, counts = cycle_counts["time"], cycle_counts["count"]
ax.plot(times, counts, lw=1.5)
ax.set_title(LOCATION)
ax.set_ylabel("Count")
# Rotate the date labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout();

In [None]:
# Build a regular daily series indexed by time.
# NOTE: this cell consumes the `time` and `location` columns, so it can only
# be run once after the loading cell.
cycle_counts = (
    cycle_counts
    # `assign` returns a new frame instead of writing into a filtered slice.
    .assign(time=lambda frame: pd.to_datetime(frame["time"]))
    .set_index("time")
    .drop(columns=["location"])
    .resample("D")
    # `min_count=1` leaves days without any observation as NaN. A plain
    # `sum()` would turn them into 0, leaving nothing for `interpolate()`
    # to fill and showing missing days as zero traffic.
    .sum(min_count=1)
    .interpolate()
)

## Data Utils

In [None]:
class Scaler():
    """Standardise a series by subtracting its mean and dividing by its std.

    The statistics are estimated with ``y.mean()`` and ``y.std()`` and stored
    in ``mean_`` and ``scale_``. Both are ``None`` until the scaler is fitted.
    """

    def __init__(self):
        self.mean_: float | None = None
        self.scale_: float | None = None

    @property
    def is_fit(self) -> bool:
        """Return True once both ``mean_`` and ``scale_`` have been estimated."""
        return self.mean_ is not None and self.scale_ is not None

    def fit(self, y: pd.Series) -> "Scaler":
        """Estimate ``mean_`` and ``scale_`` from ``y`` and return ``self``."""
        mean = float(y.mean())
        scale = float(y.std())
        # A constant series has zero spread and a single value has an
        # undefined one. Dividing by either gives NaN/inf, so fall back to a
        # neutral scale of 1 (the data are then only centred).
        if not np.isfinite(scale) or scale == 0.0:
            scale = 1.0
        self.mean_, self.scale_ = mean, scale
        return self

    def fit_transform(self, y: pd.Series) -> pd.Series:
        """Fit the scaler on ``y`` and return the standardised ``y``."""
        return self.fit(y).transform(y)

    def transform(self, y: pd.Series) -> pd.Series:
        """Standardise ``y`` with the stored statistics.

        Raises:
            AssertionError: if the scaler has not been fitted.
        """
        assert self.is_fit, "Scaler must be fitted before calling transform()"
        return (y - self.mean_) / self.scale_

    def inverse_transform(self, y: pd.Series) -> pd.Series:
        """Map standardised values back to the original scale.

        Raises:
            AssertionError: if the scaler has not been fitted.
        """
        assert self.is_fit, "Scaler must be fitted before calling inverse_transform()"
        return y * self.scale_ + self.mean_

In [None]:
def get_train_dataset(
    timeseries: pd.Series,
    in_seq_length: int,
    out_seq_length: int,
    batched_input: bool = False,
    batched_output: bool = False
) -> tuple[torch.Tensor, torch.Tensor]:
    """Slice a series into sliding (input window, target window) pairs.

    Every start position ``i`` for which both the input window
    ``[i, i + in_seq_length)`` and the following ``out_seq_length`` values
    fit inside the series produces one training sample.

    Args:
        timeseries: Values to window, in time order.
        in_seq_length: Number of consecutive values in each input window.
        out_seq_length: Number of consecutive values in each target window.
        batched_input: If True, add a trailing feature axis so ``X`` has shape
            ``(n, in_seq_length, 1)`` instead of ``(n, in_seq_length)``.
        batched_output: If True, return one target window per input timestep
            (the ``out_seq_length`` values following that timestep), giving
            ``y`` of shape ``(n, in_seq_length, out_seq_length)``. Otherwise
            only the window following the whole input is returned, with shape
            ``(n, out_seq_length)``.

    Returns:
        ``(X_train, y_train)`` as float32 tensors.

    Raises:
        ValueError: if a sequence length is not positive or the series is too
            short to yield a single sample.
    """
    if in_seq_length < 1 or out_seq_length < 1:
        raise ValueError("in_seq_length and out_seq_length must be positive")

    # Slice a plain array once; this is far cheaper than `iloc` per window.
    values = timeseries.to_numpy()

    # The last usable start is the one whose target window ends exactly on the
    # final observation, hence the `+ 1`.
    n_windows = len(values) - in_seq_length - out_seq_length + 1
    if n_windows < 1:
        raise ValueError(
            f"timeseries of length {len(values)} is too short for "
            f"in_seq_length={in_seq_length} and out_seq_length={out_seq_length}"
        )

    features = np.stack(
        [values[i: i + in_seq_length] for i in range(n_windows)]
    )

    if batched_output:
        # One target window for each timestep j of each input window i.
        labels = np.stack([
            np.stack([
                values[i + j + 1: i + j + 1 + out_seq_length]
                for j in range(in_seq_length)
            ])
            for i in range(n_windows)
        ])
    else:
        # A single target window directly after each input window.
        labels = np.stack([
            values[i + in_seq_length: i + in_seq_length + out_seq_length]
            for i in range(n_windows)
        ])

    X_train = torch.tensor(features, dtype=torch.float32)
    if batched_input:
        X_train = X_train.unsqueeze(-1)

    y_train = torch.tensor(labels, dtype=torch.float32)

    return X_train, y_train

In [None]:
def get_test_dataset_and_index(
    timeseries: pd.Series,
    in_seq_length: int,
    out_seq_length: int,
    batched_input: bool = False,
) -> tuple[torch.Tensor, list[pd.Index]]:
    """Build non-overlapping forecast windows and the index each one predicts.

    Input windows start every ``out_seq_length`` steps, so the forecast
    horizons that follow them tile the series without overlapping.

    Args:
        timeseries: Values to window, in time order.
        in_seq_length: Number of consecutive values in each input window.
        out_seq_length: Forecast horizon; also the stride between windows.
        batched_input: If True, add a trailing feature axis so ``X`` has shape
            ``(n, in_seq_length, 1)`` instead of ``(n, in_seq_length)``.

    Returns:
        ``(X_test, test_index)``: a float32 tensor of input windows and, for
        each window, the slice of ``timeseries.index`` covering the
        ``out_seq_length`` positions immediately after it.

    Raises:
        ValueError: if a sequence length is not positive or the series is too
            short to yield a single window.
    """
    if in_seq_length < 1 or out_seq_length < 1:
        raise ValueError("in_seq_length and out_seq_length must be positive")

    values = timeseries.to_numpy()

    # Largest start whose forecast horizon still lies inside the series.
    last_start = len(values) - in_seq_length - out_seq_length
    if last_start < 0:
        raise ValueError(
            f"timeseries of length {len(values)} is too short for "
            f"in_seq_length={in_seq_length} and out_seq_length={out_seq_length}"
        )

    # `last_start + 1` keeps the final window when it ends exactly on the last
    # observation.
    starts = range(0, last_start + 1, out_seq_length)

    X_test = torch.tensor(
        np.stack([values[s: s + in_seq_length] for s in starts]),
        dtype=torch.float32,
    )
    if batched_input:
        X_test = X_test.unsqueeze(-1)

    test_index = [
        timeseries.index[s + in_seq_length: s + in_seq_length + out_seq_length]
        for s in starts
    ]
    return X_test, test_index

## RNNs

### Forecast a single timestep ahead

In [None]:
class RNN_1(nn.Module):
    """Hand-rolled single-layer RNN, unrolled over exactly three timesteps.

    The recurrence is written out step by step to show what a recurrent layer
    computes: ``h_t = relu(ih(x_t) + hh(h_{t-1}))``. After the third input the
    hidden state is mapped to a single output value.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Size of the hidden state.
    """

    def __init__(self, input_size: int = 1, hidden_size: int = 25):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.ih = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.hh = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.ho = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the three-step recurrence and return a ``(batch_size, 1)`` tensor.

        ``x`` may be ``(batch_size, 3)`` when ``input_size`` is 1, or
        ``(batch_size, 3, input_size)`` in general.
        """
        # Reshape to (batch_size, seq_len, input_size)
        x = x.reshape(-1, 3, self.input_size)

        # Initialise the hidden state on the same device as the input so the
        # model also works after `.to(device)`.
        h = torch.zeros(
            x.shape[0], self.hidden_size, dtype=torch.float32, device=x.device
        )

        # Hidden state after first input
        h = F.relu(self.ih(x[:, 0, :]) + self.hh(h))

        # Hidden state after second input
        h = F.relu(self.ih(x[:, 1, :]) + self.hh(h))

        # Hidden state after third input
        h = F.relu(self.ih(x[:, 2, :]) + self.hh(h))

        # Output based on current hidden state
        return self.ho(h)

In [None]:
# TODO: Hide some of the logic here as an exercise

# Standardise the counts before feeding them to the network.
scaler = Scaler()
scaled_counts = scaler.fit_transform(cycle_counts["count"])

# Three past values are used to predict the single next value.
in_seq_len = 3
out_seq_len = 1

# One window per start position that still has a label after it.
window_starts = range(len(scaled_counts) - in_seq_len)

features = [
    scaled_counts.iloc[start: start + in_seq_len].values
    for start in window_starts
]
labels = [
    scaled_counts.iloc[start + in_seq_len: start + in_seq_len + out_seq_len].values
    for start in window_starts
]

X_train = torch.tensor(
    np.array(features).reshape(-1, in_seq_len), dtype=torch.float32
)
y_train = torch.tensor(
    np.array(labels).reshape(-1, out_seq_len), dtype=torch.float32
)

In [None]:
# Training loop

# TODO: Hide some of the logic here as an exercise

model = RNN_1()
loss_fn = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=1e-03)

n_epochs = 50

n_samples = X_train.shape[0]
batch_size = 30
# Ceiling division: a partial final batch is kept, but no empty batch is
# produced when n_samples is an exact multiple of batch_size (an empty batch
# gives a NaN loss and a pointless optimizer step).
n_batches = (n_samples + batch_size - 1) // batch_size

model.train()

for epoch in range(n_epochs):
    for batch in range(n_batches):
        batch_start = batch * batch_size
        batch_end = batch_start + batch_size
        X_batch = X_train[batch_start: batch_end]
        y_batch = y_train[batch_start: batch_end]

        # Zero gradients at the start of each new batch
        # Otherwise gradients are accumulated between batches
        optimizer.zero_grad()

        # Forward pass through the model
        y_hat = model(X_batch)

        # Backward pass computes gradients of all model weights
        # (loss functions take the prediction first, then the target)
        loss = loss_fn(y_hat, y_batch)
        loss.backward()

        # Adjust weights according to gradients
        optimizer.step()


In [None]:
# Check the fitted values
model.eval()
with torch.no_grad():
    fitted_values = model(X_train)

# The first `in_seq_len` observations are only ever used as inputs, so the
# predictions line up with the index from position `in_seq_len` onwards.
fitted_values = fitted_values.numpy().reshape(-1)
fitted_values = pd.Series(fitted_values, index=scaled_counts.index[in_seq_len:])
# Undo the standardisation so the fit is on the same scale as the counts.
fitted_values = scaler.inverse_transform(fitted_values)

fig, ax = plt.subplots()
# Label both lines and show a legend, matching the later forecast plots.
ax.plot(cycle_counts.index, cycle_counts["count"], lw=1.5, label="Observed")
ax.plot(fitted_values, label="Fitted")
ax.legend(loc=1)
ax.set(title=LOCATION, ylabel="Count")
for tick in ax.get_xticklabels():
    tick.set_rotation(45)
fig.tight_layout();

### Forecast multiple timesteps ahead

In [None]:
# TODO: Remove some logic to make an exercise

class RNN_2(nn.Module):
    """Hand-rolled single-layer RNN that emits a forecast at every timestep.

    For each of the ``in_seq_length`` input steps the hidden state is updated
    as ``h_t = relu(ih(x_t) + hh(h_{t-1}))`` and mapped to an
    ``out_seq_length``-long output vector.

    Args:
        in_seq_length: Number of timesteps in each input sequence.
        out_seq_length: Number of values predicted at every timestep.
        input_size: Number of features per timestep.
        hidden_size: Size of the hidden state.
    """

    def __init__(
        self,
        in_seq_length: int,
        out_seq_length: int,
        input_size: int = 1,
        hidden_size: int = 25
    ):
        super().__init__()
        self.in_seq_length = in_seq_length
        self.out_seq_length = out_seq_length
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.ih = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.hh = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.ho = nn.Linear(in_features=hidden_size, out_features=out_seq_length)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return outputs of shape ``(batch_size, in_seq_length, out_seq_length)``.

        ``x`` may be ``(batch_size, in_seq_length)`` when ``input_size`` is 1,
        or ``(batch_size, in_seq_length, input_size)`` in general.
        """
        # Reshape to (batch_size, seq_len, input_size)
        x = x.reshape(-1, self.in_seq_length, self.input_size)

        # Initialise the hidden state on the same device as the input so the
        # model also works after `.to(device)`.
        h = torch.zeros(
            x.shape[0], self.hidden_size, dtype=torch.float32, device=x.device
        )

        # Collect out sequence at every timestep of the input seq
        outs = []

        # Loop over every input in the input seq
        for i in range(self.in_seq_length):
            h = F.relu(self.ih(x[:, i, :]) + self.hh(h))
            outs.append(self.ho(h))

        # Stack along a new time axis: (batch_size, seq_len, out_seq_length)
        outs = torch.stack(outs, dim=1)
        return outs

In [None]:
# Standardise the counts, then window them for sequence-to-sequence training.
scaler = Scaler()
scaled_counts = scaler.fit_transform(cycle_counts["count"])

# 120 input steps are used to forecast the following 21 steps.
in_seq_length = 120
out_seq_length = 21

# Targets are produced for every input timestep (batched_output=True).
X_train, y_train = get_train_dataset(
    timeseries=scaled_counts,
    in_seq_length=in_seq_length,
    out_seq_length=out_seq_length,
    batched_output=True,
)

In [None]:
# Training loop

model = RNN_2(in_seq_length=in_seq_length, out_seq_length=out_seq_length)
loss_fn = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=1e-03)

n_epochs = 300

n_samples = X_train.shape[0]
batch_size = 30
# Ceiling division: a partial final batch is kept, but no empty batch is
# produced when n_samples is an exact multiple of batch_size (an empty batch
# gives a NaN loss, which would also be the value printed below).
n_batches = (n_samples + batch_size - 1) // batch_size

model.train()

for epoch in range(n_epochs):
    for batch in range(n_batches):
        batch_start = batch * batch_size
        batch_end = batch_start + batch_size
        X_batch = X_train[batch_start: batch_end]
        y_batch = y_train[batch_start: batch_end]

        # Zero gradients at the start of each new batch
        # Otherwise gradients are accumulated between batches
        optimizer.zero_grad()

        # Forward pass through the model
        y_hat = model(X_batch)

        # Backward pass computes gradients of all model weights
        # (loss functions take the prediction first, then the target)
        loss = loss_fn(y_hat, y_batch)
        loss.backward()

        # Adjust weights according to gradients
        optimizer.step()

    # Report the loss of the last batch every 10 epochs
    if epoch % 10 == 0:
        print(f'Epoch [{epoch}/{n_epochs}], Loss: {loss.item():.4f}')
        print("------------------------------")

In [None]:
# Fitted values: one input window per non-overlapping forecast horizon.
X_test, test_index = get_test_dataset_and_index(
    scaled_counts,
    in_seq_length,
    out_seq_length,
    batched_input=False,
)

model.eval()
with torch.no_grad():
    # Keep only the forecast emitted after the final input timestep.
    outputs = model(X_test)[:, -1, :].numpy()

In [None]:
# Inverse transform: attach each forecast to the dates it predicts and map it
# back to the original count scale.
fitted_values = [
    scaler.inverse_transform(pd.Series(out_seq, index=test_index[i]))
    for i, out_seq in enumerate(outputs)
]

In [None]:
fig, ax = plt.subplots()

# Skip the first 100 observations when drawing the observed series.
observed = cycle_counts["count"].iloc[100:]
ax.plot(observed.index, observed, lw=1.5, label="Observed")

# Every forecast segment is orange; only the first one gets a legend entry.
for i, values in enumerate(fitted_values):
    segment_label = "Fitted" if i == 0 else ""
    ax.plot(values, color="tab:orange", label=segment_label)

ax.legend(loc=1)
ax.set_title(LOCATION)
ax.set_ylabel("Count")
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout();

### Multilayer RNNs

In [None]:
# TODO: Remove some logic to make an exercise

class RNN_3(nn.Module):
    """(Optionally multi-layer) RNN built on ``nn.RNN`` with a linear head.

    The recurrent layer is created with ``batch_first=True``; the linear head
    is applied to the recurrent output of every timestep.

    Args:
        in_seq_length: Length of the input sequences (stored, not used in
            the forward pass).
        out_seq_length: Number of values predicted at every timestep.
        input_size: Number of features per timestep.
        hidden_size: Size of the hidden state.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(
        self,
        in_seq_length: int,
        out_seq_length: int,
        input_size: int = 1,
        hidden_size: int = 25,
        num_layers: int = 1
    ):
        super().__init__()

        # Keep the configuration around for inspection.
        self.in_seq_length = in_seq_length
        self.out_seq_length = out_seq_length
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.ho = nn.Linear(hidden_size, out_seq_length)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the recurrent layer(s), then the linear head at every step."""
        # nn.RNN returns (per-step outputs, final hidden state); only the
        # per-step outputs are needed here.
        step_outputs = self.rnn(x)[0]
        return self.ho(step_outputs)

In [None]:
# Standardise the counts and window them; inputs get a trailing feature axis
# (batched_input=True) as expected by the batch-first recurrent layer.
scaler = Scaler()
scaled_counts = scaler.fit_transform(cycle_counts["count"])

in_seq_length = 120
out_seq_length = 21

X_train, y_train = get_train_dataset(
    scaled_counts,
    in_seq_length,
    out_seq_length,
    batched_input=True,
    batched_output=True,
)

# Wrap the tensors so they can be iterated over in mini-batches of 32.
dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset=dataset, batch_size=32)

In [None]:
# Train a three-layer RNN
model = RNN_3(in_seq_length=in_seq_length, out_seq_length=out_seq_length, num_layers=3)
loss_fn = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=1e-03)

model.train()

n_epochs = 300
for epoch in range(n_epochs):
    # Use dedicated batch names: iterating as `X_train, y_train` would replace
    # the full training tensors with the last mini-batch.
    for X_batch, y_batch in dataloader:
        optimizer.zero_grad()
        y_hat = model(X_batch)
        # Loss functions take the prediction first, then the target
        loss = loss_fn(y_hat, y_batch)
        loss.backward()
        optimizer.step()

    # Report the loss of the last batch every 10 epochs
    if epoch % 10 == 0:
        print(f'Epoch [{epoch}/{n_epochs}], Loss: {loss.item():.4f}')
        print("------------------------------")

In [None]:
# Fitted values: one input window per non-overlapping forecast horizon,
# with a trailing feature axis for the batch-first recurrent layer.
X_test, test_index = get_test_dataset_and_index(
    scaled_counts,
    in_seq_length,
    out_seq_length,
    batched_input=True,
)

model.eval()
with torch.no_grad():
    # Keep only the forecast emitted after the final input timestep.
    outputs = model(X_test)[:, -1, :].numpy()

In [None]:
# Inverse transform: attach each forecast to the dates it predicts and map it
# back to the original count scale.
fitted_values = [
    scaler.inverse_transform(pd.Series(out_seq, index=test_index[i]))
    for i, out_seq in enumerate(outputs)
]

In [None]:
fig, ax = plt.subplots()

# Skip the first 100 observations when drawing the observed series.
observed = cycle_counts["count"].iloc[100:]
ax.plot(observed.index, observed, lw=1.5, label="Observed")

# Every forecast segment is orange; only the first one gets a legend entry.
for i, values in enumerate(fitted_values):
    segment_label = "Fitted" if i == 0 else ""
    ax.plot(values, color="tab:orange", label=segment_label)

ax.legend(loc=1)
ax.set_title(LOCATION)
ax.set_ylabel("Count")
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout();

## LSTM

In [None]:
# TODO: Remove some logic to make an exercise

class LSTM(nn.Module):
    """LSTM (``nn.LSTM``, batch-first) followed by a linear head.

    The linear head is applied to the LSTM output of every timestep.

    Args:
        out_seq_length: Number of values predicted at every timestep.
        input_size: Number of features per timestep.
        hidden_size: Size of the hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(
        self,
        out_seq_length: int,
        input_size: int = 1,
        hidden_size: int = 25,
        num_layers: int = 1,
    ):
        super().__init__()

        # Keep the configuration around for inspection.
        self.out_seq_length = out_seq_length
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(hidden_size, out_seq_length)

    def forward(self, x):
        """Apply the LSTM, then the linear head at every timestep."""
        # nn.LSTM returns (per-step outputs, (h_n, c_n)); only the per-step
        # outputs are needed here.
        step_outputs = self.lstm(x)[0]
        return self.linear(step_outputs)

In [None]:
# Prepare the data: standardise, then window with a trailing feature axis on
# the inputs and one target window per input timestep.
scaler = Scaler()
scaled_counts = scaler.fit_transform(cycle_counts["count"])

in_seq_length = 120
out_seq_length = 21

X_train, y_train = get_train_dataset(
    scaled_counts,
    in_seq_length,
    out_seq_length,
    batched_input=True,
    batched_output=True,
)

# Wrap the tensors so they can be iterated over in mini-batches of 32.
dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset=dataset, batch_size=32)

In [None]:
# Train the model
model = LSTM(out_seq_length=out_seq_length)
loss_fn = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=1e-03)

model.train()

n_epochs = 300
for epoch in range(n_epochs):
    # Use dedicated batch names: iterating as `X_train, y_train` would replace
    # the full training tensors with the last mini-batch.
    for X_batch, y_batch in dataloader:
        optimizer.zero_grad()
        y_hat = model(X_batch)
        # Loss functions take the prediction first, then the target
        loss = loss_fn(y_hat, y_batch)
        loss.backward()
        optimizer.step()

    # Report the loss of the last batch every 10 epochs
    if epoch % 10 == 0:
        print(f'Epoch [{epoch}/{n_epochs}], Loss: {loss.item():.4f}')
        print("------------------------------")

In [None]:
# Fitted values: one input window per non-overlapping forecast horizon,
# with a trailing feature axis for the batch-first LSTM.
X_test, test_index = get_test_dataset_and_index(
    scaled_counts,
    in_seq_length,
    out_seq_length,
    batched_input=True,
)

model.eval()
with torch.no_grad():
    # Keep only the forecast emitted after the final input timestep.
    outputs = model(X_test)[:, -1, :].numpy()

In [None]:
# Inverse transform: attach each forecast to the dates it predicts and map it
# back to the original count scale.
fitted_values = [
    scaler.inverse_transform(pd.Series(out_seq, index=test_index[i]))
    for i, out_seq in enumerate(outputs)
]

In [None]:
fig, ax = plt.subplots()

# Skip the first 100 observations when drawing the observed series.
observed = cycle_counts["count"].iloc[100:]
ax.plot(observed.index, observed, lw=1.5, label="Observed")

# Every forecast segment is orange; only the first one gets a legend entry.
for i, values in enumerate(fitted_values):
    segment_label = "Fitted" if i == 0 else ""
    ax.plot(values, color="tab:orange", label=segment_label)

ax.legend(loc=1)
ax.set_title(LOCATION)
ax.set_ylabel("Count")
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout();