# Model 2: LSTM sequence model
## Read data

In [1]:
from pathlib import Path
import numpy as np
import torch 

In [2]:
device = ("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

Using cuda device


In [3]:
# Import local modules from 'src/utils' as package 'utils'
import sys; sys.path.insert(0, '../')

## Create Trajectory dataset from dataframe

In [4]:
from utils.file_io import read_trajectory_datasets

In [5]:
data_folder = Path("../../data/")
train_set, test_set, validation_set, visualization_set = read_trajectory_datasets(data_folder, 0.8, 0.15, 0.045, 0.005, 64, standardize_features=True)

Reading .csv files: 1it [00:05,  5.31s/it]


Preprocessing dataframe.


In [6]:
# FIXME: Total loaded size correct?
input_shape, output_shape = 8, 3
print(f"Data shape {input_shape} / {output_shape} of total {len(train_set) + len(test_set) + len(validation_set) + len(visualization_set)} data rows!")

Data shape 8 / 3 of total 84044 data rows!


## Defining the LSTM model

In [7]:
from torch import nn, Tensor

In [8]:
class DecoderLSTM(nn.Module):
    def __init__(self, input_dim: int, hidden_dim: int, out_dim: int, dropout_lstm: float = 0.25, dropout_final: float = 0.25,
                 num_lstm_layers: int = 1, bidirectional: bool = False) -> None:
        super().__init__()
        self.total_epochs = 0
        self.hidden_dim = hidden_dim
        self.d = 2 if bidirectional else 1
        self.num_lstm_layers = num_lstm_layers

        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_lstm_layers, dropout=dropout_lstm, bidirectional=bidirectional)
        self.final_dropout = nn.Dropout(dropout_final)
        self.out = nn.Linear(hidden_dim * self.d, out_dim)
        
    def forward(self, x: Tensor) -> Tensor:
        batch_size = x.shape[1]
        # expect x to be of shape (sequence_length, batch_size, input_dim)
        h0 = torch.randn(self.d * self.num_lstm_layers, batch_size, self.hidden_dim).to(device)
        c0 = torch.randn(self.d * self.num_lstm_layers, batch_size, self.hidden_dim).to(device)
        # output shape is (sequence_length, batch_size, d * hidden_dim)
        output, (hn, cn) = self.lstm(x, (h0, c0))
        output = self.final_dropout(output)
        return self.out(output)

In [9]:
model = DecoderLSTM(input_shape, 6, output_shape)



## Load parameter, functions and dataloader

In [10]:
import os

from torch.utils.data import DataLoader
from dotenv import load_dotenv

from utils.file_io import save_model
from utils.file_io import define_dataloader_from_subset
from utils.evaluation import compute_loss_on
from utils.loss_functions import maximum_squared_error

In [11]:
model_path = Path("../../models/lstm/")

In [12]:
dotenv_path = model_path / ".env"
load_dotenv(dotenv_path=dotenv_path)

learning_rate = float(os.getenv("LEARNING_RATE"))
batch_size = int(os.getenv("BATCH_SIZE"))
num_epochs = int(os.getenv("NUM_EPOCHS"))

In [13]:
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
loss_function = maximum_squared_error

In [14]:
train_dataloader, validation_dataloader, test_dataloader = define_dataloader_from_subset(train_set, validation_set, test_set, batch_size=batch_size, shuffle=True)

## Define train methods

In [15]:
def train_epoch(train_dataloader: DataLoader, model, loss_function, optimizer, device: torch.device, report_interval: int = 128):
    running_loss = 0
    last_loss = 0

    for i, (inputs, true_values) in enumerate(train_dataloader):

        inputs = inputs.to(device)
        true_values = true_values.to(device)
        
        inputs_shape, true_values_shape = inputs.size(), true_values.size()
        inputs = inputs.view(inputs_shape[1], inputs_shape[0], inputs_shape[2])
        true_values = true_values.view(true_values_shape[1], true_values_shape[0], true_values_shape[2])
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, true_values)
        running_loss += loss
        loss.backward()
        optimizer.step()
    
        if i % report_interval == report_interval - 1:
            last_loss = running_loss / report_interval
            print(f"batch {i + 1}, Mean Squared Error: {last_loss}")
            running_loss = 0
    
    return last_loss

In [16]:
def train(epochs: int, train_dataloader: DataLoader, validation_dataloader: DataLoader, model: nn.Module,
           loss_function, optimizer, checkpoint_path: Path, device: torch.device = 'cpu', report_interval: int = 1000) -> nn.Module:
    
    best_val_loss = float("inf")

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    model.to(device)

    for epoch in range(model.total_epochs, epochs):
        print(f"Epoch: {epoch + 1}")

        model.train(True)
        avg_loss = train_epoch(train_dataloader, model, loss_function, optimizer, device, report_interval)
        model.eval()

        with torch.no_grad():
            avg_val_loss = compute_loss_on(validation_dataloader, model, loss_function, device=device)

        print(f"Loss on train: {avg_loss}, loss on validation: {avg_val_loss}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            model_path = checkpoint_path / f"{checkpoint_path.name}_{epoch}.pt"
            save_model(model, model_path)

        model.total_epochs += 1
    
    return model

## Train the model

In [17]:
last_model = train(num_epochs, train_dataloader, validation_dataloader, model, loss_function, optimizer, model_path, device=device)

Epoch: 1
Loss on train: 0, loss on validation: 6808.64013671875
Epoch: 2
Loss on train: 0, loss on validation: 6529.8212890625
Epoch: 3
Loss on train: 0, loss on validation: 6235.59619140625
Epoch: 4
Loss on train: 0, loss on validation: 5836.76953125
Epoch: 5
Loss on train: 0, loss on validation: 5427.1640625
Epoch: 6
Loss on train: 0, loss on validation: 5049.01513671875
Epoch: 7
Loss on train: 0, loss on validation: 4673.3544921875
Epoch: 8


KeyboardInterrupt: 

## Evaluation

In [None]:
from utils.evaluation import compute_sliding_window_predictions, compute_losses_from
from utils.visualization import create_trace_animation
from matplotlib import pyplot as plt

from IPython.display import HTML

In [None]:
# Compute evaluation on the cpu
device = 'cpu'

### Compute mean squared error

In [None]:
y, y_true = compute_sliding_window_predictions(test_dataloader, last_model, device=device)
test_losses = compute_losses_from(y, y_true, loss_function)
print(f"The mean squared error on test is: {test_losses.mean()}")

### Draw prediction/truth traces

In [None]:
%matplotlib notebook
 
plt.rcParams["animation.html"] = "jshtml"
plt.rcParams['figure.dpi'] = 150  

In [None]:
visualization_dataloader = DataLoader(visualization_set, batch_size=batch_size, shuffle=False)
y, y_true = compute_sliding_window_predictions(visualization_dataloader, last_model, device=device)
y, y_true = y.numpy(), y_true.numpy()
y = y.reshape(y.shape[0] * y.shape[1], y.shape[2])
y_true = y_true.reshape(y_true.shape[0] * y_true.shape[1], y_true.shape[2])

In [None]:
animation = create_trace_animation(y, y_true)
HTML(animation.to_jshtml())

## Loading the best model

In [None]:
from utils.file_io import load_model

In [None]:
loaded_model = DecoderLSTM(input_shape, [32, 16], output_shape)
model_state_dict = load_model(model_path)
loaded_model.load_state_dict(model_state_dict)
loaded_model.eval()
y, y_true = compute_predictions(test_dataloader, loaded_model, device)
test_losses = compute_losses_from(y, y_true, loss_function)
print(f"The mean squared error of the loaded model on test is: {test_losses.mean()}")
animation = create_trace_animation(y.numpy(), y_true.numpy())
HTML(animation.to_jshtml())