## Install dependencies (`pip` and `l5kit`)

In [None]:
# Install pip and then l5kit from the local ../input/kaggle-l5kit-110 folder:
# --no-index disables the package index and -f points pip at that folder,
# so no network access is needed.
!pip install -q --no-index -f ../input/kaggle-l5kit-110 pip
!pip install -q --no-index -f ../input/kaggle-l5kit-110 l5kit

## Check the installed library versions

In [None]:
import l5kit
import torch
import torchvision
# Cell output: the l5kit / torch / torchvision version strings and whether
# torch reports a usable CUDA device.
l5kit.__version__, torch.__version__, torchvision.__version__, torch.cuda.is_available()

Also verify that CUDA is available, since training below requires a GPU.

In [None]:
# Fail fast when no GPU is visible: the training code below asserts on CUDA too.
assert torch.cuda.is_available(), "must be on GPU to train"
# Cell output only: the device object is displayed, not stored in a variable.
torch.device('cuda')

## Configuration via YAML

In [None]:
%%writefile agent_motion_config.yml
# sample from
# https://github.com/lyft/l5kit/blob/master/examples/agent_motion_prediction/agent_motion_config.yaml
# This cell writes the configuration to agent_motion_config.yml, which the
# dataset code below loads by that file name.

# Config format schema number
format_version: 4

###################
## Model options
model_params:
  # NOTE(review): BaselineModel in this notebook always builds a resnet50 and
  # does not read this key.
  model_architecture: "resnet18"

  # BaselineModel sizes its input channels as 3 + 2 * (history_num_frames + 1).
  history_num_frames: 0
  history_step_size: 1
  history_delta_time: 0.1

  # BaselineModel predicts 2 coordinates per future frame for every mode.
  future_num_frames: 50
  future_step_size: 1
  future_delta_time: 0.1

###################
## Input raster parameters
raster_params:
  # raster image size [pixels]
  raster_size:
    - 224
    - 224
  # raster's spatial resolution [meters per pixel]: the size in the real world one pixel corresponds to.
  pixel_size:
    - 0.5
    - 0.5
  # From 0 to 1 per axis, [0.5,0.5] would show the ego centered in the image.
  ego_center:
    - 0.25
    - 0.5
  map_type: "py_semantic"

  # the keys are relative to the dataset environment variable
  satellite_map_key: "aerial_map/aerial_map.png"
  semantic_map_key: "semantic_map/semantic_map.pb"
  dataset_meta_key: "meta.json"

  # e.g. 0.0 include every obstacle, 0.5 show those obstacles with >0.5 probability of being
  # one of the classes we care about (cars, bikes, peds, etc.), >=1.0 filter all other agents.
  filter_agents_threshold: 0.5

  # whether to completely disable traffic light faces in the semantic rasterizer
  disable_traffic_light_faces: False

###################
## Data loader options
# Each section below is read by the matching MyDataset.*_data_loader property:
# "key" is the dataset path relative to the data root, the other entries are
# passed to torch's DataLoader.
train_data_loader:
  key: "scenes/train.zarr"
  batch_size: 16
  shuffle: True
  num_workers: 4

val_data_loader:
  key: "scenes/sample.zarr"
  batch_size: 16
  shuffle: False
  num_workers: 4

test_data_loader:
  key: "scenes/test.zarr"
  batch_size: 8
  shuffle: False
  num_workers: 4

###################
## Train params
# NOTE(review): train_model() in this notebook takes its step limits as keyword
# arguments and does not read these values.
train_params:
  checkpoint_every_n_steps: 10000
  max_num_steps: 5
  eval_every_n_steps: 10000


## Dataset definitions

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
from l5kit.configs import load_config_data
from l5kit.data import PERCEPTION_LABELS, ChunkedDataset, LocalDataManager
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.geometry import transform_points
from l5kit.rasterization import build_rasterizer
from l5kit.visualization import TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from torch.utils.data import DataLoader

# The presence of a /kaggle directory is taken to mean we run inside Kaggle.
is_kaggle = os.path.isdir("/kaggle")

# Name of the YAML configuration file used as the default by MyDataset.
config_file = "agent_motion_config.yml"

# Root folder of the competition data: the Kaggle input path when on Kaggle,
# otherwise a folder of the same name relative to the working directory.
if is_kaggle:
    data_root = "/kaggle/input/lyft-motion-prediction-autonomous-vehicles"
else:
    data_root = "lyft-motion-prediction-autonomous-vehicles"


class MyDataset(object):
    """Build l5kit agent data loaders from a YAML config and a data folder.

    The constructor loads the config, creates a ``LocalDataManager`` rooted at
    ``data_root`` and builds the rasterizer. Each ``*_data_loader`` property
    creates a fresh ``AgentDataset`` and ``DataLoader`` on every access, using
    the config section of the same name (``key``, ``shuffle``, ``batch_size``,
    ``num_workers``).
    """

    def __init__(self, config_file: str = config_file, data_root: str = data_root):
        """Load the configuration and prepare the data manager and rasterizer.

        Args:
            config_file: path of the YAML configuration to load.
            data_root: folder that the dataset keys in the config are relative to.
        """
        super().__init__()
        # Kept so the test mask is read from this instance's data folder and
        # not from the module-level default.
        self.data_root = data_root
        self.cfg = load_config_data(config_file)
        self.dm = LocalDataManager(data_root)
        self.rast = build_rasterizer(self.cfg, self.dm)
        # Agent datasets opened for plotting, cached per config section.
        self._plot_datasets = {}

    def chunked_dataset(self, key: str):
        """Open and return the zarr dataset named by config section ``key``.

        Args:
            key: name of a data loader section in the config, e.g.
                ``"train_data_loader"``; its ``"key"`` entry is the dataset path.
        """
        dl_cfg = self.cfg[key]
        dataset_path = self.dm.require(dl_cfg["key"])
        zarr_dataset = ChunkedDataset(dataset_path)
        zarr_dataset.open()
        return zarr_dataset

    def _agent_dataset(self, key: str, agents_mask=None):
        """Create an ``AgentDataset`` for config section ``key``."""
        zarr_dataset = self.chunked_dataset(key)
        if agents_mask is None:
            return AgentDataset(self.cfg, zarr_dataset, self.rast)
        return AgentDataset(
            self.cfg, zarr_dataset, self.rast, agents_mask=agents_mask
        )

    def _data_loader(self, key: str, agents_mask=None):
        """Create a ``DataLoader`` configured by config section ``key``."""
        dl_cfg = self.cfg[key]
        return DataLoader(
            self._agent_dataset(key, agents_mask=agents_mask),
            shuffle=dl_cfg["shuffle"],
            batch_size=dl_cfg["batch_size"],
            num_workers=dl_cfg["num_workers"],
            pin_memory=True,
        )

    @property
    def val_data_loader(self):
        """A new validation ``DataLoader`` (config section ``val_data_loader``)."""
        return self._data_loader("val_data_loader")

    @property
    def test_data_loader(self):
        """A new test ``DataLoader`` (config section ``test_data_loader``).

        The agents mask stored under ``arr_0`` in ``scenes/mask.npz`` inside
        this instance's data root is passed to the dataset.
        """
        test_mask = np.load(f"{self.data_root}/scenes/mask.npz")["arr_0"]
        return self._data_loader("test_data_loader", agents_mask=test_mask)

    @property
    def train_data_loader(self):
        """A new training ``DataLoader`` (config section ``train_data_loader``)."""
        return self._data_loader("train_data_loader")

    def plt_show_agent_map(self, idx, key: str = "val_data_loader"):
        """Plot the raster of one agent sample with its target trajectory.

        Args:
            idx: index of the sample inside the agent dataset.
            key: config section whose dataset is plotted. The dataset is
                opened on first use and cached for later calls.
        """
        if key not in self._plot_datasets:
            self._plot_datasets[key] = self._agent_dataset(key)
        data = self._plot_datasets[key][idx]
        # Move the first axis last before converting to RGB
        # (presumably channels-first to channels-last; confirm against l5kit).
        im = data["image"].transpose(1, 2, 0)
        im = self.rast.to_rgb(im)
        target_positions_pixels = transform_points(
            data["target_positions"] + data["centroid"][:2], data["world_to_image"]
        )
        draw_trajectory(
            im, target_positions_pixels, data["target_yaws"], TARGET_POINTS_COLOR
        )
        # Rows are reversed for display.
        plt.imshow(im[::-1])
        plt.show()


## Model definition

In [None]:
from typing import Dict

import numpy as np
import torch
from l5kit.evaluation.csv_utils import write_pred_csv
from l5kit.evaluation.metrics import neg_multi_log_likelihood
from l5kit.geometry import transform_points
from torch import nn, optim
from torchvision.models.resnet import resnet18, resnet50
from tqdm.notebook import tqdm

try:
    from .data_loading import MyDataset
except ImportError:
    # Not running as part of a package (e.g. inside the notebook), so the
    # relative import is unavailable; MyDataset is defined earlier in this file.
    pass

import torch
from torch import Tensor


# loss definition from https://www.kaggle.com/corochann/lyft-training-with-multi-mode-confidence
def pytorch_neg_multi_log_likelihood_batch(
    gt: Tensor, pred: Tensor, confidences: Tensor, avails: Tensor
) -> Tensor:
    """
    Compute the negative log-likelihood for the multi-modal scenario, averaged over the batch.

    For each sample the value is
    ``-log(sum_m confidences[m] * exp(-0.5 * sum_t,c ((gt - pred[m]) * avails) ** 2))``.
    The sum over modes is evaluated with ``torch.logsumexp`` to avoid underflow
    and overflow. For more information about the trick see:
    https://en.wikipedia.org/wiki/LogSumExp#log-sum-exp_trick_for_log-domain_calculations
    https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    https://leimao.github.io/blog/LogSumExp/
    Args:
        gt (Tensor): array of shape (bs)x(time)x(2D coords)
        pred (Tensor): array of shape (bs)x(modes)x(time)x(2D coords)
        confidences (Tensor): array of shape (bs)x(modes) with a confidence for each mode in each sample
        avails (Tensor): array of shape (bs)x(time) with the availability for each gt timestep
    Returns:
        Tensor: zero-dimensional tensor holding the mean negative log-likelihood over the batch
    Raises:
        AssertionError: if a shape does not match, the confidences of a sample
            do not sum to 1, or any input contains a non-finite value
    """
    assert (
        len(pred.shape) == 4
    ), f"expected 4D (Batch x Modes x Time x Coords) array for pred, got {pred.shape}"
    batch_size, num_modes, future_len, num_coords = pred.shape

    assert gt.shape == (
        batch_size,
        future_len,
        num_coords,
    ), f"expected 3D (Batch x Time x Coords) array for gt, got {gt.shape}"
    assert confidences.shape == (
        batch_size,
        num_modes,
    ), f"expected 2D (Batch x Modes) array for confidences, got {confidences.shape}"
    assert torch.allclose(
        torch.sum(confidences, dim=1), confidences.new_ones((batch_size,))
    ), "confidences should sum to 1"
    assert avails.shape == (
        batch_size,
        future_len,
    ), f"expected 2D (Batch x Time) array for avails, got {avails.shape}"
    # assert all data are valid
    assert torch.isfinite(pred).all(), "invalid value found in pred"
    assert torch.isfinite(gt).all(), "invalid value found in gt"
    assert torch.isfinite(confidences).all(), "invalid value found in confidences"
    assert torch.isfinite(avails).all(), "invalid value found in avails"

    # broadcast both to (batch_size, num_modes, future_len, num_coords)
    gt = torch.unsqueeze(gt, 1)  # add modes
    avails = avails[:, None, :, None]  # add modes and coords

    # squared error per timestep, unavailable timesteps zeroed out:
    # (batch_size, num_modes, future_len)
    squared_error = torch.sum(((gt - pred) * avails) ** 2, dim=-1)

    # per-mode log-likelihood, (batch_size, num_modes); a confidence of 0 gives
    # log(0) = -inf, which simply drops that mode from the sum below
    log_likelihood = torch.log(confidences) - 0.5 * torch.sum(squared_error, dim=-1)

    # reduce modes in a numerically stable way, (batch_size,)
    nll = -torch.logsumexp(log_likelihood, dim=1)
    return torch.mean(nll)


class BaselineModel(nn.Module):
    """Multi-modal trajectory predictor built on a torchvision ResNet-50.

    The first convolution is replaced so it accepts the raster's channel count
    and the final fully connected layer is replaced so it outputs, for every
    mode, ``future_num_frames`` (x, y) pairs plus one confidence logit.

    Args:
        cfg: configuration dict; reads ``cfg["model_params"]["history_num_frames"]``
            and ``cfg["model_params"]["future_num_frames"]``.
        num_modes: number of trajectories predicted per sample.
        pretrained: forwarded to ``resnet50(pretrained=...)``.
    """

    def __init__(self, cfg: Dict, num_modes: int = 3, pretrained=False):
        super().__init__()
        self.num_modes = num_modes
        resnet = resnet50(pretrained=pretrained)
        # change input channels number to match the rasterizer's output:
        # 3 channels plus 2 per frame (the current frame and each history frame)
        num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels
        resnet.conv1 = nn.Conv2d(
            num_in_channels,
            resnet.conv1.out_channels,
            kernel_size=resnet.conv1.kernel_size,
            stride=resnet.conv1.stride,
            padding=resnet.conv1.padding,
            # Conv2d expects a bool here, not the bias tensor (or None) itself
            bias=resnet.conv1.bias is not None,
        )

        # read the width from the backbone instead of hard-coding it
        backbone_out_features = resnet.fc.in_features

        self.future_len = cfg["model_params"]["future_num_frames"]
        num_targets = 2 * self.future_len

        self.num_preds = num_targets * self.num_modes

        # kept inside nn.Sequential so parameter names in saved state dicts
        # ("resnet.fc.0.*") stay the same
        resnet.fc = nn.Sequential(
            nn.Linear(
                in_features=backbone_out_features,
                # num of modes * preds + confidence
                out_features=self.num_preds + self.num_modes,
            ),
        )
        self.resnet = resnet

    def forward(self, data):
        """Predict trajectories and their confidences.

        Args:
            data: input batch whose first dimension is the batch size.

        Returns:
            A tuple ``(pred, confidences)`` where ``pred`` has shape
            (batch, num_modes, future_len, 2) and ``confidences`` has shape
            (batch, num_modes) with each row softmax-normalised.
        """
        out = self.resnet(data)
        batch_size = data.shape[0]
        # explicit section sizes: coordinates first, then one logit per mode
        pred, confidences = torch.split(
            out, [self.num_preds, self.num_modes], dim=1
        )
        assert pred.shape == (batch_size, self.num_preds)
        assert confidences.shape == (batch_size, self.num_modes)
        pred = pred.view(batch_size, self.num_modes, self.future_len, 2)
        confidences = torch.softmax(confidences, dim=1)
        return pred, confidences


def training_step(model, criterion, device, batch):
    """Run the model on one batch and return the criterion's value.

    The ``"target_availabilities"``, ``"target_positions"`` and ``"image"``
    entries of ``batch`` are moved to ``device``; the model is called on the
    image and must return ``(pred, confidences)``. The criterion is called as
    ``criterion(targets, pred, confidences, availabilities)``.
    """
    avails, targets, images = (
        batch[name].to(device)
        for name in ("target_availabilities", "target_positions", "image")
    )
    pred, confidences = model(images)
    return criterion(targets, pred, confidences, avails)


def train_one_epoch(epoch, model, criterion, device, optimizer, train_dl, max_steps):
    """Run at most ``max_steps`` optimisation steps over ``train_dl``.

    Args:
        epoch: epoch number, used only in the progress bar description.
        model: model passed on to ``training_step``.
        criterion: loss function passed on to ``training_step``.
        device: device the batch tensors are moved to.
        optimizer: optimizer whose gradients are reset and which is stepped
            after every batch.
        train_dl: iterable of training batches.
        max_steps: maximum number of batches to train on in this call.
    """
    for idx, batch in enumerate(tqdm(train_dl, desc=f"Training Epoch {epoch}")):
        # idx is zero-based, so stop once max_steps batches have been used
        if idx >= max_steps:
            break
        loss = training_step(
            model=model,
            criterion=criterion,
            device=device,
            batch=batch,
        )
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


def validation_step(model, criterion, device, batch):
    """Evaluate the criterion on one validation batch.

    Moves the availabilities, target positions and image of ``batch`` to
    ``device``, calls the model on the image and returns
    ``criterion(targets, pred, confidences, availabilities)``.
    """
    on_device = {
        name: batch[name].to(device)
        for name in ("target_availabilities", "target_positions", "image")
    }
    pred, confidences = model(on_device["image"])
    return criterion(
        on_device["target_positions"],
        pred,
        confidences,
        on_device["target_availabilities"],
    )


def validate_one_epoch(epoch, model, criterion, device, val_dl, max_steps: int):
    """Compute the loss on at most ``max_steps`` validation batches.

    The model is switched to evaluation mode for the duration of the call, so
    layers such as batch normalisation do not update their statistics from
    validation data. Its previous mode is restored afterwards. Gradients are
    not tracked.

    Args:
        epoch: epoch number, used only in the progress bar description.
        model: ``nn.Module`` passed on to ``validation_step``.
        criterion: loss function passed on to ``validation_step``.
        device: device the batch tensors are moved to.
        val_dl: iterable of validation batches.
        max_steps: maximum number of batches to evaluate.

    Returns:
        A list with one loss tensor per evaluated batch.
    """
    all_losses = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for idx, batch in enumerate(
                tqdm(val_dl, desc=f"Validation Epoch {epoch}")
            ):
                # idx is zero-based, so stop once max_steps batches were seen
                if idx >= max_steps:
                    break
                loss = validation_step(model, criterion, device=device, batch=batch)
                all_losses.append(loss.mean())
    finally:
        # hand the model back in the mode the caller left it in
        model.train(was_training)
    return all_losses


def train_model(
    dataset: MyDataset,
    max_epochs=5,
    max_steps=3000,
    max_val_steps=100,
    learning_rate=1e-3,
    pretrained=False,
):
    """Train a ``BaselineModel`` on the GPU and return it.

    Each epoch runs ``train_one_epoch`` followed by a validation pass whose
    mean loss is printed.

    Args:
        dataset: provides ``cfg``, ``train_data_loader`` and ``val_data_loader``.
        max_epochs: number of train/validate rounds.
        max_steps: step limit passed to ``train_one_epoch``.
        max_val_steps: step limit passed to ``validate_one_epoch``.
        learning_rate: learning rate of the Adam optimizer.
        pretrained: forwarded to ``BaselineModel``.

    Returns:
        The trained model, located on the CUDA device.

    Raises:
        AssertionError: if CUDA is not available.
    """
    assert torch.cuda.is_available(), "gpu must be used"
    device = torch.device("cuda")
    print("training model using", device)
    model = BaselineModel(dataset.cfg, pretrained=pretrained).to(device)
    train_dl = dataset.train_data_loader
    val_dl = dataset.val_data_loader
    criterion = pytorch_neg_multi_log_likelihood_batch
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in tqdm(range(max_epochs), desc="Training"):
        train_one_epoch(
            epoch,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            device=device,
            train_dl=train_dl,
            max_steps=max_steps,
        )

        with torch.no_grad():
            val_losses = validate_one_epoch(
                epoch=epoch,
                model=model,
                criterion=criterion,
                device=device,
                val_dl=val_dl,
                max_steps=max_val_steps,
            )
        # report the validation result instead of silently dropping it
        if val_losses:
            mean_val_loss = torch.stack(
                [loss.detach() for loss in val_losses]
            ).mean().item()
            print(f"epoch {epoch}: mean validation loss {mean_val_loss:.4f}")
    return model


def convert_agent_coordinates_to_world_offsets(
    agents_coords: np.ndarray,
    world_from_agents: np.ndarray,
    centroids: np.ndarray,
) -> np.ndarray:
    """Transform per-agent coordinates with each agent's matrix and subtract its centroid.

    The three inputs are iterated in lockstep along their first axis. Every
    entry along the second axis of ``agents_coords`` is passed through
    ``transform_points`` with that agent's ``world_from_agents`` matrix, and
    the first two components of the agent's centroid are subtracted.

    Returns:
        The stacked results, one block per agent.
    """
    per_agent_offsets = [
        np.stack(
            [
                transform_points(mode_coords, world_from_agent) - centroid[:2]
                for mode_coords in agent_coords
            ]
        )
        for agent_coords, world_from_agent, centroid in zip(
            agents_coords, world_from_agents, centroids
        )
    ]
    return np.stack(per_agent_offsets)


def evaluation(cfg: Dict, model_path: str, dataset: MyDataset):
    """Run a saved model over the test loader and collect its predictions.

    A ``BaselineModel`` is built from ``cfg``, its weights are loaded from
    ``model_path`` and it is moved to the GPU in evaluation mode. Every batch
    of ``dataset.test_data_loader`` is predicted without gradient tracking and
    the predicted coordinates are converted with
    ``convert_agent_coordinates_to_world_offsets``.

    Returns:
        A tuple ``(timestamps, track_ids, coords, confs)`` of numpy arrays,
        each concatenated over all batches.

    Raises:
        AssertionError: if CUDA is not available.
    """
    assert torch.cuda.is_available(), "GPU must be used"
    device = torch.device("cuda")

    state = torch.load(model_path)
    model = BaselineModel(cfg=cfg)
    model.load_state_dict(state)
    model = model.to(device)
    model.eval()

    # one entry per batch; concatenated after the loop
    all_timestamps, all_track_ids, all_coords, all_confs = [], [], [], []

    with torch.no_grad():
        for data in tqdm(dataset.test_data_loader):
            pred, confidences = model(data["image"].to(device))
            world_offsets = convert_agent_coordinates_to_world_offsets(
                pred.detach().cpu().numpy(),
                data["world_from_agent"].numpy(),
                data["centroid"].numpy(),
            )
            all_coords.append(world_offsets)
            all_confs.append(confidences.detach().cpu().numpy())
            all_timestamps.append(data["timestamp"].detach().numpy())
            all_track_ids.append(data["track_id"].detach().numpy())

        timestamps = np.concatenate(all_timestamps)
        track_ids = np.concatenate(all_track_ids)
        coords = np.concatenate(all_coords)
        confs = np.concatenate(all_confs)

    return timestamps, track_ids, coords, confs


if __name__ == "__main__":
    # End-to-end run: load data, train, save the weights, predict on the test
    # set and write the submission file.
    print("loading dataset")
    my_dataset = MyDataset()
    print("dataset is loaded, starting training")
    model = train_model(dataset=my_dataset)
    print("training done, saving state and generating results")
    torch.save(model.state_dict(), "model_state_last.pth")

    # Predictions are generated from the weights that were just saved.
    timestamps, track_ids, coords, confs = evaluation(
        cfg=my_dataset.cfg,
        model_path="model_state_last.pth",
        dataset=my_dataset,
    )
    write_pred_csv(
        "submission.csv",
        timestamps=timestamps,
        track_ids=track_ids,
        coords=coords,
        confs=confs,
    )
