# Lyft - Expt 30 - Resnet34 - 1cycleLR - 64 - lr=1e-4, SGD - l5kit 20201121-rot no_rot, 24M

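The 1cycleLR schedule is sized for 24M training samples in total: 75,000 steps per round × 5 expected rounds = 375,000 scheduler steps, at batch size 64.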

No rotated or mv

Modified from [Lyft: Complete train and prediction pipeline](https://www.kaggle.com/huanvo/lyft-complete-train-and-prediction-pipeline)

# Environment setup

In [None]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.optim.lr_scheduler import OneCycleLR
from torch.utils.data import DataLoader
import torchvision
from torchvision.models.resnet import resnet50, resnet18, resnet34, resnet101
from tqdm import tqdm

import l5kit
from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import matplotlib.pyplot as plt

import os
import random
import time

from IPython.display import display
from tqdm import tqdm_notebook
import gc, psutil

import warnings
warnings.filterwarnings("ignore")

print(l5kit.__version__)
print(l5kit.__file__)

In [None]:
def set_seed(seed):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator, and PyTorch
    (through ``torch.manual_seed`` and ``torch.cuda.manual_seed``), and stores
    the seed in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    # The individual generators are independent, so seeding order is irrelevant.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)
    
# set_seed(42)

In [None]:
# Memory measurement
def memory(verbose=True):
    """Measure physical memory usage and return the amount in use.

    Args:
        verbose: when true, print used / available / total memory.

    Returns:
        Used memory (total minus available) in units of 1024**3 bytes.
    """
    bytes_per_gb = 1024*1024*1024
    stats = psutil.virtual_memory()
    used_gb = (stats.total - stats.available) / bytes_per_gb
    if not verbose:
        return used_gb
    print('Physical memory:',
          '%.2f GB (used),'%used_gb,
          '%.2f GB (available)'%((stats.available) / bytes_per_gb), '/',
          '%.2f GB'%(stats.total / bytes_per_gb))
    return used_gb

def gc_memory(verbose=True):
    """Run the garbage collector and optionally report the result.

    Args:
        verbose: when true, print the value returned by ``gc.collect()``
            followed (on the same line) by the ``memory()`` report.
    """
    collected = gc.collect()
    if not verbose:
        return
    # end=' | ' keeps the GC count and the memory report on one output line
    print('GC:', collected, end=' | ')
    memory()

# Print a baseline memory reading; the trailing semicolon keeps the notebook
# from echoing the returned value.
memory();

# Configs

In [None]:
# Where the notebook runs: selects the root folder used to build every data/model path.
on_kaggle = False
folder = '/kaggle' if on_kaggle else '..'
original_model_weight_path = f'{folder}/input/lyft-pretrained-model-hv/model_multi_update_lyft_public.pth'

# Name used to build the weight / optimizer / scheduler checkpoint paths in cfg.
model_name = 'model-30-resnet34-1cycleLR-64-sgd-24M'
original_model_name = model_name  # when using from other model

# test_run=True switches cfg['train_params'] to a short run and enables the
# input checks in pytorch_neg_multi_log_likelihood_batch_imp.
test_run = False

# Rounds > 1 resume from the checkpoints written by round (train_round - 1).
train_round = 1  # start from 1

set_seed(41+train_round)  # so that we don't go through the same train set for every round

In [None]:
# --- Lyft configs ---
# Single configuration dict for the whole notebook: model/optimizer settings,
# rasterizer settings, one section per data loader, and the training-loop settings.
cfg = {
    'format_version': 4,
    'data_path': f'{folder}/input/lyft-motion-prediction-autonomous-vehicles',
    'model_params': {
        'train': True,
        'predict': True,
        'validate': True,
        'model_architecture': 'resnet34',
        'history_num_frames': 10,
        'history_step_size': 1,
        'history_delta_time': 0.1,
        'future_num_frames': 50,
        'future_step_size': 1,
        'future_delta_time': 0.1,
        'velocity_corrected_yaw': False,
        'num_modes': 3,
        'lr': 1e-4,                 # (1e-3) change learning rate
        'lr_reduce': 0.667,         # not used when using lr scheduler
        'lr_reduce_steps': 300_000, # not used when using lr scheduler
        'lr_scheduler': True,
        'max_lr': 0.001,              # 0.01 only work when using lr_scheduler
        # OneCycleLR total_steps = train_params['steps'] * lr_scheduler_expect_rounds
        'lr_scheduler_expect_rounds': 5,
        # From round 2 onwards, resume model / optimizer / scheduler from the previous round's "-final" files.
        'load_pretrain_weight': True if train_round > 1 else False,
        'weight_path': f'{folder}/{"input/lyftmodels" if on_kaggle else "models"}/{original_model_name}-{train_round - 1}-final.pth',
        'load_pretrain_optimizer': True if train_round > 1 else False,
        'optimizer_path': f'{folder}/{"input/lyftmodels" if on_kaggle else "models"}/{original_model_name}_optimizer-{train_round - 1}-final.pth',
        'load_pretrain_scheduler': True if train_round > 1 else False,  # expect to train 5 rounds
#         'load_pretrain_scheduler': False,
        'scheduler_path': f'{folder}/{"input/lyftmodels" if on_kaggle else "models"}/{original_model_name}_scheduler-{train_round - 1}-final.pth',
    },
    'raster_params': {
        'raster_size': [224, 224],
        'pixel_size': [0.5, 0.5],
        'ego_center': [0.25, 0.5],
        'map_type': 'py_semantic',
        'satellite_map_key': 'aerial_map/aerial_map.png',
        'semantic_map_key': 'semantic_map/semantic_map.pb',
        'dataset_meta_key': 'meta.json',
        'filter_agents_threshold': 0.5,
    },
    'train_data_loader': {
        'key': 'scenes/train.zarr',
        'batch_size': 64,  # 16
        'shuffle': True,
        'num_workers': 4 if on_kaggle else 6,  # test parallelization
        'prefetch_factor': 16,  # 32
        'min_frame_future': 1,   # default 1, new in v26
    },
    'val_data_loader': {
        'key': 'scenes/validate.zarr',
        'batch_size': 128,
        'shuffle': True,
        'num_workers': 4 if on_kaggle else 6,
        'prefetch_factor': 4,
        'num_batches': 1000,
    },
    'val_100_data_loader': {
        'key': 'scenes/validate_chopped_100/validate.zarr',
        'batch_size': 128,
        'shuffle': False,
        'num_workers': 4 if on_kaggle else 6,
        'prefetch_factor': 4,
        'num_batches': 1000,
        'mask_path': f'{folder}/input/lyft-motion-prediction-autonomous-vehicles/scenes/validate_chopped_100/mask.npz',
        'truth_path': f'{folder}/input/lyft-motion-prediction-autonomous-vehicles/scenes/validate_chopped_100/gt.csv',
    },
    'test_data_loader': {
        'key': 'scenes/test.zarr',
        'batch_size': 128,
        'shuffle': False,
        'num_workers': 4 if on_kaggle else 6,
        'prefetch_factor': 4,
        'mask_path': f'{folder}/input/lyft-motion-prediction-autonomous-vehicles/scenes/mask.npz',
    },
    # Short smoke-test settings when test_run is set, otherwise the real training settings.
    'train_params': {
        'steps': 100,
        'update_steps': 10,
        'checkpoint_steps': 50,
        'replay_steps': 0,
        'replay_cache': 50,
    } if test_run else {
        'steps': 16000 if on_kaggle else 75_000,  # 300_000, 75_000, 50_000, 12_500
        'update_steps': 200,  # change in v12
        'checkpoint_steps': 50000,
        'replay_steps': 0,  # 0 disables the replay branch of the training loop
        'replay_cache': 50,  # capacity passed to ReplayMemory
    }}

In [None]:
# Only applies on Kaggle when the train loader uses worker processes.
# NOTE(review): presumably BLOSC_NOLOCK makes the Blosc codec skip its global
# lock so multi-process zarr reads do not contend — confirm against Blosc docs.
if on_kaggle and cfg['train_data_loader']['num_workers'] > 0:
    os.environ["BLOSC_NOLOCK"] = "1"

Couple of things to note:

 - **model_architecture:** you can put 'resnet18', 'resnet34' or 'resnet50'. For the pretrained model we use resnet18 so we need to use 'resnet18' in the config.
 - **weight_path:** path to the pretrained model. If you don't have a pretrained model and want to train from scratch, set **load_pretrain_weight** = False (this notebook checks that flag rather than **weight_path** itself). 
 - **model_name:** the name of the model that will be saved as output, this is only when **train**= True.
 - **train:** True if you want to continue to train the model. Unfortunately due to Kaggle memory constraint if **train**=True then you should put **predict** = False.
 - **predict:** True if you want to predict and submit to Kaggle. Unfortunately due to Kaggle memory constraint if you want to predict then you need  to put **train** = False.
 - **lr:** learning rate of the model, feel free to change as you see fit. In the future I also plan to implement learning rate decay. 
 - **raster_size:** specify the size of the image, the default is [224,224]. Increase **raster_size** can improve the score. However the training time will be significantly longer. 
 - **batch_size:** number of inputs for one forward pass, again one of the parameters to tune. 
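 - **max_num_steps:** the number of training iterations (batches) to run, not the number of epochs. In this notebook the config key is `train_params['steps']`.
 - **checkpoint_every_n_steps:** the model is saved every n steps; change this number depending on how often you want to checkpoint the model. In this notebook the config key is `train_params['checkpoint_steps']`.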

# Load the train and test datasets

In [None]:
%%time
# Point l5kit at the dataset root before creating the data manager.
# NOTE(review): LocalDataManager() is constructed without arguments, so it
# presumably reads L5KIT_DATA_FOLDER from the environment — keep this order.
DIR_INPUT = cfg["data_path"]
os.environ["L5KIT_DATA_FOLDER"] = DIR_INPUT
dm = LocalDataManager()

In [None]:
%%time
# Train dataset
# Built only when training is enabled; train_dataset / train_dataloader are
# left undefined otherwise.
if cfg["model_params"]["train"]:
    # Project-local parallelized dataset used in place of l5kit's AgentDataset
    # (the AgentDataset variant is kept commented out below for reference).
    from parallelized_lyft_dataset6 import LyftDataset, lyft_dataset_worker_init_func
    train_cfg = cfg["train_data_loader"]
    # train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open(cached=False)  # try to turn off cache
    # train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    train_dataset = LyftDataset(cfg, dm, 'train_data_loader', fast=True, min_frame_future=train_cfg['min_frame_future'])
    # persistent_workers=True keeps worker processes alive when the iterator is
    # re-created; each worker is initialised with lyft_dataset_worker_init_func.
    train_dataloader = DataLoader(
        train_dataset, shuffle=train_cfg["shuffle"], batch_size=train_cfg["batch_size"],
        num_workers=train_cfg["num_workers"], persistent_workers=True,
        worker_init_fn=lyft_dataset_worker_init_func, pin_memory=True,
        prefetch_factor=train_cfg['prefetch_factor'],  # update in v11
    )
    # print(train_dataset)
    print('train set size:', len(train_dataset))

# Simple visualization

Let us visualize what an input to the model looks like.

In [None]:
def visualize_trajectory(dataset, index, title="target_positions movement with draw_trajectory"):
    """Plot one dataset sample with its target trajectory drawn on the raster.

    Args:
        dataset: indexable dataset exposing a ``rasterizer`` with ``to_rgb``;
            each item is a dict with "image", "target_positions",
            "raster_from_agent" and "target_yaws".
        index: position of the sample to show.
        title: title of the matplotlib figure.
    """
    sample = dataset[index]
    # Move axis 0 of the raster to the end before converting it to RGB.
    rgb_image = dataset.rasterizer.to_rgb(sample["image"].transpose(1, 2, 0))
    # Map target positions into raster pixel coordinates.
    trajectory_px = transform_points(sample["target_positions"], sample["raster_from_agent"])
    # presumably draws onto rgb_image in place, since the return value is unused
    draw_trajectory(rgb_image, trajectory_px, TARGET_POINTS_COLOR, radius=1, yaws=sample["target_yaws"])

    plt.title(title)
    plt.imshow(rgb_image, origin='lower')
    plt.show()

In [None]:
# i_plot = 66652

# plt.figure(figsize=(8, 6))
# visualize_trajectory(train_dataset, index=i_plot)

# plt.figure(figsize=(15, 15))
# for i in range(25):
#     plt.subplot(5, 5, i+1).set_title(f'{i}')
#     plt.imshow(train_dataset[i_plot]['image'][i])
# plt.show()

# Loss function
For this competition it is important to use the correct loss function when training the model. Our goal is to predict three possible paths together with a confidence score for each, so we need a loss function that takes that into account; simply using RMSE will not lead to an accurate model. More information about the loss function can be found here: [negative log likelihood](https://github.com/lyft/l5kit/blob/master/competition.md).

In [None]:
# --- Function utils ---
# Original code from https://github.com/lyft/l5kit/blob/20ab033c01610d711c3d36e1963ecec86e8b85b6/l5kit/l5kit/evaluation/metrics.py
import numpy as np

import torch
from torch import Tensor


def pytorch_neg_multi_log_likelihood_batch(
    gt: Tensor, pred: Tensor, confidences: Tensor, avails: Tensor
) -> Tensor:
    """
    Compute a negative log-likelihood for the multi-modal scenario.
    The reduction over modes uses ``torch.logsumexp``, which applies the
    log-sum-exp trick to avoid underflow and overflow. For more information see:
    https://en.wikipedia.org/wiki/LogSumExp#log-sum-exp_trick_for_log-domain_calculations
    https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    https://leimao.github.io/blog/LogSumExp/
    Args:
        gt (Tensor): array of shape (bs)x(time)x(2D coords)
        pred (Tensor): array of shape (bs)x(modes)x(time)x(2D coords)
        confidences (Tensor): array of shape (bs)x(modes) with a confidence for each mode in each sample
        avails (Tensor): array of shape (bs)x(time) with the availability for each gt timestep
    Returns:
        Tensor: negative log-likelihood averaged over the batch, a single float number
    Raises:
        AssertionError: if a shape is inconsistent, confidences do not sum to 1
            per sample, or any input contains a non-finite value
    """
    assert len(pred.shape) == 4, f"expected 4D (BxMxTxC) array for pred, got {pred.shape}"
    batch_size, num_modes, future_len, num_coords = pred.shape

    assert gt.shape == (batch_size, future_len, num_coords), f"expected 3D (Batch x Time x Coords) array for gt, got {gt.shape}"
    assert confidences.shape == (batch_size, num_modes), f"expected 2D (Batch x Modes) array for confidences, got {confidences.shape}"
    assert torch.allclose(torch.sum(confidences, dim=1), confidences.new_ones((batch_size,))), "confidences should sum to 1"
    assert avails.shape == (batch_size, future_len), f"expected 2D (Batch x Time) array for avails, got {avails.shape}"
    # assert all data are valid
    assert torch.isfinite(pred).all(), "invalid value found in pred"
    assert torch.isfinite(gt).all(), "invalid value found in gt"
    assert torch.isfinite(confidences).all(), "invalid value found in confidences"
    assert torch.isfinite(avails).all(), "invalid value found in avails"

    # convert to (batch_size, num_modes, future_len, num_coords)
    gt = torch.unsqueeze(gt, 1)  # add modes
    avails = avails[:, None, :, None]  # add modes and cords

    # error (batch_size, num_modes, future_len)
    error = torch.sum(((gt - pred) * avails) ** 2, dim=-1)  # reduce coords and use availability

    # error (batch_size, num_modes)
    # when a confidence is 0 its log is -inf; logsumexp below handles that mode as contributing nothing
    error = torch.log(confidences) - 0.5 * torch.sum(error, dim=-1)  # reduce time

    # error (batch_size,): numerically stable reduction over modes
    error = -torch.logsumexp(error, dim=1)
    return torch.mean(error)


def pytorch_neg_multi_log_likelihood_single(
    gt: Tensor, pred: Tensor, avails: Tensor
) -> Tensor:
    """
    Single-trajectory wrapper around ``pytorch_neg_multi_log_likelihood_batch``.

    The prediction is treated as the only mode, with confidence 1.

    Args:
        gt (Tensor): array of shape (bs)x(time)x(2D coords)
        pred (Tensor): array of shape (bs)x(time)x(2D coords)
        avails (Tensor): array of shape (bs)x(time) with the availability for each gt timestep
    Returns:
        Tensor: negative log-likelihood for this example, a single float number
    """
    # Unpacking three values requires pred to be exactly 3-dimensional.
    n_samples, _, _ = pred.shape
    # pred (bs)x(time)x(2D coords) --> (bs)x(mode=1)x(time)x(2D coords)
    single_mode_pred = pred.unsqueeze(1)
    # confidence (bs)x(mode=1), all ones, same dtype/device as pred
    unit_confidences = pred.new_ones((n_samples, 1))
    return pytorch_neg_multi_log_likelihood_batch(gt, single_mode_pred, unit_confidences, avails)

In [None]:
# Louis improved
def pytorch_neg_multi_log_likelihood_batch_imp(
    true: Tensor, pred: Tensor, confidences: Tensor, avails: Tensor, test_run=test_run,
) -> Tensor:
    """
    Multi-modal negative log-likelihood, using ``torch.logsumexp`` for the
    reduction over modes.

    Input validation only runs when ``test_run`` is true, so the checks are
    skipped during a normal training run. A small constant (1e-8) is added to
    the confidences before taking the log so a zero confidence stays finite.

    Args:
        true (Tensor): array of shape (bs)x(time)x(2D coords)
        pred (Tensor): array of shape (bs)x(modes)x(time)x(2D coords)
        confidences (Tensor): array of shape (bs)x(modes) with a confidence for each mode in each sample
        avails (Tensor): array of shape (bs)x(time) with the availability for each true timestep
        test_run (bool): enable the shape / finiteness assertions; defaults to
            the module-level ``test_run`` at definition time
    Returns:
        Tensor: negative log-likelihood averaged over the batch, a single float number
    Raises:
        AssertionError: only when ``test_run`` is true and an input is malformed
    """
    if test_run:
        assert len(pred.shape) == 4, f"expected 4D (BxMxTxC) array for pred, got {pred.shape}"
        batch_size, num_modes, future_len, num_coords = pred.shape

        assert true.shape == (batch_size, future_len, num_coords), f"expected 3D (Batch x Time x Coords) array for true, got {true.shape}"
        assert confidences.shape == (batch_size, num_modes), f"expected 2D (Batch x Modes) array for confidences, got {confidences.shape}"
        assert torch.allclose(torch.sum(confidences, dim=1), confidences.new_ones((batch_size,))), "confidences should sum to 1"
        assert avails.shape == (batch_size, future_len), f"expected 2D (Batch x Time) array for avails, got {avails.shape}"
        # assert all data are valid
        assert torch.isfinite(pred).all(), "invalid value found in pred"
        assert torch.isfinite(true).all(), "invalid value found in true"
        assert torch.isfinite(confidences).all(), "invalid value found in confidences"
        assert torch.isfinite(avails).all(), "invalid value found in avails"

    # convert to (batch_size, num_modes, future_len, num_coords)
    true = torch.unsqueeze(true, 1)  # add modes
    avails = avails[:, None, :, None]  # add modes and cords

    # error (batch_size, num_modes, future_len)
    error = torch.sum(((pred - true) * avails) ** 2, dim=-1)  # reduce coords and use availability
    error = torch.sum(error, dim=-1)  # reduce time
    # log-likelihood of each mode: log(confidence) - squared_error / 2
    error = torch.log(confidences + 1e-8) - error / 2
    # reduce modes, (batch_size,)
    error = -torch.logsumexp(error, dim=-1)
    return torch.mean(error)

# Model
Next we define the baseline model. Note that this model will return three possible trajectories together with confidence score for each trajectory.

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# class OriginalLyftMultiModel(nn.Module):
#     def __init__(self, cfg: Dict):
#         super().__init__()

#         architecture = cfg['model_params']['model_architecture']
#         backbone = eval(architecture)(pretrained=True, progress=True)
#         self.backbone = backbone

#         num_history_channels = (cfg['model_params']['history_num_frames'] + 1) * 2
#         num_in_channels = 3 + num_history_channels

#         self.backbone.conv1 = nn.Conv2d(
#             num_in_channels,
#             self.backbone.conv1.out_channels,
#             kernel_size=self.backbone.conv1.kernel_size,
#             stride=self.backbone.conv1.stride,
#             padding=self.backbone.conv1.padding,
#             bias=False,
#         )

#         # This is 512 for resnet18 and resnet34
#         # And it is 2048 for the other resnets        
#         if architecture == 'resnet50':
#             backbone_out_features = 2048
#         else:
#             backbone_out_features = 512

#         # X, Y coords for the future positions (output shape: batch_sizex50x2)
#         self.future_len = cfg['model_params']['future_num_frames']
#         num_targets = 2 * self.future_len
#         num_modes = cfg['model_params']['num_modes']

#         # You can add more layers here.
#         self.head = nn.Sequential(
#             # nn.Dropout(0.2),
#             nn.Linear(in_features=backbone_out_features, out_features=4096),
#         )

#         self.num_preds = num_targets * num_modes
#         self.num_modes = num_modes

#         self.logit = nn.Linear(4096, out_features=self.num_preds + num_modes)

#     def forward(self, x):
#         x = self.backbone.conv1(x)
#         x = self.backbone.bn1(x)
#         x = self.backbone.relu(x)
#         x = self.backbone.maxpool(x)

#         x = self.backbone.layer1(x)
#         x = self.backbone.layer2(x)
#         x = self.backbone.layer3(x)
#         x = self.backbone.layer4(x)

#         x = self.backbone.avgpool(x)
#         x = torch.flatten(x, 1)

#         x = self.head(x)
#         x = self.logit(x)

#         # pred (batch_size)x(modes)x(time)x(2D coords)
#         # confidences (batch_size)x(modes)
#         bs, _ = x.shape
#         pred, confidences = torch.split(x, self.num_preds, dim=1)
#         pred = pred.view(bs, self.num_modes, self.future_len, 2)
#         assert confidences.shape == (bs, self.num_modes)
#         confidences = torch.softmax(confidences, dim=1)
#         return pred, confidences

In [None]:
# not used
def extrapolate_positions(history_positions, history_avail, future_len=50):
    """Extrapolate future positions from the availability-weighted mean past velocity.

    Each available history frame k (k = 1..H frames back) yields a velocity
    estimate ``-10 * position_k / k`` (0.1 s per frame assumed). The estimates
    are averaged over the available frames; samples whose mean speed is not
    above 1.0 m/s are treated as stationary.

    Tensors are created on the device of ``history_positions`` and the number
    of history frames is taken from its shape, so the function does not depend
    on a global device or on a fixed history length of 10.

    Args:
        history_positions: tensor (bs, H + 1, 2); index 0 is the current frame.
        history_avail: tensor (bs, H + 1) of availabilities for those frames.
        future_len: number of future frames to extrapolate (default 50).

    Returns:
        Tensor (bs, future_len, 2) of extrapolated positions; all zeros for
        samples below the speed threshold.
    """
    target_device = history_positions.device
    num_history = history_positions.shape[1] - 1

    velocity_avail = history_avail[:, 1:]
    frames_back = torch.arange(1, num_history + 1, device=target_device)[None, :, None]
    avg_velocities = -10 * history_positions[:, 1:] / frames_back  # assume 0.1 s/frame
    # weighted_avg_velocities
    weighted_avg_velocities = (
        (avg_velocities * velocity_avail[:, :, None]).sum(axis=1)
        / (velocity_avail.sum(axis=1)[:, None] + 1e-8)  # 1e-8 to avoid divide by zero error
    )
    # speed threshold
    speed_threshold = (weighted_avg_velocities**2).sum(axis=1) > 1.0  # filter out speed^2 < (1.0m/s)^2 cases
    weighted_avg_velocities_th = weighted_avg_velocities * speed_threshold[:, None]
    # extrapolation_positions_th
    # assume 0.1 s/frame
    frames_ahead = torch.arange(1, future_len + 1, device=target_device)[None, :, None]
    return weighted_avg_velocities_th[:, None, :] * frames_ahead * 0.1

In [None]:
def get_weighted_avg_velocities_and_rotation(history_positions, history_avail):
    """Estimate each sample's speed and the rotation aligned with its velocity.

    Each available history frame k (k = 1..H frames back) yields a velocity
    estimate ``-10 * position_k / k`` (0.1 s per frame assumed); the estimates
    are averaged over the available frames. Samples whose mean speed is not
    above 1.0 m/s get speed 0 and the identity rotation.

    Tensors are created on the device of ``history_positions`` and the number
    of history frames is taken from its shape, so the function does not depend
    on a global device or on a fixed history length of 10.

    Args:
        history_positions: tensor (bs, H + 1, 2); index 0 is the current frame.
        history_avail: tensor (bs, H + 1) of availabilities for those frames.

    Returns:
        Tuple ``(speed_th, rotation_matrix)``: ``speed_th`` is (bs,), the
        thresholded speed; ``rotation_matrix`` is (bs, 2, 2) with rows
        ``(cos, -sin)`` and ``(sin, cos)`` of the velocity direction.
    """
    target_device = history_positions.device
    num_history = history_positions.shape[1] - 1

    velocity_avail = history_avail[:, 1:]
    # assume 0.1 s/frame
    frames_back = torch.arange(1, num_history + 1, device=target_device)[None, :, None]
    avg_velocities = -10 * history_positions[:, 1:] / frames_back
    # weighted_avg_velocities
    weighted_avg_velocities = (
        (avg_velocities * velocity_avail[:, :, None]).sum(axis=1)
        / (velocity_avail.sum(axis=1)[:, None] + 1e-8)  # 1e-8 to avoid divide by zero error
    )
    # speed threshold
    speed_square = (weighted_avg_velocities**2).sum(axis=1)
    speed_threshold = speed_square > 1.0  # filter out speed^2 < (1.0m/s)^2 cases
    speed_th = torch.sqrt(speed_square * speed_threshold)
    weighted_avg_velocities_th = weighted_avg_velocities * speed_threshold[:, None]
    # Construct rotation matrix from velocity
    cs = weighted_avg_velocities_th / (speed_th + 1e-8)[:, None]  # (cos(theta), sin(theta))
    c = cs[:, 0]
    s = cs[:, 1]
    rotation_matrix = torch.stack((
        torch.stack((c, -s), dim=1),
        torch.stack((s, c), dim=1),
    ), dim=1)
    # for speed=0 case the matrix above is all zeros, so add the identity
    # matrix for exactly those samples (meaning no rotation)
    identity_matrix = (
        torch.eye(2, device=target_device)[None, :, :] * (~speed_threshold)[:, None, None]
    )
    rotation_matrix = rotation_matrix + identity_matrix
    return speed_th, rotation_matrix
def extrapolate_position_x(speed, future_len=50):
    """Extrapolate constant-speed motion along the x axis.

    The index tensor is created on the device of ``speed``, so the function
    does not depend on a global device.

    Args:
        speed: tensor (bs,) of speeds; 0.1 s per frame is assumed.
        future_len: number of future frames to extrapolate (default 50).

    Returns:
        Tensor (bs, future_len, 2): x grows by ``speed * 0.1`` per frame, y is 0.
    """
    # assume 0.1 s/frame
    velocity = torch.stack((speed, torch.zeros_like(speed)), dim=1)
    frames_ahead = torch.arange(1, future_len + 1, device=speed.device)[None, :, None]
    return velocity[:, None, :] * frames_ahead * 0.1

In [None]:
class LyftMultiModel(nn.Module):
    """ResNet-based model predicting several future trajectories with confidences.

    The backbone is a torchvision ResNet whose first convolution is replaced so
    it accepts the rasterized input (3 map channels plus 2 channels per history
    frame, current frame included). The pooled backbone features go through a
    linear head and a final linear layer that emits all trajectory coordinates
    followed by one logit per mode.
    """

    def __init__(self, cfg: Dict):
        """Build the network from ``cfg['model_params']``.

        Reads ``model_architecture``, ``history_num_frames``,
        ``future_num_frames`` and ``num_modes``.

        Raises:
            ValueError: if ``model_architecture`` does not name a constructor
                in ``torchvision.models.resnet``.
        """
        super().__init__()

        architecture = cfg['model_params']['model_architecture']
        # Resolve the constructor by name instead of eval()-ing a config string.
        backbone_factory = getattr(torchvision.models.resnet, architecture, None)
        if not callable(backbone_factory):
            raise ValueError(f"unsupported model_architecture: {architecture!r}")
        self.backbone = backbone_factory(pretrained=True, progress=True)

        num_history_channels = (cfg['model_params']['history_num_frames'] + 1) * 2
        num_in_channels = 3 + num_history_channels

        # Fresh first convolution sized for the raster channels; it keeps the
        # original layer's geometry but does not reuse its pretrained weights.
        self.backbone.conv1 = nn.Conv2d(
            num_in_channels,
            self.backbone.conv1.out_channels,
            kernel_size=self.backbone.conv1.kernel_size,
            stride=self.backbone.conv1.stride,
            padding=self.backbone.conv1.padding,
            bias=False,
        )

        # Width of the pooled features (512 for resnet18/34, 2048 for the
        # bottleneck resnets); read it from the backbone's own classifier so
        # every architecture gets the right value.
        backbone_out_features = self.backbone.fc.in_features

        # X, Y coords for the future positions (output shape: batch_sizex50x2)
        self.future_len = cfg['model_params']['future_num_frames']
        num_targets = 2 * self.future_len
        num_modes = cfg['model_params']['num_modes']

        # You can add more layers here.
        self.head = nn.Sequential(
            # nn.Dropout(0.2),
            nn.Linear(in_features=backbone_out_features, out_features=4096),
        )

        self.num_preds = num_targets * num_modes
        self.num_modes = num_modes

        self.logit = nn.Linear(4096, out_features=self.num_preds + num_modes)

    def forward(self, x):
        """Run the network on a batch of rasters.

        Args:
            x: input tensor (batch_size, channels, height, width).

        Returns:
            Tuple ``(pred, confidences)``: ``pred`` is
            (batch_size)x(modes)x(time)x(2D coords); ``confidences`` is
            (batch_size)x(modes) and sums to 1 over the modes.
        """
        # Backbone stages are called one by one so its own fc layer is skipped.
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        x = self.backbone.avgpool(x)
        x = torch.flatten(x, 1)

        x = self.head(x)
        x = self.logit(x)

        # The first num_preds outputs are coordinates, the remainder mode logits.
        bs, _ = x.shape
        pred, confidences = torch.split(x, self.num_preds, dim=1)
        pred = pred.view(bs, self.num_modes, self.future_len, 2)
        confidences = torch.softmax(confidences, dim=1)
        return pred, confidences

In [None]:
def forward(data, model, device, criterion=pytorch_neg_multi_log_likelihood_batch_imp, compute_loss=True):
    """Run the model on one batch and optionally compute the loss.

    Args:
        data: batch dict with 'image', 'target_positions' and
            'target_availabilities' tensors.
        model: callable returning ``(preds, confidences)`` for the image batch.
        device: device the batch tensors are moved to.
        criterion: loss called as ``criterion(targets, preds, confidences, avail)``.
        compute_loss: when false the criterion is skipped and the loss is -1.

    Returns:
        Tuple ``(loss, preds, confidences)``.
    """
    inputs = data['image'].to(device)
    gt_positions = data['target_positions'].to(device)
    gt_avail = data['target_availabilities'].to(device)

    # Forward pass
    preds, confidences = model(inputs)

    if compute_loss:
        loss = criterion(gt_positions, preds, confidences, gt_avail)
    else:
        loss = -1
    return loss, preds, confidences

In [None]:
# def forward(data, model, device, criterion=pytorch_neg_multi_log_likelihood_batch, compute_loss=True):
#     image = data['image'].to(device)
#     target_availabilities = data['target_availabilities'].to(device)
#     targets = data['target_positions'].to(device)
#     history_availabilities = data['history_availabilities'].to(device)
#     history_positions = data['history_positions'].to(device)
#     history_yaws = data['history_yaws'].to(device)
#     # Forward pass
#     preds, confidences = model(image, history_positions, history_availabilities, history_yaws)
#     loss = criterion(targets, preds, confidences, target_availabilities) if compute_loss else -1
#     return loss, preds, confidences

In [None]:
def set_train_for_resnet(model, is_train):
    """Set ``requires_grad`` on every parameter of the model's ResNet children.

    Only direct children of ``model`` that are torchvision ``ResNet`` instances
    are affected.

    Args:
        model: module whose direct children are inspected.
        is_train: value assigned to ``requires_grad``.
    """
    resnet_children = (
        module for module in model.children()
        if isinstance(module, torchvision.models.resnet.ResNet)
    )
    for resnet in resnet_children:
        for weight in resnet.parameters():
            weight.requires_grad = is_train

def check_resnet_train(model):
    """Collect ``requires_grad`` of every parameter of the model's ResNet children.

    Args:
        model: module whose direct children are inspected.

    Returns:
        List of booleans, one per parameter, in iteration order; empty when the
        model has no direct torchvision ``ResNet`` child.
    """
    return [
        weight.requires_grad
        for module in model.children()
        if isinstance(module, torchvision.models.resnet.ResNet)
        for weight in module.parameters()
    ]

In [None]:
%%time
# ==== INIT MODEL=================
model = LyftMultiModel(cfg)

# Load weights if there is a pretrained model. map_location=device remaps the
# stored tensors, so a checkpoint saved on a GPU also loads on a machine
# without that GPU.
if cfg['model_params']['load_pretrain_weight']:
    print('load pretrained model..')
    model.load_state_dict(torch.load(cfg["model_params"]["weight_path"], map_location=device))

# turn off training for resnet (don't work since we have another layer)
# set_train_for_resnet(model, False)

# Move the model before building the optimizer so its parameters already live
# on the target device.
model.to(device)
print(f'device {device}')
# optimizer = optim.Adam(model.parameters(), lr=cfg['model_params']['lr'])
optimizer = optim.SGD(model.parameters(), lr=cfg['model_params']['lr'], momentum=0.9)
if cfg['model_params']['load_pretrain_optimizer']:
    print('load pretrained optimizer..')
    optimizer.load_state_dict(torch.load(cfg['model_params']['optimizer_path'], map_location=device))
    # overwrite learning rate: the restored state carries the learning rate
    # from the end of the previous round
    optimizer.param_groups[0]['lr'] = cfg['model_params']['lr']

In [None]:
# Show the full module tree of the model.
print(model)

In [None]:
# List requires_grad for the parameters of any direct ResNet child of the model.
print(check_resnet_train(model))

In [None]:
# Show the current learning rate of every optimizer parameter group.
for group in optimizer.param_groups:
    print(group['lr'])

In [None]:
# Optional 1cycle learning-rate schedule. The cycle spans all expected training
# rounds (steps per round * expected rounds), so later rounds resume the saved
# scheduler state rather than starting a new cycle.
if cfg['model_params']['lr_scheduler']:
    scheduler = OneCycleLR(optimizer, max_lr=cfg["model_params"]['max_lr'], 
                           total_steps=cfg["train_params"]["steps"]*cfg['model_params']['lr_scheduler_expect_rounds'])
    if cfg['model_params']['load_pretrain_scheduler']:
        print('load pretrained scheduler..')
        scheduler.load_state_dict(torch.load(cfg['model_params']['scheduler_path']))
    # Print the scheduler state for inspection.
    print(scheduler.state_dict())

# Training loop
Next let us implement the training loop, when the **train** parameter is set to True. 

In [None]:
# train_dataset is only created when training is enabled, so the report is
# guarded the same way; without the guard this cell raises NameError when
# cfg['model_params']['train'] is False.
if cfg["model_params"]["train"]:
    # Number of batches in one full pass over the train set (last batch may be partial).
    train_dataset_total_batches = int(np.ceil(len(train_dataset) / cfg['train_data_loader']['batch_size']))
    print('Number of batches in train:', train_dataset_total_batches)
    print('We will only train:', cfg["train_params"]["steps"], 'batches (%.4f%%)'%(cfg["train_params"]["steps"] * 100 / train_dataset_total_batches))

In [None]:
# Echo the number of samples in the train set.
# NOTE(review): train_dataset only exists when cfg['model_params']['train'] is True.
len(train_dataset)

In [None]:
# Create the batch iterator once, in its own cell; the training loop pulls
# batches from tr_it with next() and re-creates it when it is exhausted.
if cfg["model_params"]["train"]:
    tr_it = iter(train_dataloader)

In [None]:
# New in v11
# from collections import deque
class ReplayMemory(object):
    """Fixed-capacity first-in-first-out store of items kept for re-use.

    Items are held in ``self.memory`` (a list, oldest first). Once ``capacity``
    items are stored, every push drops the oldest one.
    """

    def __init__(self, capacity):
        """Create an empty store holding at most ``capacity`` items."""
        self.capacity = capacity
        self.memory = []

    def push(self, item):
        """Append ``item``, evicting the oldest entry first when the store is full."""
        is_full = len(self.memory) >= self.capacity
        if is_full:
            del self.memory[0]
        self.memory.append(item)

    def sample(self, last_item=False):
        """Return a stored item.

        Args:
            last_item: when true return the most recently pushed item,
                otherwise an item chosen uniformly with ``np.random``.
        """
        if last_item:
            return self.memory[-1]
        pick = np.random.randint(len(self.memory))
        return self.memory[pick]

    def __len__(self):
        """Number of items currently stored."""
        return len(self.memory)

In [None]:
# Manual learning-rate decay settings (marked in cfg as unused when the
# lr scheduler is enabled); the last line echoes both values in the notebook.
lr_reduce = cfg['model_params']['lr_reduce']
lr_reduce_steps = cfg['model_params']['lr_reduce_steps']
lr_reduce, lr_reduce_steps

In [None]:
def reduce_learning_rate(optimizer, reduce_factor, verbose=True):
    """Multiply the learning rate of every parameter group by ``reduce_factor``.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts
            that each hold an ``'lr'`` entry; the entries are updated in place.
        reduce_factor: multiplier applied to each group's current rate.
        verbose: when true, print the old and new rate of every group.
    """
    group_index = 0
    for group in optimizer.param_groups:
        previous = float(group['lr'])
        updated = previous * reduce_factor
        group['lr'] = updated
        if verbose:
            message = 'Reduce learning rate of group {} from {:.4e} to {:.4e}.'
            print(message.format(group_index, previous, updated))
        group_index += 1

In [None]:
%%time
# Main training loop.
# Runs cfg["train_params"]["steps"] iterations over train_dataloader (restarting
# the iterator whenever it is exhausted). Every `update_steps` iterations a
# progress row is printed and recorded; every `checkpoint_steps` iterations the
# model, optimizer and (if enabled) scheduler states are written to disk.
if cfg["model_params"]["train"]:
    replay_memory = ReplayMemory(cfg['train_params']['replay_cache'])
    n_replay_steps = cfg['train_params']['replay_steps']
    n_steps = cfg["train_params"]["steps"]
    progress_bar = tqdm_notebook(range(1, 1 + n_steps), mininterval=5.)
    losses_all = []    # loss of the fresh batch at every iteration
    iterations = []    # iteration number of every recorded progress row
    all_metrics = []   # mean loss over all iterations so far
    metrics = []       # mean loss over the last `update_steps` iterations
    memorys = []       # value returned by memory() at each progress row
    times = []         # minutes elapsed since t_start at each progress row
    lr_history = []    # learning rate of param group 0 at each progress row
    update_steps = cfg['train_params']['update_steps']
    checkpoint_steps = cfg['train_params']['checkpoint_steps']
    use_scheduler = cfg['model_params']['lr_scheduler']
    t_start = time.time()
    i_epochs = 1
    torch.set_grad_enabled(True)

    for i in progress_bar:
        # Fetch the next batch; start a new pass over the loader when it runs out.
        try:
            data = next(tr_it)
        except StopIteration:
            tr_it = iter(train_dataloader)
            data = next(tr_it)
            i_epochs += 1

        replaying = n_replay_steps > 0
        if replaying:
            replay_memory.push(data)
        n_updates = n_replay_steps if replaying else 1

        model.train()
        for r in range(n_updates):
            if replaying:
                # r == 0 trains on the batch just pushed; later replay steps
                # draw a random batch from the cache.
                data = replay_memory.sample(r == 0)
            loss, _, _ = forward(data, model, device)

            # Only the fresh batch's loss is reported (change after the run v11).
            if r == 0:
                loss_v = loss.item()

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        losses_all.append(loss_v)

        if i % update_steps == 0:
            mean_losses_all = np.mean(losses_all)
            mean_losses_update = np.mean(losses_all[-update_steps:])
            timespent = (time.time() - t_start) / 60
            current_lr = optimizer.param_groups[0]['lr']
            print('i: %5d'%i, 'epochs:', i_epochs,
                  'loss: %.5f'%loss_v, 'loss(avg): %.5f'%mean_losses_update, 
                  'loss(all): %.5f'%mean_losses_all, '%.2fmins'%timespent, 'lr=%.2e'%current_lr, end=' | ')
            mem = memory()
            iterations.append(i)
            all_metrics.append(mean_losses_all)
            metrics.append(mean_losses_update)  # use update_steps mean
            lr_history.append(current_lr)
            memorys.append(mem)
            times.append(timespent)

        if use_scheduler:
            scheduler.step()
        # A step count of 0/None disables the manual reduction instead of
        # raising ZeroDivisionError.
        # NOTE(review): when the scheduler is enabled it presumably overwrites
        # this manual reduction on its next step() - confirm this is intended.
        if lr_reduce_steps and i % lr_reduce_steps == 0:
            reduce_learning_rate(optimizer, lr_reduce)

        # Checkpoint on its own cadence. It used to be nested inside the
        # `update_steps` branch, so checkpoints were silently skipped unless
        # checkpoint_steps was a multiple of update_steps. It runs after the
        # scheduler step so the saved states line up the same way as in the
        # final save, and it includes the scheduler state for resuming.
        if checkpoint_steps and i % checkpoint_steps == 0:
            torch.save(model.state_dict(), f'{model_name}-{train_round}-{i}.pth')
            torch.save(optimizer.state_dict(), f'{model_name}_optimizer-{train_round}-{i}.pth')
            if use_scheduler:
                torch.save(scheduler.state_dict(), f'{model_name}_scheduler-{train_round}-{i}.pth')


In [None]:
del loss

In [None]:
# Persist the final model / optimizer / scheduler states and the recorded
# training metrics, then print a short summary of the run.
if cfg["model_params"]["train"]:
    torch.save(model.state_dict(), f'{model_name}-{train_round}-final.pth')
    torch.save(optimizer.state_dict(), f'{model_name}_optimizer-{train_round}-final.pth')
    if cfg['model_params']['lr_scheduler']:
        torch.save(scheduler.state_dict(), f'{model_name}_scheduler-{train_round}-final.pth')
    # One row per progress update recorded by the training loop.
    results = pd.DataFrame({
        'iterations': iterations, 
        'metrics (all avg)': all_metrics, 
        'metrics (update avg)': metrics,
        'elapsed_time (mins)': times,
        'memory (GB)': memorys,
        'learning rate': lr_history,
    })
    results.to_csv(f"train_metrics_{model_name}-{train_round}-{n_steps}.csv", index=False)
    print(f"Total training time is {(time.time() - t_start) / 60} mins")
    # No progress row exists when the run was shorter than update_steps;
    # skip the speed report instead of failing on iterations[-1] or
    # dividing by a zero elapsed time.
    if iterations and times[-1] > 0:
        print('training speed:', iterations[-1] / (60*times[-1]), 'it/s')
    memory()
    display(results)

- === resnet50 ===
- 15.3s         - 8 workers 4 prefetch
- 14.6s (28s)   - 6 workers 2 prefetch
- 16.7s         - 4 workers 2 prefetch
- === resnet18 ===
- 9.22s - 8 workers 8 prefetch
- 13.3  - 8 workers 4 prefetch 2 replay 10 replay_cache
- 13.3  - 8 workers 6 prefetch 2 replay 10 replay_cache
- 13.4  - 8 workers 8 prefetch 2 replay 10 replay_cache
- 17.6  - 8 workers 4 prefetch 3 replay 10 replay_cache
- 9.76  - 6 workers 32 prefetch 2 replay 50 replay_cache 
- 9.76  - 6 workers 32 prefetch 2 replay 50 replay_cache, drop columns 

622: skip join before relu  
605: skip join after relu  
709: skip join after relu additional bn for other  
743: split into 2  

In [None]:
gc_memory()

In [None]:
# Report iterations per second from the last recorded progress row.
# Skipped when no row was recorded (run shorter than update_steps) or when the
# recorded elapsed time is zero, both of which used to raise.
if cfg["model_params"]["train"] and iterations and times[-1] > 0:
    print('training speed:', iterations[-1] / (60*times[-1]), 'it/s')

In [None]:
if cfg["model_params"]["train"]:
    plt.figure(figsize=(12, 4))
    plt.plot(iterations, all_metrics, label='all avg')
    plt.plot(iterations, metrics, label=f'{update_steps}-batch avg', alpha=0.7)
    plt.xlabel('steps'); plt.ylabel('metrics (avg)')
    plt.legend()
    plt.grid(); plt.show()

In [None]:
# expected learning rate (not necessarily the original one)
# lr_sim = cfg['model_params']['lr']
# lr_history = []
# for i in iterations:
#     lr_history.append(lr_sim)
#     if i % lr_reduce_steps == 0:
#         lr_sim = lr_sim * lr_reduce
if cfg["model_params"]["train"]:
    plt.figure(figsize=(12, 3))
    plt.plot(iterations, lr_history, label='all avg')
    plt.xlabel('steps'); plt.ylabel('learning rate')
    plt.legend()
    plt.grid(); plt.show()

In [None]:
if cfg["model_params"]["train"]:
    plt.figure(figsize=(12, 4))
    plt.plot(iterations, memorys)
    plt.xlabel('steps'); plt.ylabel('memory (GB)')
    plt.grid(); plt.show()

In [None]:
if cfg["model_params"]["train"]:
    plt.figure(figsize=(12, 4))
    plt.plot(iterations, times, label='measured data')
    plt.plot([iterations[0], iterations[-1]], [times[0], times[-1]], alpha=0.5, label='linear ref')
    plt.xlabel('steps'); plt.ylabel('elapsed_time (mins)')
    plt.legend(); plt.grid(); plt.show()
    # If the measured curve lies below the linear reference, training is getting slower as the steps increase.

In [None]:
if cfg["model_params"]["train"]:
    tr_it._shutdown_workers()

In [None]:
del tr_it

In [None]:
del replay_memory

In [None]:
del train_dataloader

In [None]:
gc_memory()

### Examine last layer

In [None]:
w = model.logit.weight
b = model.logit.bias

In [None]:
b.shape

In [None]:
b_pred, b_confidences = torch.split(b, model.num_preds, dim=0)
b_pred, b_confidences

In [None]:
b_pred_v = b_pred.view(3, model.future_len, 2)
b_pred_v

In [None]:
b_pred_np = b_pred_v.detach().cpu().numpy().copy()

In [None]:
# bias on the last layer
for i in range(3):
    plt.plot(*b_pred_np[i].T, marker='.', label=f'mode = {i}', alpha=0.8)
plt.grid(); plt.legend(); plt.show()

In [None]:
# bias overall
for i in range(3):
    plt.plot(*b_pred_np[i].T, marker='.', label=f'mode = {i}', alpha=0.8, )
# plt.plot(*target_positions_mean.cpu().numpy().copy().T, linestyle='--', alpha=0.5, label='target mean')
plt.grid(); plt.legend(); plt.show()

In [None]:
# confidence bias
torch.softmax(b_confidences.detach(), dim=0).cpu().numpy()

# Score and Plot functions

In [None]:
# Plot and score functions
from collections import OrderedDict, defaultdict
from l5kit.evaluation import read_gt_csv, read_pred_csv
from l5kit.evaluation.extract_metrics import validate_dicts

def compute_csv_nnl_by_row(truth_path: str, pred_path: str) -> tuple:
    """Score every ground-truth row against its prediction, one score per row.

    Both files are read with the l5kit CSV readers and keyed by
    ``(track_id, timestamp)``. No averaging is done here; callers aggregate
    the per-row scores themselves.

    Args:
        truth_path: path of the ground-truth CSV.
        pred_path: path of the prediction CSV.

    Returns:
        ``(truth, preds, scores)``: three ``OrderedDict`` objects keyed by
        ``(track_id, timestamp)``. ``scores`` holds the
        ``neg_multi_log_likelihood`` value of each row, in ground-truth order.

    Raises:
        ValueError: if ``validate_dicts`` rejects the truth/prediction pair.
    """
    def _index_rows(rows):
        # Key each record by its (track_id, timestamp) pair.
        return OrderedDict(((rec["track_id"], rec["timestamp"]), rec) for rec in rows)

    truth = _index_rows(read_gt_csv(truth_path))
    preds = _index_rows(read_pred_csv(pred_path))

    if not validate_dicts(truth, preds):
        raise ValueError("Error validating csv, see above for details.")

    scores = OrderedDict()
    for key, gt_row in truth.items():
        pred_row = preds[key]
        scores[key] = neg_multi_log_likelihood(
            gt_row["coord"], pred_row["coords"], pred_row["conf"], gt_row["avail"])

    return truth, preds, scores

# plot functions
import matplotlib.patches as mpatches

def row_to_confs(row):
    """Return the entries ``conf_0``, ``conf_1`` and ``conf_2`` of ``row`` as a list."""
    confs = []
    for mode in range(3):
        confs.append(row[f'conf_{mode}'])
    return confs
def row_to_coords(row):
    """Return everything after the first three entries of ``row`` as a (3, 50, 2) array.

    The three skipped entries are presumably the conf_0..conf_2 columns of a
    submission row - confirm against the CSV layout.
    """
    flat_coords = row[3:].values
    return flat_coords.reshape(3, 50, 2)

def row_truth_to_avail(row):
    """Return the first 50 entries of a ground-truth ``row`` as an array."""
    leading = row[:50]
    return leading.values
def row_truth_to_coords(row):
    """Return the entries of a ground-truth ``row`` from position 50 onward as a (50, 2) array."""
    trailing = row[50:].values
    return trailing.reshape(50, 2)

# here I use matplotlib default colors
cmap = plt.get_cmap("tab10")
# Colours 0..9 of the colormap as [r, g, b] lists: the first three components
# of each cmap(i) value, scaled by 255 and truncated to int.
matplotlib_colors_in_rgb_int = [
    [int(255 * x) for x in cmap(i)[:3]] for i in range(10)
]

def generate_image_predicted_trajectory(dataset, df_sub, index):
    """Render sample ``index`` of ``dataset`` with its predicted trajectories drawn on top.

    Args:
        dataset: indexable dataset exposing ``rasterizer``; each sample must
            provide 'image', 'timestamp', 'track_id', 'centroid' and
            'raster_from_world'.
        df_sub: prediction frame looked up with ``.loc[(timestamp, track_id)]``.
        index: position of the sample in ``dataset``.

    Returns:
        ``(image, confs)``: the RGB image with one trajectory drawn per
        predicted mode, and the confidences from ``row_to_confs``.
    """
    sample = dataset[index]
    image = dataset.rasterizer.to_rgb(sample['image'].transpose(1, 2, 0))
    pred_row = df_sub.loc[(sample['timestamp'], sample['track_id'])]
    # note submission coordinate system = world - centroid
    trajectories_world = row_to_coords(pred_row) + sample['centroid']
    mode = 0
    for trajectory in trajectories_world:
        trajectory_pixels = transform_points(trajectory, sample['raster_from_world'])
        # Each mode gets its own entry of the shared colour table.
        draw_trajectory(image, trajectory_pixels, rgb_color=matplotlib_colors_in_rgb_int[mode])
        mode += 1
    return image, row_to_confs(pred_row)

def plot_predicted_trajectory(dataset, df_sub, indices, width=15, height=5, n_cols=3, title=''):
    """Show a grid of samples with their predicted trajectories and confidences.

    Args:
        dataset: dataset forwarded to ``generate_image_predicted_trajectory``.
        df_sub: prediction frame forwarded to the same helper.
        indices: one dataset index, or a list / ndarray of indices.
        width: figure width.
        height: figure height per grid row.
        n_cols: number of subplot columns.
        title: figure-level title; omitted when empty.
    """
    if not isinstance(indices, (list, np.ndarray)):
        indices = [indices]
    # Ceiling division. The previous `n // n_cols + n % n_cols` allocated too
    # many rows whenever the remainder exceeded 1 (e.g. 2 rows for 2 items in
    # 3 columns), leaving blank space in the figure.
    n_rows = max(1, -(-len(indices) // n_cols))
    plt.figure(figsize=(width, height*n_rows))
    for k, index in enumerate(indices):
        plt.subplot(n_rows, n_cols, 1+k).set_title(str(index))
        im, confs = generate_image_predicted_trajectory(dataset, df_sub, index)
        # Legend entry m uses colormap colour m and shows the confidence of mode m.
        patches = [mpatches.Patch(color=cmap(m), label='%.3f'%conf) for m, conf in enumerate(confs)]
        plt.imshow(im, origin='lower')
        plt.legend(handles=patches)
    if title:
        plt.suptitle(title)
    plt.show()

def generate_image_from_submission(data, rasterizer, pred_row=None, truth_row=None, truth_from_data=False):
    """Render one sample and overlay ground-truth and/or predicted trajectories.

    Args:
        data: sample dict; 'image' is always read, the other keys
            ('target_availabilities', 'target_positions', 'target_yaws',
            'raster_from_agent', 'centroid', 'raster_from_world') only for
            the overlays that are requested.
        rasterizer: object whose ``to_rgb`` converts the raster to an image.
        pred_row: prediction row; when given, every predicted mode is drawn.
        truth_row: ground-truth row; when given, the truth trajectory is drawn.
        truth_from_data: when true, also draw the targets stored in ``data``.

    Returns:
        The RGB image with the requested overlays drawn onto it.
    """
    canvas = rasterizer.to_rgb(data['image'].transpose(1, 2, 0))
    # Until a truth source says otherwise, all 50 future steps count as available.
    avail_mask = np.ones(50, dtype='bool')

    if truth_from_data:
        avail_mask = data['target_availabilities'] > 0.5
        truth_pixels = transform_points(
            data["target_positions"][avail_mask], data["raster_from_agent"])
        draw_trajectory(canvas, truth_pixels, TARGET_POINTS_COLOR, radius=1,
                        yaws=data["target_yaws"][avail_mask])

    if truth_row is not None:
        # The availability taken from the truth row replaces any earlier mask.
        avail_mask = row_truth_to_avail(truth_row) > 0.5
        truth_world = row_truth_to_coords(truth_row)[avail_mask] + data['centroid']
        truth_pixels = transform_points(truth_world, data['raster_from_world'])
        draw_trajectory(canvas, truth_pixels, TARGET_POINTS_COLOR, radius=2)

    if pred_row is None:
        return canvas

    # note submission coordinate system = world - centroid
    pred_world = row_to_coords(pred_row) + data['centroid']
    for mode, trajectory in enumerate(pred_world):
        # Only the steps marked available above are drawn.
        pred_pixels = transform_points(trajectory[avail_mask], data['raster_from_world'])
        draw_trajectory(canvas, pred_pixels, rgb_color=matplotlib_colors_in_rgb_int[mode])
    return canvas

def plot_validated_trajectory(dataset, df_sub, df_val_truth, indices, width=14, height=7, n_cols=2, suptitle='',
                              scores_val=None):
    """Show a grid of validation samples with truth and predictions overlaid.

    Args:
        dataset: indexable dataset exposing ``rasterizer``; samples provide
            'timestamp' and 'track_id'.
        df_sub: prediction frame looked up with ``.loc[(timestamp, track_id)]``.
        df_val_truth: ground-truth frame looked up the same way.
        indices: one dataset index, or a list / ndarray of indices.
        width: figure width.
        height: figure height per grid row.
        n_cols: number of subplot columns.
        suptitle: figure-level title; omitted when empty.
        scores_val: optional mapping keyed by ``(str(track_id), str(timestamp))``;
            when given, the score is appended to each subplot title.
    """
    if not isinstance(indices, (list, np.ndarray)):
        indices = [indices]
    # Ceiling division; `n // n_cols + n % n_cols` allocated too many rows
    # whenever the remainder exceeded 1.
    n_rows = max(1, -(-len(indices) // n_cols))
    plt.figure(figsize=(width, height*n_rows))
    for k, index in enumerate(indices):
        data = dataset[index]
        timestamp = data['timestamp']
        track_id = data['track_id']
        title = f'{index} - {track_id}, {timestamp}'
        if scores_val is not None:
            score = scores_val[f'{track_id}', f'{timestamp}']
            title = title + f' {score}'
        print(title)
        pred_row = df_sub.loc[(timestamp, track_id)]
        truth_row = df_val_truth.loc[(timestamp, track_id)]
        plt.subplot(n_rows, n_cols, 1+k).set_title(title)
        im = generate_image_from_submission(data, dataset.rasterizer, 
                                            pred_row=pred_row, truth_row=truth_row,
                                            truth_from_data=False)
        confs = row_to_confs(pred_row)
        # Legend entry m uses colormap colour m and shows the confidence of mode m.
        patches = [mpatches.Patch(color=cmap(m), label='%.3f'%conf) for m, conf in enumerate(confs)]
        plt.imshow(im, origin='lower')
        plt.legend(handles=patches)
    # The guard used to test `title`, the per-subplot loop variable, which
    # raised NameError for empty `indices` and ignored the actual argument.
    if suptitle:
        plt.suptitle(suptitle)
    plt.show()

# Validation 100

In [None]:
%%time
# Validate dataset
if cfg['model_params']['validate']:
    from parallelized_lyft_dataset6 import LyftDataset, lyft_dataset_worker_init_func
#     from parallelized_lyft_dataset6 import lyft_dataset_worker_init_func
#     # Build rasterizer
#     if not cfg["model_params"]['predict']:
#         rasterizer = build_rasterizer(cfg, dm)

    val_cfg = cfg['val_100_data_loader']
#     val_zarr = ChunkedDataset(dm.require(val_cfg["key"])).open(cached=False)  # try to turn off cache
#     val_mask = np.load(val_cfg['mask_path'])["arr_0"]
#     val_dataset = AgentDataset(cfg, val_zarr, rasterizer, agents_mask=val_mask) 
    val_dataset = LyftDataset(cfg, dm, 'val_100_data_loader', agents_mask_path=val_cfg['mask_path']) 
    set_seed(3322)
#     val_dataloader = DataLoader(val_dataset, shuffle=val_cfg["shuffle"],
#                                 batch_size=val_cfg["batch_size"], num_workers=val_cfg["num_workers"])
    val_dataloader = DataLoader(
        val_dataset, shuffle=val_cfg['shuffle'], batch_size=val_cfg['batch_size'],
        num_workers=val_cfg['num_workers'], persistent_workers=False,
        worker_init_fn=lyft_dataset_worker_init_func, pin_memory=False,  # update in v11
        prefetch_factor=val_cfg['prefetch_factor'],  # update in v11
    )
#     print(val_dataset)
    print('val set size:', len(val_dataset))
    
    total_val_batches = int(np.ceil(len(val_dataset) / cfg['val_100_data_loader']['batch_size']))
    print('Number of batches for validation:', total_val_batches)
    # print('Evaluating on:', cfg['val_data_loader']['num_batches'], 'batches (%.4f%%)'%(cfg['val_data_loader']['num_batches'] *100/ total_val_batches))

In [None]:
%%time
# Validation pass: runs the model over val_dataloader with gradients disabled,
# collecting the per-batch loss plus predictions, confidences, timestamps and
# track ids for the CSV written in a later cell.
if cfg["model_params"]["validate"]:
    
    model.eval()
    torch.set_grad_enabled(False)

    # store information for evaluation
    val_loss = []
    future_coords_offsets_pd = []
    timestamps = []
    confidences_list = []
    agent_ids = []
    memorys_val = []
    t0 = time.time()
    times_val = []
    iterations_val = []
    # Progress is reported every i_update batches (more often in a test run).
    i_update = 5 if test_run else 50
    
#     val_it = iter(val_dataloader)

#     for i, data in enumerate(tqdm_notebook(range(cfg['val_data_loader']['num_batches']), mininterval=5.)):
    for i, data in enumerate(tqdm_notebook(val_dataloader, mininterval=5.)):
#         data = next(val_it)
        loss, preds, confidences = forward(data, model, device, compute_loss=True)
    
        # Apply the upper-left 2x2 block of each sample's world_from_agent
        # matrix to every predicted point (out_j = sum_i M[j, i] * p_i).
        # Only that block is used, so no translation is applied.
        preds = torch.einsum('bmti,bji->bmtj', 
                             preds.double(), 
                             data["world_from_agent"].to(device)[:, :2, :2]).cpu().numpy()
        
        val_loss.append(loss.item())
        timestamps.append(data["timestamp"].numpy().copy())
        agent_ids.append(data["track_id"].numpy().copy()) 
        future_coords_offsets_pd.append(preds.copy())
        confidences_list.append(confidences.cpu().numpy().copy())
        
        if i % i_update == 0:
            t = ((time.time() - t0) / 60)
            # Running mean of the validation loss and minutes elapsed so far.
            print('%4d'%i, '% 7.3f'%np.mean(val_loss), '%6.2fmins'%t, end=' | ')
            mem = memory()
            iterations_val.append(i)
            memorys_val.append(mem)
            times_val.append(t)
            # In a test run stop at the first report at or after batch 50.
            if test_run and i >= 50:
                break
    print('Validation loss:', np.mean(val_loss))
    print('Total timespent: %6.2fmins'%((time.time() - t0) / 60))
    memory()

In [None]:
# train_round = 1

In [None]:
if cfg["model_params"]["validate"]:
    val_path = f'{model_name}-val_100-submission-{train_round}.csv'
    write_pred_csv(
        val_path,
        timestamps=np.concatenate(timestamps),
        track_ids=np.concatenate(agent_ids),
        coords=np.concatenate(future_coords_offsets_pd),
        confs=np.concatenate(confidences_list),
    )

In [None]:
if cfg["model_params"]["validate"] and not test_run:
    compute_metrics_results = compute_metrics_csv(
        cfg['val_100_data_loader']['truth_path'], val_path, 
        [neg_multi_log_likelihood, time_displace],
    )
    for metric_name, metric_mean in compute_metrics_results.items():
        print(metric_name, metric_mean)

## Examining validation results

In [None]:
%%time
truth_val, preds_val, scores_val = compute_csv_nnl_by_row(cfg['val_100_data_loader']['truth_path'], val_path)

In [None]:
scores_val_list = sorted(list(scores_val.items()), key=lambda x: x[1], reverse=True)

In [None]:
np.mean([x[1] for x in scores_val_list])

In [None]:
# worst cases (highest scores)
scores_val_list[:5]

In [None]:
# best cases (lowest scores)
scores_val_list[-5:]

In [None]:
n_plot = 10
figsize = (8, 8)
# plt.figure(figsize)
for row in scores_val_list[:n_plot]:
    s = row[1]
    key = row[0]
    t = truth_val[key]
    avail = t["avail"].astype('bool')
    true_coord = t["coord"][avail]

    pred_coords = preds_val[key]["coords"]
    confs = preds_val[key]["conf"]
    plt.figure(figsize=figsize)
    plt.plot(*true_coord.T, label='true', linestyle='--', marker='.', alpha=0.8)
    for m, (coord, conf) in enumerate(zip(pred_coords, confs)):
        plt.plot(*coord[avail].T, label=f'pred_{m}: {conf}', alpha=0.6, marker='+')
    plt.legend()
    plt.title(f'{key}, score {s}')
    plt.grid(); plt.show()

In [None]:
n_plot = 10
figsize = (8, 8)
# plt.figure(figsize)
for row in scores_val_list[-n_plot:]:
    s = row[1]
    key = row[0]
    t = truth_val[key]
    avail = t["avail"].astype('bool')
    true_coord = t["coord"][avail]

    pred_coords = preds_val[key]["coords"]
    confs = preds_val[key]["conf"]
    plt.figure(figsize=figsize)
    plt.plot(*true_coord.T, label='true', linestyle='--', marker='.', alpha=0.8)
    for m, (coord, conf) in enumerate(zip(pred_coords, confs)):
        plt.plot(*coord[avail].T, label=f'pred_{m}: {conf}', alpha=0.6, marker='+')
    plt.legend()
    plt.title(f'{key}, score {s}')
    plt.grid(); plt.show()

In [None]:
df_val = pd.read_csv(val_path)
df_val = df_val.set_index(['timestamp', 'track_id'])
display(df_val)

In [None]:
val_truth_path = cfg['val_100_data_loader']['truth_path']
df_val_truth = pd.read_csv(val_truth_path)
df_val_truth = df_val_truth.set_index(['timestamp', 'track_id'])

In [None]:
if cfg["model_params"]["validate"]:
    rasterizer = build_rasterizer(cfg, dm)
    val_cfg = cfg["val_100_data_loader"]
    val_zarr = ChunkedDataset(dm.require(val_cfg["key"])).open(cached=False)  # try to turn off cache
    val_mask = np.load(val_cfg['mask_path'])["arr_0"]
    val_dataset = AgentDataset(cfg, val_zarr, rasterizer, agents_mask=val_mask)
    print(val_dataset)

In [None]:
if cfg["model_params"]["validate"]:
    plot_validated_trajectory(val_dataset, df_val, df_val_truth, [18431], width=6, height=6, n_cols=1, 
                              scores_val=scores_val)

In [None]:
if cfg["model_params"]["validate"]:
    i_plots = np.random.randint(len(val_dataset), size=9)
    plot_validated_trajectory(val_dataset, df_val, df_val_truth, i_plots, scores_val=scores_val)

# Prediction
Finally, we run inference and write the Kaggle submission; this runs when the **predict** parameter is set to True.

In [None]:
%%time
# Test dataset
if cfg["model_params"]['predict']:
    from parallelized_lyft_dataset6 import LyftDataset, lyft_dataset_worker_init_func
#     # Build rasterizer
#     rasterizer = build_rasterizer(cfg, dm)

    test_cfg = cfg["test_data_loader"]
    test_dataset = LyftDataset(cfg, dm, 'test_data_loader', agents_mask_path=test_cfg['mask_path']) 
    test_dataloader = DataLoader(
        test_dataset, shuffle=test_cfg['shuffle'], batch_size=test_cfg['batch_size'],
        num_workers=test_cfg['num_workers'], persistent_workers=False,
        worker_init_fn=lyft_dataset_worker_init_func, pin_memory=False,
        prefetch_factor=test_cfg['prefetch_factor'],  # update in v11
    )
    
#     test_zarr = ChunkedDataset(dm.require(test_cfg["key"])).open(cached=False)  # try to turn off cache
#     test_mask = np.load(f"{DIR_INPUT}/scenes/mask.npz")["arr_0"]
#     test_dataset = AgentDataset(cfg, test_zarr, rasterizer, agents_mask=test_mask)
#     test_dataloader = DataLoader(test_dataset, shuffle=test_cfg["shuffle"],
#                                  batch_size=test_cfg["batch_size"], num_workers=test_cfg["num_workers"])
#     print(test_dataset)
    print('test set size:', len(test_dataset))
    
    print('Number of batches for predictoin:', int(np.ceil(len(test_dataset) / cfg['test_data_loader']['batch_size'])))    

In [None]:
%%time
# Inference pass: runs the model over test_dataloader with gradients disabled,
# collecting predictions, confidences, timestamps and track ids for the
# submission CSV written in a later cell.
if cfg["model_params"]["predict"]:
    
    model.eval()
    torch.set_grad_enabled(False)

    # store information for evaluation
    future_coords_offsets_pd = []
    timestamps = []
    confidences_list = []
    agent_ids = []
    memorys_pred = []
    t0 = time.time()
    times_pred = []
    iterations_pred = []
    # Progress is reported every i_update batches (more often in a test run).
    i_update = 10 if test_run else 50
    
    n_test_batches = len(test_dataloader)
    # test_iter = iter(test_dataloader)

    for i, data in enumerate(tqdm_notebook(test_dataloader, total=n_test_batches, mininterval=5.)):
        
        # No loss is requested here; the first returned value is discarded.
        _, preds, confidences = forward(data, model, device, compute_loss=False)
    
        # Apply the upper-left 2x2 block of each sample's world_from_agent
        # matrix to every predicted point (out_j = sum_i M[j, i] * p_i).
        # Only that block is used, so no translation is applied.
        preds = torch.einsum('bmti,bji->bmtj', 
                             preds.double(), 
                             data["world_from_agent"].to(device)[:, :2, :2]).cpu().numpy()

        future_coords_offsets_pd.append(preds.copy())
        confidences_list.append(confidences.cpu().numpy().copy())
        timestamps.append(data["timestamp"].numpy().copy())
        agent_ids.append(data["track_id"].numpy().copy()) 
        
        if i % i_update == 0:
            t = ((time.time() - t0) / 60)
            print('%4d'%i, '%6.2fmins'%t, end=' | ')
            mem = memory()
            iterations_pred.append(i)
            memorys_pred.append(mem)
            times_pred.append(t)
            # In a test run stop at the first report at or after batch 50.
            if test_run and i >= 50:
                break
    print('Total timespent: %6.2fmins'%((time.time() - t0) / 60))
    memory()

In [None]:
preds = None
confidences = None
data = None
test_dataloader = None

del preds
del confidences
del data
del test_dataloader
# del optimizer

In [None]:
gc_memory()

In [None]:
%%time
# create submission to submit to Kaggle
if cfg["model_params"]["predict"]:
    pred_path = 'submission.csv' if on_kaggle else f'{model_name}-submission-{train_round}.csv'
    write_pred_csv(
        pred_path,
        timestamps=np.concatenate(timestamps),
        track_ids=np.concatenate(agent_ids),
        coords=np.concatenate(future_coords_offsets_pd),
        confs=np.concatenate(confidences_list),
    )

In [None]:
if cfg["model_params"]["predict"]:
    plt.figure(figsize=(12, 4))
    plt.plot(iterations_pred, memorys_pred)
    plt.xlabel('steps'); plt.ylabel('memory (GB)')
    plt.grid(); plt.show()

In [None]:
if cfg["model_params"]["predict"]:
    plt.figure(figsize=(12, 4))
    plt.plot(iterations_pred, times_pred, label='measured')
    plt.plot([iterations_pred[0], iterations_pred[-1]], [times_pred[0], times_pred[-1]], label='linear ref', alpha=0.5)
    plt.xlabel('steps'); plt.ylabel('elapsed_time (mins)')
    plt.legend(); plt.grid(); plt.show()

# All training history

In [None]:
local_files = [entry.name for entry in os.scandir('.') if entry.is_file()]
train_records = [f for f in local_files 
                 if f.startswith(f'train_metrics_{model_name}-') and f.endswith('.csv')]
original_train_records = [f for f in local_files 
                          if f.startswith(f'train_metrics_{original_model_name}-') and f.endswith('.csv')]
train_records, original_train_records

In [None]:
# For this case
train_records_used = original_train_records[:train_round-1] + train_records
train_records_used

In [None]:
train_records_used[0].replace('_', '-').replace('-', '.').split('.')

In [None]:
def extract_info_from_name(filename):
    """Parse the training round and step count out of a metrics file name.

    ``_``, ``-`` and ``.`` are all treated as separators. The last token is
    ignored, the one before it is read as the step count and the one before
    that as the round.

    Returns:
        dict with integer ``'round'`` and ``'n_steps'`` plus the unchanged
        ``'filename'``.
    """
    tokens = filename.translate(str.maketrans('_-', '..')).split('.')
    round_token = tokens[-3]
    steps_token = tokens[-2]
    info = {'round': int(round_token)}
    info['n_steps'] = int(steps_token)
    info['filename'] = filename
    return info

In [None]:
train_records_explain = [extract_info_from_name(r) for r in train_records_used]
train_records_explain

In [None]:
train_history_dfs = {record['round']: pd.read_csv(record['filename']) for record in train_records_explain}
n_steps_history = {record['round']: record['n_steps'] for record in train_records_explain}
n_steps_history

In [None]:
# Stitch the per-round training logs into one continuous timeline: the
# iterations and elapsed time of each round are shifted by the totals of all
# earlier rounds, and every row is tagged with its round number.
df_hs = []
cumulative_steps = 0
cumulative_time = 0
for r in range(1, train_round + 1):
    df_h = train_history_dfs[r].copy()
    # Last recorded elapsed time of this round, taken before the column is shifted.
    round_timespent = df_h['elapsed_time (mins)'].values[-1]
    df_h['iterations'] = df_h['iterations'] + cumulative_steps
    df_h['elapsed_time (mins)'] = df_h['elapsed_time (mins)'] + cumulative_time
    df_h['round'] = r
    df_hs.append(df_h)
    # The step offset uses the step count parsed from the file name, not the
    # last logged iteration.
    cumulative_steps += n_steps_history[r]
    cumulative_time += round_timespent

In [None]:
df_history = pd.concat(df_hs, ignore_index=True)
display(df_history)

In [None]:
# if cfg["model_params"]["train"]:
plt.figure(figsize=(12, 4))
plt.plot(df_history['iterations'], df_history['metrics (all avg)'], label='all avg')
plt.plot(df_history['iterations'], df_history['metrics (update avg)'], label=f'{update_steps}-batch avg')
plt.xlabel('steps'); plt.ylabel('metrics (avg)')
plt.legend()
plt.grid(); plt.show()

plt.figure(figsize=(12, 4))
plt.plot(df_history['iterations'], df_history['metrics (all avg)'], label='all avg')
plt.plot(df_history['iterations'], df_history['metrics (update avg)'], label=f'{update_steps}-batch avg')
plt.xlabel('steps'); plt.ylabel('metrics (avg)')
plt.ylim(-1, 100)
plt.legend()
plt.grid(); plt.show()

plt.figure(figsize=(12, 3))
plt.plot(df_history['iterations'], df_history['memory (GB)'])
plt.xlabel('steps'); plt.ylabel('memory (GB)')
plt.grid(); plt.show()

plt.figure(figsize=(12, 3))
plt.plot(df_history['iterations'], df_history['elapsed_time (mins)'], label='measured data')
plt.plot(
    [df_history['iterations'].values[0], df_history['iterations'].values[-1]], 
    [df_history['elapsed_time (mins)'].values[0], df_history['elapsed_time (mins)'].values[-1]],
    alpha=0.5, label='linear ref')
plt.xlabel('steps'); plt.ylabel('elapsed_time (mins)')
plt.legend(); plt.grid(); plt.show()
# If the measured curve lies below the linear reference, training is getting slower as the steps increase.

# Examine submission

In [None]:
if cfg["model_params"]["predict"]:
    df_sub = pd.read_csv(pred_path)
    df_sub = df_sub.set_index(['timestamp', 'track_id'])
    display(df_sub)

In [None]:
if cfg["model_params"]["predict"]:
    # Build rasterizer
    if not cfg["model_params"]["validate"]:
        rasterizer = build_rasterizer(cfg, dm)

    test_cfg = cfg["test_data_loader"]
    test_zarr = ChunkedDataset(dm.require(test_cfg["key"])).open(cached=False)  # try to turn off cache
    test_mask = np.load(test_cfg['mask_path'])["arr_0"]
    test_dataset = AgentDataset(cfg, test_zarr, rasterizer, agents_mask=test_mask)
    print(test_dataset)

In [None]:
if cfg["model_params"]["predict"]:
    plot_predicted_trajectory(test_dataset, df_sub, [18431], width=6, height=6, n_cols=1)

In [None]:
if cfg["model_params"]["predict"]:
    i_plots = np.random.randint(len(test_dataset), size=9)
    plot_predicted_trajectory(test_dataset, df_sub, i_plots)