## Resnet18 baseline with simple learning rate finder

ResNet18 baseline model with a simple learning-rate finder adapted from Sylvain Gugger's blog post (reference 3 below).

Reference:

1. https://www.kaggle.com/isbhargav/guide-to-pytorch-learning-rate-scheduling
2. https://towardsdatascience.com/adaptive-and-cyclical-learning-rates-using-pytorch-2bf904d18dee
3. https://sgugger.github.io/how-do-you-find-a-good-learning-rate.html
4. https://github.com/davidtvs/pytorch-lr-finder

In [1]:
# common imports
import os
import math
import time
import random
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from typing import Dict
from tqdm import tqdm
from pathlib import Path
from tempfile import gettempdir

# interactive plot libraries
import matplotlib.pyplot as plt
from plotly.offline import init_notebook_mode, iplot # download_plotlyjs, plot
import plotly.graph_objs as go
from plotly.subplots import make_subplots
init_notebook_mode(connected=True)

# torch imports
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision.models.resnet import resnet50, resnet18, resnet34, resnet101
import torch.nn.functional as F

#from torch_lr_finder import LRFinder

# l5kit imports
import l5kit
from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory

In [2]:
import pytorch_lightning as pl

In [3]:
#!rm -rf /kaggle/working/*

In [4]:
print(torch.__version__)

1.6.0


In [5]:
print(l5kit.__version__)

1.1.0


In [6]:
def find_no_of_trainable_params(model):
    """Count the scalar parameters of ``model`` that require gradients.

    Args:
        model: any object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).

    Returns:
        int: sum of ``numel()`` over every parameter with ``requires_grad`` set.
    """
    trainable_count = 0
    for param in model.parameters():
        # Frozen parameters are skipped.
        if param.requires_grad:
            trainable_count += param.numel()
    return trainable_count

In [7]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator, and torch's
    CPU and current-CUDA-device generators, and exports ``PYTHONHASHSEED``.

    Args:
        seed: integer seed applied to all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for seed_fn in seeders:
        seed_fn(seed)


set_seed(42)

## Configs

In [8]:
# --- Lyft configs ---
# Single configuration dict shared by the rasterizer, the datasets, the model and the training cells.
cfg = {
    'format_version': 4,
    # Root folder of the dataset; exported as L5KIT_DATA_FOLDER further down.
    'data_path': "../../lyft-motion-prediction-autonomous-vehicles/",
    'model_params': {
        # Name of the torchvision ResNet constructor used as the backbone.
        'model_architecture': 'resnet18',
        # The model allocates (history_num_frames + 1) * 2 history input channels.
        'history_num_frames': 10,
        'history_step_size': 1,
        'history_delta_time': 0.1,
        # Number of future (x, y) positions predicted per mode.
        'future_num_frames': 50,
        'future_step_size': 1,
        'future_delta_time': 0.1,
        # Prefix of the checkpoint / metrics file names written by the manual training loop.
        'model_name': "R18_10_330_180_dr_conclusion",
        # Learning rate passed to Adam.
        'lr': 6.3e-6,
        # In the visible cells this is only used to name the LR-finder HTML plot.
        'weight_path': "R18_10_330_180_dr_conclusion_2784k.pth",
        # Flags gating which cells do work; 'test' is not read by any visible cell.
        'lr_find' : False,
        'train': True,
        'validate': False,
        'test': False
    },

    # Passed to l5kit's build_rasterizer through the whole cfg dict.
    'raster_params': {
        'raster_size': [330, 180],
        'pixel_size': [0.4, 0.4],
        'ego_center': [0.25, 0.5],
        'map_type': 'py_semantic',
        'satellite_map_key': 'aerial_map/aerial_map.png',
        'semantic_map_key': 'semantic_map/semantic_map.pb',
        'dataset_meta_key': 'meta.json',
        'filter_agents_threshold': 0.5
    },

    # NOTE: the train DataLoader is built with an explicit sampler, so 'shuffle' is not read there.
    'train_data_loader': {
        'key': 'scenes/train.zarr',
        'batch_size': 16,
        'shuffle': True,
        'num_workers': 4
    },

    'val_data_loader': {
        'key': 'scenes/validate.zarr',
        'batch_size': 16,
        'shuffle': True,
        'num_workers': 4
    },


    # Not read by any visible cell.
    'test_data_loader': {
        'key': 'scenes/test.zarr',
        'batch_size': 32,
        'shuffle': False,
        'num_workers': 4
    },

    'train_params': {
        # Offset used both to slice the shuffled sample indices and as the first step number of the manual loop.
        'train_start_index' : 174001,
        # Number of optimizer steps for the Lightning Trainer and for the manual loop.
        'max_num_steps': 11,
        # The manual loop saves a checkpoint whenever the step number is a multiple of this.
        'checkpoint_every_n_steps': 5,
        # The manual loop multiplies the lr by this factor ...
        'reduction_factor' : 0.9,
        # ... whenever step * batch_size is a multiple of this (i.e. the unit is samples).
        'step_size' : 1e5
    }
}

In [9]:
# Convenience constants derived from cfg.
# history_num_frames + 1 (presumably the extra one is the current frame — TODO confirm).
NUMBER_OF_HISTORY_FRAMES = cfg['model_params']['history_num_frames'] + 1
# First component of raster_size only.
RASTER_IMG_SIZE = cfg['raster_params']['raster_size'][0]
# NOTE(review): not passed to the model constructor in the visible cells, which hard-codes 3.
NUM_MODES = 3
NUMBER_OF_FUTURE_FRAMES = cfg['model_params']['future_num_frames']

### TRAIN FROM WHERE LEFT OFF, CHANGE THE STARTING INDICES VARIABLE ACCORDINGLY
TRAIN_BATCH_SIZE = cfg['train_data_loader']['batch_size'] 
TRAIN_START_INDICES = cfg['train_params']['train_start_index']
EXTENT_RANGE = 5.0 
# Passed as min_frame_future when building the training AgentDataset.
MIN_FRAMES_FUTURE = 10

In [10]:
# Motion-limit constants. None of them is referenced by any other cell visible in this notebook.
# NOTE(review): EXTENT_RANGE is already assigned the same value in the previous cell.
EXTENT_RANGE = 5.0 
MAX_VELOCITY = 20.0
MAX_ACCELERATION = 2.0
# 45 degrees expressed in radians.
MAX_YAW_RATE = np.deg2rad(45)
# Time between history frames, taken from the model config.
dt =cfg['model_params']['history_delta_time']

## Rasterize and initialise

In [11]:
# set env variable for data
# LocalDataManager(None) is given no explicit folder; presumably it resolves paths
# through the L5KIT_DATA_FOLDER variable exported here — confirm against l5kit docs.
DIR_INPUT = cfg["data_path"]
os.environ["L5KIT_DATA_FOLDER"] = DIR_INPUT
dm = LocalDataManager(None)
# The rasterizer is shared by the train and eval AgentDataset instances built below.
rasterizer = build_rasterizer(cfg, dm)

## Loss function

In [12]:
# --- Function utils ---
# Original code from https://github.com/lyft/l5kit/blob/20ab033c01610d711c3d36e1963ecec86e8b85b6/l5kit/l5kit/evaluation/metrics.py
from torch import Tensor


def pytorch_neg_multi_log_likelihood_batch(
    gt: Tensor, pred: Tensor, confidences: Tensor, avails: Tensor
) -> Tensor:
    """
    Compute the multi-modal negative log-likelihood, averaged over the batch.

    For every sample the loss is
    ``-log(sum_m confidences[m] * exp(-0.5 * sum_t avails[t] * ||gt[t] - pred[m, t]||^2))``,
    evaluated in the log domain with ``torch.logsumexp`` to avoid underflow and overflow. See:
    https://en.wikipedia.org/wiki/LogSumExp#log-sum-exp_trick_for_log-domain_calculations
    https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    https://leimao.github.io/blog/LogSumExp/

    Args:
        gt (Tensor): array of shape (bs)x(time)x(2D coords)
        pred (Tensor): array of shape (bs)x(modes)x(time)x(2D coords)
        confidences (Tensor): array of shape (bs)x(modes) with a confidence for each mode in each sample
        avails (Tensor): array of shape (bs)x(time) with the availability for each gt timestep
    Returns:
        Tensor: scalar tensor, the mean negative log-likelihood over the batch
    Raises:
        AssertionError: if a shape does not match the layout above, if the confidences
            of a sample do not sum to 1, or if any input contains a non-finite value.
    """
    assert len(pred.shape) == 4, f"expected 4D (BSxMxTxC) tensor for pred, got {pred.shape}"
    batch_size, num_modes, future_len, num_coords = pred.shape

    assert gt.shape == (batch_size, future_len, num_coords), f"expected 3D (BSxTxC) tensor for gt, got {gt.shape}"
    assert confidences.shape == (batch_size, num_modes), f"expected 2D (BSxM) tensor for confidences, got {confidences.shape}"
    assert torch.allclose(torch.sum(confidences, dim=1), confidences.new_ones((batch_size,))), "confidences should sum to 1"
    assert avails.shape == (batch_size, future_len), f"expected 2D (BSxT) tensor for avails, got {avails.shape}"
    # assert all data are valid
    assert torch.isfinite(pred).all(), "invalid value found in pred"
    assert torch.isfinite(gt).all(), "invalid value found in gt"
    assert torch.isfinite(confidences).all(), "invalid value found in confidences"
    assert torch.isfinite(avails).all(), "invalid value found in avails"

    # broadcast everything to (batch_size, num_modes, future_len, num_coords)
    gt = torch.unsqueeze(gt, 1)  # add modes
    avails = avails[:, None, :, None]  # add modes and coords

    # squared error per timestep, unavailable timesteps zeroed: (batch_size, num_modes, future_len)
    error = torch.sum(((gt - pred) * avails) ** 2, dim=-1)

    # per-mode log-likelihood (batch_size, num_modes); a confidence of exactly 0 gives -inf,
    # which logsumexp handles as long as another mode has non-zero confidence
    log_likelihood = torch.log(confidences) - 0.5 * torch.sum(error, dim=-1)  # reduce time

    # numerically stable reduction over modes: (batch_size,)
    nll = -torch.logsumexp(log_likelihood, dim=1)
    return torch.mean(nll)


def pytorch_neg_multi_log_likelihood_single(
    gt: Tensor, pred: Tensor, avails: Tensor
) -> Tensor:
    """
    Single-mode wrapper around ``pytorch_neg_multi_log_likelihood_batch``.

    The prediction is treated as one mode with confidence 1.

    Args:
        gt (Tensor): array of shape (bs)x(time)x(2D coords)
        pred (Tensor): array of shape (bs)x(time)x(2D coords)
        avails (Tensor): array of shape (bs)x(time) with the availability for each gt timestep
    Returns:
        Tensor: whatever the batch loss returns for the single-mode inputs
    """
    # The three-way unpacking also rejects a pred that is not 3-dimensional.
    num_samples, _, _ = pred.shape
    # confidence (bs)x(mode=1), created on pred's device with pred's dtype
    unit_confidence = pred.new_ones((num_samples, 1))
    # pred (bs)x(time)x(2D coords) --> (bs)x(mode=1)x(time)x(2D coords)
    single_mode_pred = pred.unsqueeze(1)
    return pytorch_neg_multi_log_likelihood_batch(gt, single_mode_pred, unit_confidence, avails)

## Model

In [19]:
class Lit_MotionPredictor(pl.LightningModule):
    """Multi-modal trajectory predictor: torchvision ResNet backbone plus a small FC head.

    The network consumes a raster with ``3 + (history_num_frames + 1) * 2`` channels and
    outputs, per sample, ``num_modes`` trajectories of ``future_num_frames`` (x, y)
    points together with one softmax-normalised confidence per mode.

    Args:
        cfg: configuration dict; reads ``cfg["model_params"]`` keys ``model_architecture``,
            ``history_num_frames``, ``future_num_frames`` and ``lr``.
        criterion: callable invoked as ``criterion(targets, preds, confidences, availabilities)``
            that returns the training loss.
        num_modes: number of trajectory hypotheses predicted per sample.

    Raises:
        ValueError: if ``model_architecture`` is not one of the supported backbones.
    """

    # Explicit lookup table instead of eval() on a config string.
    _BACKBONES = {
        "resnet18": resnet18,
        "resnet34": resnet34,
        "resnet50": resnet50,
        "resnet101": resnet101,
    }

    def __init__(self, cfg: Dict, criterion, num_modes=3):
        super().__init__()

        architecture = cfg["model_params"]["model_architecture"]
        if architecture not in self._BACKBONES:
            raise ValueError(
                f"unsupported model_architecture {architecture!r}; "
                f"expected one of {sorted(self._BACKBONES)}"
            )

        num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels

        # X, Y coords for the future positions (output shape: batch_sizex50x2)
        self.future_len = cfg["model_params"]["future_num_frames"]
        num_targets = 2 * cfg["model_params"]["future_num_frames"]
        self.num_preds = num_targets * num_modes
        self.num_modes = num_modes

        # Stored on the module so configure_optimizers does not depend on a global,
        # and so Lightning's LR finder has an `lr` attribute to update.
        self.lr = cfg["model_params"]["lr"]

        ##### Layers of the model #####
        self.backbone = self._BACKBONES[architecture](pretrained=True)
        # Width of the pooled feature vector, read from the backbone's own classifier
        # (512 for resnet18/34, 2048 for resnet50/101) instead of being hard-coded.
        backbone_out_features = self.backbone.fc.in_features

        # Replace the stem so it accepts the raster's channel count; this layer is
        # freshly initialised (the pretrained 3-channel weights are discarded).
        self.backbone.conv1 = nn.Conv2d(
            num_in_channels,
            self.backbone.conv1.out_channels,
            kernel_size=self.backbone.conv1.kernel_size,
            stride=self.backbone.conv1.stride,
            padding=self.backbone.conv1.padding,
            bias=False,
        )

        # dropout layer
        self.dropout = nn.Dropout(p=0.4)

        # You can add more layers here.
        self.fc1 = nn.Linear(in_features=backbone_out_features, out_features=2048)
        self.fc2 = nn.Linear(in_features=2048, out_features=512)
        self.fc_out = nn.Linear(512, out_features=self.num_preds + self.num_modes)

        # loss function
        self.criterion = criterion

    def forward(self, x):
        """Return ``(pred, confidences)`` for a batch of rasters.

        ``pred`` has shape (batch_size)x(modes)x(time)x(2D coords) and ``confidences``
        has shape (batch_size)x(modes), each row summing to 1.
        """
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        x = self.backbone.avgpool(x)
        x = torch.flatten(x, 1)

        # fc layers
        # NOTE(review): there is no non-linearity between fc1, fc2 and fc_out; left as-is
        # so the behaviour of existing checkpoints does not change.
        x = self.dropout(self.fc1(x))
        x = self.dropout(self.fc2(x))
        x = self.fc_out(x)

        # Explicit section sizes, so the split does not rely on an implicit remainder chunk.
        bs, _ = x.shape
        pred, confidences = torch.split(x, [self.num_preds, self.num_modes], dim=1)
        pred = pred.view(bs, self.num_modes, self.future_len, 2)
        assert confidences.shape == (bs, self.num_modes)
        confidences = torch.softmax(confidences, dim=1)
        return pred, confidences

    def training_step(self, batch, batch_idx):
        """Compute the criterion on one batch dict and return it as the loss."""
        inputs = batch["image"]
        target_availabilities = batch["target_availabilities"]
        targets = batch["target_positions"]

        # Forward pass
        preds, confidences = self(inputs)
        loss = self.criterion(targets, preds, confidences, target_availabilities)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters using ``self.lr``."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer

In [20]:
# Build the Lightning module with the multi-modal NLL as criterion and 3 modes.
model = Lit_MotionPredictor(cfg, pytorch_neg_multi_log_likelihood_batch, 3)

In [21]:
# Lightning trainer limited to max_num_steps optimizer steps on one GPU.
# auto_lr_find follows the 'lr_find' flag; presumably it only takes effect through the
# trainer's tuning step and needs an lr attribute on the module — confirm for the installed version.
trainer = pl.Trainer(max_steps=cfg['train_params']['max_num_steps'],
                     auto_lr_find= cfg['model_params']['lr_find'],
                    gpus=1)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


## Train Dataloader

In [16]:
# ===== INIT TRAIN DATASET============================================================
# Builds train_dataset / train_dataloader only when training or LR finding is enabled;
# later cells that use train_dataloader fail with a NameError otherwise.
if (cfg['model_params']['train'] == True) or (cfg['model_params']['lr_find'] == True):
    train_cfg = cfg["train_data_loader"]
    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer, min_frame_future=MIN_FRAMES_FUTURE)
    
    print('Length of Train dataset is ' ,len(train_dataset))
    print("==================================TRAIN DATA==================================")
    print(train_dataset)
    
    # Random permutation of all sample indices (drawn from NumPy's global RNG, seeded in set_seed).
    sampled_indices = np.random.choice(len(train_dataset), size = len(train_dataset), replace = False)
    print('Before slicing, start indices are ', sampled_indices[0:10])
    print('TRAIN_START_INDICES', TRAIN_START_INDICES)
    
    # Drop the first TRAIN_START_INDICES entries of the permutation.
    # NOTE(review): the training loop treats TRAIN_START_INDICES as a batch/step number
    # (it multiplies it by the batch size), whereas here it counts samples — confirm which is intended.
    sampled_indices = sampled_indices[TRAIN_START_INDICES:]
    print('After slicing, start indices are ', sampled_indices[0:10])
    
    # SubsetRandomSampler draws from the remaining indices in its own random order.
    Datasampler = SubsetRandomSampler(sampled_indices)
    train_dataloader = DataLoader(train_dataset, sampler=Datasampler, batch_size=train_cfg["batch_size"], 
                             num_workers=train_cfg["num_workers"])

Length of Train dataset is  17003687
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   16265    |  4039527   | 320124624  |    38735988   |      112.19     |        248.36        |        79.25         |        24.83         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
Before slicing, start indices are  [11022119 16329088 15471840  3288262  1108361 12263917 11947411 12777139
 15959073 15744485]
TRAIN

In [17]:
# Evaluation data lives in the pre-generated 'validate_chopped_100' folder under data_path:
# the chopped zarr (same file name as the validation key), an agents mask and a ground-truth CSV.
eval_base_path = cfg['data_path'] + 'scenes/validate_chopped_100'
eval_cfg = cfg["val_data_loader"]
eval_zarr_path = str(Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
eval_mask_path = str(Path(eval_base_path) / "mask.npz")
eval_gt_path = str(Path(eval_base_path) / "gt.csv")

eval_zarr = ChunkedDataset(eval_zarr_path).open()
eval_mask = np.load(eval_mask_path)["arr_0"]
# ===== INIT DATASET AND LOAD MASK
eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
# NOTE(review): shuffle comes from val_data_loader (True in cfg); evaluation order is therefore random.
eval_dataloader = DataLoader(eval_dataset, shuffle=eval_cfg["shuffle"], batch_size=eval_cfg["batch_size"], 
                             num_workers=eval_cfg["num_workers"])
print(eval_dataset)

+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   16220    |  1622000   | 125423254  |    11733321   |      45.06      |        100.00        |        77.33         |        10.00         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+


In [22]:
# Train with Lightning (the Trainer above caps the run at max_num_steps steps).
# NOTE(review): Lit_MotionPredictor defines no validation_step, so the eval dataloader passed
# here is presumably ignored or rejected by Lightning — confirm for the installed version.
trainer.fit(model, train_dataloader=train_dataloader, val_dataloaders=eval_dataloader)


  | Name     | Type    | Params
-------------------------------------
0 | backbone | ResNet  | 11 M  
1 | dropout  | Dropout | 0     
2 | fc1      | Linear  | 1 M   
3 | fc2      | Linear  | 1 M   
4 | fc_out   | Linear  | 155 K 


HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…




1

### LR_finder 

In [22]:
def plot_lr_finder_results(lr_finder):
    """Plot the LR sweep as two side-by-side panels, save it as HTML and display it.

    Args:
        lr_finder: mapping with keys 'log_lr', 'smooth_loss' and 'grad_loss'.

    Side effects:
        Writes ``<cfg['model_params']['weight_path']>.html`` and renders the figure inline.
    """
    # (result key, legend label) for the left and right panel respectively
    panels = (
        ('smooth_loss', 'log_lr vs smooth_loss'),
        ('grad_loss', 'log_lr vs loss gradient'),
    )
    traces = [
        go.Scatter(x=lr_finder['log_lr'], y=lr_finder[key], name=label)
        for key, label in panels
    ]

    # One row, one column per trace
    figure = make_subplots(rows=1, cols=2)
    for column, trace in enumerate(traces, start=1):
        figure.add_trace(trace, row=1, col=column)

    html_path = cfg['model_params']['weight_path'] + '.html'
    figure.write_html(html_path)
    iplot(figure, show_link=False)

In [23]:
def find_lr(data_loader, init_value = 1e-8, final_value=1.0, beta = 0.98, num_batches = 200):
    """Run an exponential learning-rate sweep and record the loss at every step.

    The learning rate of the first optimizer param group starts at ``init_value`` and is
    multiplied by a constant factor after every batch so that it would reach
    ``final_value`` after ``num_batches`` batches. The sweep stops early when the
    smoothed loss exceeds 8x the best smoothed loss seen so far, when the loss becomes
    non-finite, or when the forward pass raises after at least one loss was recorded.

    Relies on the module-level names ``model``, ``optimizer``, ``forward`` and
    ``device``, which must be defined before this function is called. It trains
    ``model`` and leaves the optimizer's learning rate modified.

    Args:
        data_loader: iterable of batches; restarted transparently when exhausted.
        init_value: first learning rate of the sweep.
        final_value: learning rate the sweep grows towards.
        beta: smoothing factor of the bias-corrected exponential moving average of the loss.
        num_batches: maximum number of batches (sweep steps); must be positive.

    Returns:
        dict with keys 'log_lr' (log10 of each learning rate), 'raw_loss', 'smooth_loss'
        (lists with one entry per recorded step) and 'grad_loss' (``np.gradient`` of the
        smoothed losses, or ``0.0`` when fewer than two steps were recorded).

    Raises:
        Exception: whatever the forward pass raises on the very first batch, since that
            indicates a setup problem rather than divergence.
    """
    assert num_batches > 0

    smooth_losses = []
    raw_losses = []
    log_lrs = []

    def _collect_results():
        # np.gradient needs at least two points.
        grad_loss = np.gradient(smooth_losses) if len(smooth_losses) > 1 else 0.0
        return {'log_lr': log_lrs, 'raw_loss': raw_losses,
                'smooth_loss': smooth_losses, 'grad_loss': grad_loss}

    mult = (final_value / init_value) ** (1 / num_batches)
    lr = init_value
    optimizer.param_groups[0]['lr'] = lr
    avg_loss = 0.0
    best_loss = 0.0
    dataloader_it = iter(data_loader)
    progress_bar = tqdm(range(num_batches))

    for batch_num, _ in enumerate(progress_bar, start=1):
        try:
            data = next(dataloader_it)
        except StopIteration:
            dataloader_it = iter(data_loader)
            data = next(dataloader_it)

        # Forward pass
        model.train()
        torch.set_grad_enabled(True)

        # The criterion asserts on non-finite predictions, so an exception once the sweep
        # is under way is treated as divergence and ends the sweep with what was recorded.
        try:
            loss, _, _ = forward(data, model, device)
        except Exception as exc:
            if not smooth_losses:
                # Nothing recorded yet: this is a setup error, do not hide it.
                raise
            print(f"find_lr: stopping early at lr={lr:.3e} ({type(exc).__name__}: {exc})")
            return _collect_results()

        loss_value = loss.item()
        if not math.isfinite(loss_value):
            return _collect_results()

        # Compute the smoothed loss (bias-corrected exponential moving average)
        avg_loss = beta * avg_loss + (1 - beta) * loss_value
        smoothed_loss = avg_loss / (1 - beta ** batch_num)

        # Stop if the loss is exploding
        if batch_num > 5 and smoothed_loss > 8 * best_loss:
            return _collect_results()

        # Record the best loss
        if smoothed_loss < best_loss or batch_num == 1:
            best_loss = smoothed_loss

        # Store the values
        raw_losses.append(loss_value)
        smooth_losses.append(smoothed_loss)
        log_lrs.append(math.log10(lr))

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print info
        progress_bar.set_description(f"loss: {loss_value},smoothed_loss: {smoothed_loss},lr : {lr}")

        # Update the lr for the next step
        lr *= mult
        optimizer.param_groups[0]['lr'] = lr

    return _collect_results()

In [24]:
#plot_lr_finder_results(lr_finder_results)

In [25]:
# Run the manual LR sweep and plot it only when 'lr_find' is enabled.
# NOTE(review): find_lr reads the globals optimizer, forward and device, none of which is
# defined in the visible cells — confirm they exist before enabling this flag.
if cfg['model_params']['lr_find'] == True:
    lr_finder_results = find_lr(train_dataloader)
    plot_lr_finder_results(lr_finder_results)

## Training loop

In [26]:
# ==== TRAINING LOOP =========================================================
# Manual training loop: runs max_num_steps optimizer steps, numbering them from
# TRAIN_START_INDICES, with periodic checkpoints and a step-wise lr decay.
# NOTE(review): it uses forward, optimizer and device, none of which is defined in the
# visible cells, and it runs in addition to trainer.fit above — confirm this is intended.
if cfg["model_params"]["train"] == True:
    
    print('TRAINING ABOUT TO START ... FROM ', TRAIN_START_INDICES, 
      '  BATCH AND FOR ', cfg['train_params']['max_num_steps'], ' BATCHES', ' WITH BATCH SIZE', TRAIN_BATCH_SIZE)
    
    tr_it = iter(train_dataloader)
    # i is the global step number, continuing from the resume offset.
    progress_bar = tqdm(range(TRAIN_START_INDICES, 
                              TRAIN_START_INDICES + cfg["train_params"]["max_num_steps"]))
    # num_iter, smooth_losses and iteration are assigned but never read in this cell.
    num_iter = cfg["train_params"]["max_num_steps"]
    losses_train = []
    smooth_losses = []
    iterations = []
    metrics = []
    times = []
    model_name = cfg["model_params"]["model_name"]
    start = time.time()
    iteration = 0
    
    for i in progress_bar:
        # Restart the dataloader when it is exhausted.
        try:
            data = next(tr_it)
        except StopIteration:
            tr_it = iter(train_dataloader)
            data = next(tr_it)
        
        # Forward pass
        model.train()
        torch.set_grad_enabled(True)
        loss, _, _ = forward(data, model, device)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses_train.append(loss.item())

        progress_bar.set_description(f"loss: {loss.item()} loss(avg): {np.mean(losses_train)}")
        # Checkpoint model + optimizer whenever the global step is a multiple of checkpoint_every_n_steps.
        if i % cfg['train_params']['checkpoint_every_n_steps'] == 0:
            # The file suffix is the raw value step * batch_size followed by a literal 'k'.
            sample_number = i * cfg['train_data_loader']['batch_size']            
            state = {
              'state_dict': model.state_dict(),
              'optimizer': optimizer.state_dict()
            }
            torch.save(state, f'{model_name}_{sample_number}k.pth')
            iterations.append(i)
            # Running mean of every loss recorded so far in this run.
            metrics.append(np.mean(losses_train))
            times.append((time.time()-start)/60)
        
        num_samples = i * cfg['train_data_loader']['batch_size']

        #Update the lr for every step size
        # Decay happens when step * batch_size is an exact multiple of step_size, except on the first step of the run.
        if((num_samples % cfg['train_params']['step_size'] == 0)and (i > TRAIN_START_INDICES)):
            #print('Before lr : ' , optimizer.param_groups[0]['lr'])
            optimizer.param_groups[0]['lr'] *= cfg['train_params']['reduction_factor']
            #print('After lr : ' , optimizer.param_groups[0]['lr'])

    # Persist the checkpoint-time metrics and the per-step losses, named after the last step.
    sample_number = i * cfg['train_data_loader']['batch_size']
    results = pd.DataFrame({'iterations': iterations, 'metrics (avg)': metrics, 'elapsed_time (mins)': times})
    results.to_csv(f"train_metrics_{model_name}_{sample_number}k.csv", index = False)
    train_losses_csv = pd.DataFrame({'iteration': TRAIN_START_INDICES + np.arange(len(losses_train)), 
                                 'losses_train': losses_train})
    train_losses_csv.to_csv(f"train_losses_{model_name}_{sample_number}k.csv", index = False)
    print(f"Total training time is {(time.time()-start)/60} mins")
    print(results.head())

## Validation loop

In [27]:
def model_validation_score(model, pred_path):
    """Predict on the evaluation set, write the predictions to CSV and print the metrics.

    Relies on the module-level names ``eval_dataloader``, ``eval_gt_path``, ``forward``
    and ``device``. The model is switched to eval mode. Gradients are disabled only
    while predicting and the previous autograd mode is restored afterwards.

    Args:
        model: network passed to ``forward(data, model, device)``, which is expected to
            return ``(loss, preds, confidences)`` with ``preds`` shaped
            (batch)x(modes)x(time)x(2D coords).
        pred_path: destination of the prediction CSV.

    Returns:
        The mapping of metric name to value returned by ``compute_metrics_csv``.
    """
    # ==== EVAL LOOP
    model.eval()

    # store information for evaluation
    future_coords_offsets_pd = []
    timestamps = []
    confidences_list = []
    agent_ids = []

    with torch.no_grad():
        for data in tqdm(eval_dataloader):
            _, preds, confidences = forward(data, model, device)

            #fix for the new environment
            preds = preds.cpu().numpy()
            world_from_agents = data["world_from_agent"].numpy()
            centroids = data["centroid"].numpy()

            # convert into world coordinates and compute offsets from the agent centroid;
            # the number of modes is read from the prediction itself
            num_samples, num_modes = preds.shape[:2]
            for idx in range(num_samples):
                for mode in range(num_modes):
                    preds[idx, mode, :, :] = (
                        transform_points(preds[idx, mode, :, :], world_from_agents[idx])
                        - centroids[idx][:2]
                    )

            future_coords_offsets_pd.append(preds.copy())
            confidences_list.append(confidences.cpu().numpy().copy())
            timestamps.append(data["timestamp"].numpy().copy())
            agent_ids.append(data["track_id"].numpy().copy())

    write_pred_csv(pred_path,
               timestamps=np.concatenate(timestamps),
               track_ids=np.concatenate(agent_ids),
               coords=np.concatenate(future_coords_offsets_pd),
               confs=np.concatenate(confidences_list),
              )

    metrics = compute_metrics_csv(eval_gt_path, pred_path, [neg_multi_log_likelihood, time_displace])
    for metric_name, metric_mean in metrics.items():
        print(metric_name, metric_mean)

    return metrics

In [28]:
# Rebuilds the evaluation dataset and dataloader when 'validate' is enabled.
# NOTE(review): the body repeats, statement for statement, the unconditional eval setup
# cell that precedes trainer.fit, so the objects are already defined when this runs.
if cfg['model_params']['validate'] == True:
    eval_base_path = cfg['data_path'] + 'scenes/validate_chopped_100'
    eval_cfg = cfg["val_data_loader"]
    eval_zarr_path = str(Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
    eval_mask_path = str(Path(eval_base_path) / "mask.npz")
    eval_gt_path = str(Path(eval_base_path) / "gt.csv")

    eval_zarr = ChunkedDataset(eval_zarr_path).open()
    eval_mask = np.load(eval_mask_path)["arr_0"]
    # ===== INIT DATASET AND LOAD MASK
    eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
    eval_dataloader = DataLoader(eval_dataset, shuffle=eval_cfg["shuffle"], batch_size=eval_cfg["batch_size"], 
                                 num_workers=eval_cfg["num_workers"])
    print(eval_dataset)

+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   16220    |  1622000   | 125423254  |    11733321   |      45.06      |        100.00        |        77.33         |        10.00         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+


In [29]:
# Score the model on the evaluation set when 'validate' is enabled;
# predictions are written to pred.csv inside the system temp directory.
if cfg['model_params']['validate'] == True:
    pred_path = f"{gettempdir()}/pred.csv"
    model_validation_score(model, pred_path)

100%|██████████| 5919/5919 [38:11<00:00,  2.58it/s]


neg_multi_log_likelihood 51.84549934571347
time_displace [0.04976345 0.08536804 0.11834483 0.15023673 0.18067379 0.2105909
 0.23958917 0.26813768 0.29628994 0.3244213  0.35065958 0.37636185
 0.40208841 0.42675426 0.45015161 0.47352587 0.4958519  0.51831182
 0.53876084 0.56009103 0.57952961 0.59907501 0.61771141 0.63686877
 0.65594245 0.67467286 0.69180811 0.70923417 0.7262071  0.74357093
 0.76115834 0.77805949 0.7952134  0.81144455 0.82906339 0.84802606
 0.86576121 0.88474551 0.90226602 0.92096034 0.94039315 0.95919419
 0.98020456 1.00284298 1.02640984 1.04959655 1.07349758 1.09756949
 1.12593692 1.15363975]
