In [1]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet34
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset, read_pred_csv
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

from torch import Tensor
from collections import OrderedDict, defaultdict


import os

## Prepare Data path and load cfg

By setting the `L5KIT_DATA_FOLDER` variable, we can point the script to the folder where the data lies.

Then, we define our configuration inline as Python dicts: dataset keys relative to the data folder, plus the other settings (rasteriser, validation params...).

In [2]:
# Quick-run switch: when True, the step counts that reference it below drop
# to 10 / 5 / 5 instead of 1000 / 500 / 50.
DEBUG = False

# validation cfg
validation_cfg = {

    'format_version': 4,

    ## Model options
    'model_params': {
        'model_architecture': 'resnet34',
        'history_num_frames': 10,
        'history_step_size': 1,
        'history_delta_time': 0.1,
        'future_num_frames': 50,
        'future_step_size': 1,
        'future_delta_time': 0.1,
    },

    ## Input raster parameters
    'raster_params': {

        # raster image size [pixels]
        'raster_size': [224, 224],
        # raster's spatial resolution [meters per pixel]: the size in the real world one pixel corresponds to.
        'pixel_size': [0.5, 0.5],
        # From 0 to 1 per axis, [0.5,0.5] would show the ego centered in the image.
        'ego_center': [0.25, 0.5],
        'map_type': "py_semantic",

        # the keys are relative to the dataset environment variable
        'satellite_map_key': "aerial_map/aerial_map.png",
        'semantic_map_key': "semantic_map/semantic_map.pb",
        'dataset_meta_key': "meta.json",

        # e.g. 0.0 include every obstacle, 0.5 show those obstacles with >0.5 probability of being
        # one of the classes we care about (cars, bikes, peds, etc.), >=1.0 filter all other agents.
        'filter_agents_threshold': 0.5
    },

    ## Data loader options
    'valid_data_loader': {
        # dataset key, relative to the data folder like the map keys above
        'key': "scenes/validate_chopped_100/validate.zarr",
        'batch_size': 1,
        'shuffle': False,
        'num_workers': 0
    },

    ## Valid params
    'valid_params': {
        'checkpoint_every_n_steps': 5000,
        'max_num_steps': 10 if DEBUG else 1000
    }
}

# Settings shared across runs.
# NOTE(review): of these, only 'output_dir' is read by the cells visible in this notebook.
common_cfg = {
    'seed': 500,
    # must end with '/' as long as paths are built from it by plain string concatenation
    'output_dir': './outputs/1011_2/',
    'epoch': 2,
    'train_step': 5 if DEBUG else 500,
    'valid_step': 5 if DEBUG else 50,
    'train_max': 12,
    'learning_rate': 1e-3
}


In [3]:
# Folder that holds the prediction csv files produced by earlier runs.
OUTPUT_DIR = common_cfg['output_dir']

# Machine-specific default location of the downloaded competition data.
INPUT_ROOT = Path('/home/knikaido/work/Lyft/data/')

# Dataset folder that the "scenes/..." keys are relative to. Honour the
# L5KIT_DATA_FOLDER environment variable when it is set (and non-empty) so the
# notebook can run on another machine without editing this cell; otherwise fall
# back to the original hard-coded location.
DATA_DIR = Path(
    os.environ.get('L5KIT_DATA_FOLDER')
    or INPUT_ROOT / 'lyft-motion-prediction-autonomous-vehicles/'
)

In [4]:
# Prediction csv to evaluate. os.path.join yields the same string as plain
# concatenation while OUTPUT_DIR ends with '/', and still produces a valid path
# if the trailing separator is ever dropped from the config.
pred_path = os.path.join(OUTPUT_DIR, "pred_1114_3.csv")

In [5]:
# Ground-truth csv that sits next to the chopped validation scenes, as a plain string.
eval_gt_path = f"{DATA_DIR}/scenes/validate_chopped_100/gt.csv"

In [6]:
# metrics = compute_metrics_csv(eval_gt_path, pred_path, [neg_multi_log_likelihood, time_displace])
# for metric_name, metric_mean in metrics.items():
#     print(metric_name, metric_mean)


In [7]:
def validate_dicts(ground_truth: dict, predicted: dict) -> bool:
    """
    Validate GT and pred dictionaries by comparing keys

    One diagnostic line is printed for every problem found: a row-count
    mismatch, each key that is missing from the predictions and each key that
    does not exist in the ground truth. Missing/unknown keys are reported in
    sorted order so the output is reproducible between runs.

    Args:
        ground_truth (dict): mapping from (track_id + timestamp) to an element returned from our csv utils
        predicted (dict): mapping from (track_id + timestamp) to an element returned from our csv utils

    Returns:
        (bool): True if the 2 dicts match (same keys)

    """
    valid = True

    num_agents_gt = len(ground_truth)
    num_agents_pred = len(predicted)

    if num_agents_gt != num_agents_pred:
        print(f"Incorrect number of rows in inference csv. Expected {num_agents_gt}, Got {num_agents_pred}")
        valid = False

    # keys present in the ground truth but absent from the predictions
    missing_agents = ground_truth.keys() - predicted.keys()
    if missing_agents:
        valid = False

    # key=str keeps the sort from raising if keys of different types are ever mixed
    for missing_agent in sorted(missing_agents, key=str):
        print(f"Missing agents: {missing_agent}")

    # keys present in the predictions but absent from the ground truth
    unknown_agents = predicted.keys() - ground_truth.keys()
    if unknown_agents:
        valid = False

    for unknown_agent in sorted(unknown_agents, key=str):
        print(f"Unknown agents: {unknown_agent}")

    return valid


In [8]:
def compute_metrics_csv_(ground_truth_path: str, inference_output_path: str, metrics) -> dict:
    """
    Average a set of metrics over every agent shared by a ground truth csv and a prediction csv.

    Rows of both files are indexed by ``track_id + timestamp``; the two key sets
    must match exactly (checked with ``validate_dicts``) before any metric is
    evaluated. Each metric is called as ``metric(gt_coord, pred_coords, conf, avail)``.

    NOTE(review): predictions are read with the imported ``read_pred_csv``, not with
    this notebook's ``read_pred_csv_``.

    Arguments:
        ground_truth_path (str): Path to the ground truth csv file.
        inference_output_path (str): Path to the csv file containing network output.
        metrics (List[Callable]): callables applied to the elements retrieved from the 2 csv files

    Returns:
        dict: metric ``__name__`` -> mean (along axis 0) of that metric over all elements

    Raises:
        AssertionError: if ``metrics`` is empty
        ValueError: if the two csv files do not describe the same set of agents
    """
    assert len(metrics) > 0, "you must pass at least one metric to compute"

    def _index_rows(rows) -> OrderedDict:
        # key every row by track_id + timestamp, keeping file order
        indexed = OrderedDict()
        for row in rows:
            indexed[row["track_id"] + row["timestamp"]] = row
        return indexed

    gt_rows = _index_rows(read_gt_csv(ground_truth_path))
    pred_rows = _index_rows(read_pred_csv(inference_output_path))

    if not validate_dicts(gt_rows, pred_rows):
        raise ValueError("Error validating csv, see above for details.")

    scores_by_metric = defaultdict(list)
    for agent_key, gt_row in gt_rows.items():
        gt_coord, avail = gt_row["coord"], gt_row["avail"]
        pred_coords, conf = pred_rows[agent_key]["coords"], pred_rows[agent_key]["conf"]

        for metric in metrics:
            score = metric(gt_coord, pred_coords, conf, avail)
            scores_by_metric[metric.__name__].append(score)

    # compute average of each metric
    averaged = {}
    for metric_name, scores in scores_by_metric.items():
        averaged[metric_name] = np.mean(scores, axis=0)
    return averaged

In [9]:
# metrics = compute_metrics_csv_(eval_gt_path, pred_path, [neg_multi_log_likelihood, time_displace])
# for metric_name, metric_mean in metrics.items():
#     print(metric_name, metric_mean)
    


In [10]:
# Module-level lookup tables filled by the loading cells below; both are keyed
# by track_id + timestamp and keep csv row order.
ground_truth = OrderedDict()
inference = OrderedDict()

In [11]:
from typing import Iterator, List, Optional
import csv
from itertools import chain

MAX_MODES = 7
def read_pred_csv_(csv_path: str) -> Iterator[dict]:
    """
    Generator function that returns a line at the time from the csv file as a dict

    Args:
        csv_path (str): path of the csv to read

    Returns:
        Iterator[dict]: dict keys are the csv header fieldnames
    """

    reader = csv.DictReader(open(csv_path, "r"))
    fieldnames = reader.fieldnames
    assert fieldnames is not None, "error reading fieldnames"

    # exclude timestamp, track_id and MAX_MODES confs, the rest should be (x, y) * len * 3 = 6*len
    future_len = (len(fieldnames) - (2 + 7)) / 14
    print(future_len)
    assert future_len == int(future_len), "error estimating len"
    future_len = int(future_len)

    coords_labels_list = [_generate_coords_keys(future_len, mode_index=idx) for idx in range(MAX_MODES)]
    confs_labels = _generate_confs_keys()

    for row in reader:
        track_id = row["track_id"]
        timestamp = row["timestamp"]

        conf = np.asarray([np.float64(row[conf_label]) for conf_label in confs_labels])

        coords = []
        for idx in range(MAX_MODES):
            coord = np.asarray([np.float64(row[coord_label]) for coord_label in coords_labels_list[idx]])
            coords.append(coord.reshape((future_len, 2)))

        coords = np.stack(coords, axis=0)

        yield {"track_id": track_id, "timestamp": timestamp, "coords": coords, "conf": conf}
        
def _generate_coords_keys(future_len: int, mode_index: int = 0) -> List[str]:
    """
    Generate keys like coord_x00, coord_y00... that can be used to get or set value in CSV.
    Two keys for each mode and future step.

    Args:
        future_len (int): how many prediction the data has in the future
        mode_index (int): what mode are we reading/writing

    Returns:
        List[str]: a list of keys
    """
    return list(
        chain.from_iterable([[f"coord_x{mode_index}{i}", f"coord_y{mode_index}{i}"] for i in range(future_len)])
    )

def _generate_confs_keys() -> List[str]:
    """
    Generate modes keys (one per mode)

    Returns:
        List[str]: a list of keys
    """
    return [f"conf_{i}" for i in range(MAX_MODES)]

In [12]:
# Index every ground-truth row by track_id + timestamp (the same key scheme
# compute_metrics_csv_ uses), so predictions can be looked up per agent.
for el in read_gt_csv(eval_gt_path):
    ground_truth[el["track_id"] + el["timestamp"]] = el

In [13]:
# Index every prediction row the same way. This uses the notebook's own
# read_pred_csv_ (MAX_MODES modes per row) rather than the imported read_pred_csv.
for el in read_pred_csv_(pred_path):
    inference[el["track_id"] + el["timestamp"]] = el

50.0


In [14]:
# NOTE(review): not referenced by any later cell visible in this notebook; the
# per-agent scores below are collected in `errs` instead.
metrics_dict = defaultdict(list)

In [15]:
# Per-agent scores as [key, score] pairs, in ground-truth iteration order.
errs = []
for key, ground_truth_value in ground_truth.items():
    gt_coord = ground_truth_value["coord"]
    avail = ground_truth_value["avail"]

    # NOTE(review): raises KeyError if this agent has no row in the prediction csv;
    # validate_dicts() is not called before this loop.
    pred_coords = inference[key]["coords"]
    conf = inference[key]["conf"]

    # same call signature compute_metrics_csv_ uses for its metrics
    errs.append([key, neg_multi_log_likelihood(gt_coord, pred_coords, conf, avail)])
    
#     break

In [16]:
# Turn the [key, score] pairs into a 2-column array (column 0 = key, column 1 = score).
# NOTE(review): presumably the keys are strings, in which case numpy stores the
# scores as strings too and they must be cast back before use — TODO confirm.
# Rebinding `errs` means the previous cell has to be re-run to get the list back.
errs = np.array(errs)

In [17]:
# Mean of the per-agent scores (column 1), cast to float32 before averaging.
np.mean(errs[:, 1].astype('float32'))

15.405956