# Lyft Chopped Dataset Creation

In [None]:
from typing import Dict
import numpy as np

from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from pathlib import Path

import os
import yaml
from io import StringIO
import shutil

In [None]:
# Inline configuration for this notebook, kept as YAML text so the notebook is
# self-contained. It is parsed into `cfg` below and handed to the l5kit helpers.
AGENT_MOTION_CONFIG = """
# Config format schema number
format_version: 4

###################
## Model options
model_params:
  model_architecture: "resnet50"

  history_num_frames: 0
  history_step_size: 1
  history_delta_time: 0.1

  future_num_frames: 50
  future_step_size: 1
  future_delta_time: 0.1

###################
## Input raster parameters
raster_params:
  # raster image size [pixels]
  raster_size:
    - 224
    - 224
  # raster's spatial resolution [meters per pixel]: the size in the real world one pixel corresponds to.
  pixel_size:
    - 0.5
    - 0.5
  # From 0 to 1 per axis, [0.5,0.5] would show the ego centered in the image.
  ego_center:
    - 0.25
    - 0.5
  map_type: "py_semantic"

  # the keys are relative to the dataset environment variable
  satellite_map_key: "aerial_map/aerial_map.png"
  semantic_map_key: "semantic_map/semantic_map.pb"
  dataset_meta_key: "meta.json"

  # e.g. 0.0 include every obstacle, 0.5 show those obstacles with >0.5 probability of being
  # one of the classes we care about (cars, bikes, peds, etc.), >=1.0 filter all other agents.
  filter_agents_threshold: 0.5

###################
## Data loader options
train_data_loader:
  key: "scenes/sample.zarr"
  batch_size: 12
  shuffle: True
  num_workers: 16

val_data_loader:
  key: "scenes/sample.zarr"
  batch_size: 12
  shuffle: False
  num_workers: 16

###################
## Train params
train_params:
  checkpoint_every_n_steps: 10000
  max_num_steps: 5
  eval_every_n_steps: 10000
"""

# The document only contains plain scalars, lists and mappings, so the safe
# loader is sufficient; safe_load also takes the string directly, which makes
# the StringIO wrapper unnecessary.
cfg: dict = yaml.safe_load(AGENT_MOTION_CONFIG)
print(cfg)

In [None]:
# Kaggle input folder holding the competition data; dataset keys from cfg are
# looked up through the data manager created on top of it.
DATA_ROOT = "/kaggle/input/lyft-motion-prediction-autonomous-vehicles"
dm = LocalDataManager(DATA_ROOT)

# Build the rasterizer from the notebook config and the data manager.
rasterizer = build_rasterizer(cfg, dm)

## Original Unchopped Data

In [None]:
# Open the validation zarr exactly as shipped (no chopping yet) and print the
# dataset summary so it can be compared with the chopped version later on.
unchopped_zarr_path = dm.require(cfg["val_data_loader"]["key"])
eval_zarr = ChunkedDataset(unchopped_zarr_path).open()
unchopped_dataset = AgentDataset(cfg, eval_zarr, rasterizer)
print(unchopped_dataset)

Note: on average there are 248 frames per scene.

# Chop Evaluation Data (Copy to /tmp)

In [None]:
# ===== GENERATE AND LOAD CHOPPED DATASET
num_frames_to_chop = 100
eval_cfg = cfg["val_data_loader"]

# As the /kaggle/input directory is not writeable as required to chop,
# copy the sample set to /tmp
!rm -rf /tmp/lyft
eval_dir = shutil.copytree(dm.require(eval_cfg["key"]), '/tmp/lyft/sample.zarr')

eval_base_path = create_chopped_dataset(eval_dir, cfg["raster_params"]["filter_agents_threshold"], 
                              num_frames_to_chop, cfg["model_params"]["future_num_frames"], MIN_FUTURE_STEPS)
!ls {eval_base_path}

The chopped data, the mask, and the ground truth have been created.

## Load

In [None]:
# Paths of the artifacts inside the chopped dataset folder: the chopped zarr
# (named like the source zarr), the agents mask and the ground-truth CSV.
chopped_root = Path(eval_base_path)
eval_zarr_path = str(chopped_root / Path(dm.require(eval_cfg["key"])).name)
eval_mask_path = str(chopped_root / "mask.npz")
eval_gt_path = str(chopped_root / "gt.csv")

eval_zarr = ChunkedDataset(eval_zarr_path).open()
# np.load on an .npz returns an archive object that keeps the file open; read
# the array inside a context manager so the handle is released afterwards.
with np.load(eval_mask_path) as mask_archive:
    eval_mask = mask_archive["arr_0"]
# ===== INIT DATASET AND LOAD MASK
eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
print(eval_dataset)

Note: on average there are now 100 frames per scene.

In [None]:
# Remove the copied dataset and its chopped output so that the symlink variant
# that follows can create /tmp/lyft from scratch.
!rm -rf /tmp/lyft

# Chop Evaluation Data (Symlink to /tmp)

In [None]:
eval_dir = dm.require(eval_cfg["key"])
!mkdir /tmp/lyft && ln -s {eval_dir} /tmp/lyft
eval_dir = "/tmp/lyft/" + Path(eval_dir).name
!ls -la  {eval_dir}

In [None]:
eval_base_path = create_chopped_dataset(eval_dir, cfg["raster_params"]["filter_agents_threshold"], 
                              num_frames_to_chop, cfg["model_params"]["future_num_frames"], MIN_FUTURE_STEPS)
!ls {eval_base_path}

## Load

In [None]:
# Paths of the artifacts inside the chopped dataset folder: the chopped zarr
# (named like the source zarr), the agents mask and the ground-truth CSV.
chopped_root = Path(eval_base_path)
eval_zarr_path = str(chopped_root / Path(dm.require(eval_cfg["key"])).name)
eval_mask_path = str(chopped_root / "mask.npz")
eval_gt_path = str(chopped_root / "gt.csv")

eval_zarr = ChunkedDataset(eval_zarr_path).open()
# np.load on an .npz returns an archive object that keeps the file open; read
# the array inside a context manager so the handle is released afterwards.
with np.load(eval_mask_path) as mask_archive:
    eval_mask = mask_archive["arr_0"]
# ===== INIT DATASET AND LOAD MASK
eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
print(eval_dataset)

Again, the chopped dataset looks fine.