# Constant prediction baseline
Since the mean of `target_positions` was observed to be about (5, 0), this notebook checks what score a constant prediction at that mean position achieves.

In [None]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
import torchvision
from torchvision.models.resnet import resnet50, resnet18, resnet34, resnet101
from tqdm import tqdm

import l5kit
from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import matplotlib.pyplot as plt

import os
import random
import time

from IPython.display import display
from tqdm import tqdm_notebook
import gc, psutil

import warnings
# Suppress all Python warnings so they do not clutter the cell outputs.
warnings.filterwarnings("ignore")

# Last expression of the cell: shows the installed l5kit version in the output.
l5kit.__version__

In [None]:
# Memory measurement
def memory(verbose=True):
    """Return the amount of physical memory currently in use, in GB.

    "Used" is computed as total minus available memory, both taken from
    ``psutil.virtual_memory()``.

    Args:
        verbose: When true, also print the used, available and total
            amounts (in GB, two decimals) on a single line.

    Returns:
        Used physical memory in GB as a float.
    """
    bytes_per_gb = 1024 ** 3
    vm = psutil.virtual_memory()
    used_gb = (vm.total - vm.available) / bytes_per_gb
    if verbose:
        available_gb = vm.available / bytes_per_gb
        total_gb = vm.total / bytes_per_gb
        print('Physical memory:',
              '%.2f GB (used),' % used_gb,
              '%.2f GB (available)' % available_gb, '/',
              '%.2f GB' % total_gb)
    return used_gb

def gc_memory(verbose=True):
    """Run a garbage collection pass and optionally report its effect.

    Args:
        verbose: When true, print the value returned by ``gc.collect()``
            followed (on the same line) by the ``memory()`` report.

    Returns:
        None.
    """
    collected = gc.collect()
    if not verbose:
        return
    # Keep the GC count and the memory report on one output line.
    print('GC:', collected, end=' | ')
    memory()

# Print the memory usage before any dataset is opened; the trailing ';' keeps
# the notebook from also echoing the returned value.
memory();

In [None]:
# Root folder; the dataset is looked up under `{folder}/input/...` and the
# submission file name also depends on whether this ends with 'kaggle'.
# folder = '..'
folder = '/kaggle'
# When True, the prediction loop reports progress more often and stops early.
test_run = False

In [None]:
# --- Lyft configs ---
# Single configuration dict handed to build_rasterizer() and AgentDataset();
# the data-loader sections are read directly when building the DataLoader.
cfg = {
    "format_version": 4,
    "data_path": f"{folder}/input/lyft-motion-prediction-autonomous-vehicles",
    # History / future horizon of each sample.
    "model_params": {
        "history_num_frames": 10,
        "history_step_size": 1,
        "history_delta_time": 0.1,
        "future_num_frames": 50,
        "future_step_size": 1,
        "future_delta_time": 0.1,
    },
    # Rasterizer settings.
    "raster_params": {
        "raster_size": [2, 2],  # [224, 224]
        "pixel_size": [0.5, 0.5],
        "ego_center": [0.25, 0.5],
        "map_type": "py_semantic",
        "satellite_map_key": "aerial_map/aerial_map.png",
        "semantic_map_key": "semantic_map/semantic_map.pb",
        "dataset_meta_key": "meta.json",
        "filter_agents_threshold": 0.5,
    },
    "train_data_loader": {
        "key": "scenes/train.zarr",
        "batch_size": 512,  # 8192
        "shuffle": False,
        "num_workers": 4,
    },
    "test_data_loader": {
        "key": "scenes/test.zarr",
        "batch_size": 128,
        "shuffle": False,
        "num_workers": 4,
    },
}

In [None]:
%%time
# set env variable for data
DIR_INPUT = cfg["data_path"]
# Must be exported before LocalDataManager() is created on the next line.
os.environ["L5KIT_DATA_FOLDER"] = DIR_INPUT
dm = LocalDataManager()
# Build rasterizer
# Configured from cfg and the data manager; reused when building the test dataset.
rasterizer = build_rasterizer(cfg, dm)

In [None]:
%%time
# Test dataset
test_cfg = cfg["test_data_loader"]
test_zarr = ChunkedDataset(dm.require(test_cfg["key"])).open(cached=False)  # try to turn off cache
test_mask = np.load(f"{DIR_INPUT}/scenes/mask.npz")["arr_0"]
test_dataset = AgentDataset(cfg, test_zarr, rasterizer, agents_mask=test_mask)
test_dataloader = DataLoader(test_dataset, shuffle=test_cfg["shuffle"],
                             batch_size=test_cfg["batch_size"], num_workers=test_cfg["num_workers"])
print(test_dataset)

In [None]:
# Number of samples the test dataset exposes (one per masked agent entry).
print('test set size:', len(test_dataset))

# Output constant predictions baseline

In [None]:
# len(DataLoader) already equals ceil(len(dataset) / batch_size) when
# drop_last is False, so no float arithmetic is needed.
print('Number of batches for prediction:', len(test_dataloader))

In [None]:
# Sanity check of the leading (batch) dimension. One batch is pulled
# explicitly because `data` is only bound by the prediction loop in a later
# cell, so referencing it here raises NameError on a top-to-bottom run.
first_batch = next(iter(test_dataloader))
first_batch['world_from_agent'].shape[0]

In [None]:
%%time
# Nothing is trained in this notebook, so autograd is switched off.
torch.set_grad_enabled(False)

# store information for evaluation
future_coords_offsets_pd = []  # one prediction array per batch
timestamps = []  # one data["timestamp"] array per batch
# confidences_list = []
agent_ids = []  # one data["track_id"] array per batch
memorys_pred = []  # used memory (GB) returned by memory() at each progress report
t0 = time.time()
times_pred = []  # elapsed minutes at each progress report
iterations_pred = []  # batch index at each progress report
i_update = 5 if test_run else 50  # report progress every `i_update` batches

# Every sample gets the same constant (x, y) prediction for all 3 modes and
# all future frames.
# The constants are the average over the first 512 training samples.
preds_zeros = torch.zeros(cfg['test_data_loader']['batch_size'], 3, cfg['model_params']['future_num_frames'], 2).double()
preds_zeros[:, :, :, 0] = 2.7429526
preds_zeros[:, :, :, 1] = -0.3545249

for i, data in enumerate(tqdm_notebook(test_dataloader, mininterval=5.)):
    # The last batch may be smaller than the configured batch size, so the
    # constant tensor is sliced to the actual batch length.
    batch_size = data['world_from_agent'].shape[0]
    # out[b, m, t, j] = sum_i preds[b, m, t, i] * W[b, j, i], where W is the
    # top-left 2x2 block of each sample's world_from_agent matrix.
    # NOTE(review): presumably this rotates the agent-frame offsets into world
    # orientation without applying the translation - confirm against the
    # coordinate frame expected by write_pred_csv.
    preds = torch.einsum('bmti,bji->bmtj', preds_zeros[:batch_size], data["world_from_agent"][:, :2, :2]).numpy()

    future_coords_offsets_pd.append(preds.copy())
    # confidences_list.append(confidences.cpu().numpy().copy())
    timestamps.append(data["timestamp"].numpy().copy())
    agent_ids.append(data["track_id"].numpy().copy()) 

    if i % i_update == 0:
        # Progress report: batch index, elapsed minutes, current memory usage.
        t = ((time.time() - t0) / 60)
        print('%4d'%i, '%6.2fmins'%t, end=' | ')
        mem = memory()
        iterations_pred.append(i)
        memorys_pred.append(mem)
        times_pred.append(t)
        # The early exit is only checked on reporting iterations, so a test
        # run stops at the first reported batch index >= 10.
        if test_run and i >= 10:
            break
print('Total timespent: %6.2fmins'%((time.time() - t0) / 60))
memory()

In [None]:
# Flatten the per-batch timestamp arrays; the result's length is the number
# of predicted examples.
timestamps_concat = np.concatenate(timestamps)
n_examples = timestamps_concat.shape[0]
# Dummy confidences: every example puts all of its weight on the first of the
# three modes, i.e. each row is [1, 0, 0].
confidences_concat = np.tile([1.0, 0.0, 0.0], (n_examples, 1))

In [None]:
%%time
# create submission to submit to Kaggle
pred_path = 'submission.csv' if folder.endswith('kaggle') else f'{model_name}-submission.csv'
write_pred_csv(
    pred_path,
    timestamps=timestamps_concat,
    track_ids=np.concatenate(agent_ids),
    coords=np.concatenate(future_coords_offsets_pd),
    confs=confidences_concat,
)

In [None]:
# Read the file that was just written back in and show it as a visual check.
df_sub = pd.read_csv(pred_path)
display(df_sub)