## Installing l5kit library

The l5kit library is provided through a utility script for both training and inference; otherwise, GPUs cannot be used because of a bug in Kaggle notebooks.

**Thanks to Peter** for the utility script: [Unofficial fix for l5kit](https://www.kaggle.com/pestipeti/lyft-l5kit-unofficial-fix)

## Library import

In [1]:
import os
import numpy as np
import torch

from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet18
from tqdm import tqdm
from typing import Dict

from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer

In [2]:
# Root folder of the competition data (also used as the l5kit data folder).
DIR_INPUT = "../input/lyft-motion-prediction-autonomous-vehicles/"

# DIR_INPUT already ends with a separator, so the paths are joined with os.path.join
# rather than by string formatting, which would produce a doubled "//".
SINGLE_MODE_SUBMISSION = os.path.join(DIR_INPUT, "single_mode_sample_submission.csv")
MULTI_MODE_SUBMISSION = os.path.join(DIR_INPUT, "multi_mode_sample_submission.csv")

# When True, the training run is shortened and no intermediate checkpoints are written.
DEBUG = False

## Training configuration parameters

In [3]:
# Single configuration dict for the rasterizer, the dataset, the model and the train loop.
cfg = {
    "format_version": 4,

    # Temporal extent of the model input (history) and of the prediction (future).
    "model_params": {
        "model_architecture": "resnet18",
        "history_num_frames": 10,
        "history_step_size": 1,
        "history_delta_time": 0.1,
        "future_num_frames": 50,
        "future_step_size": 1,
        "future_delta_time": 0.1,
    },

    # Settings handed to the rasterizer.
    "raster_params": {
        "raster_size": [300, 300],
        "pixel_size": [0.5, 0.5],
        "ego_center": [0.25, 0.5],
        "map_type": "py_semantic",
        "satellite_map_key": "aerial_map/aerial_map.png",
        "semantic_map_key": "semantic_map/semantic_map.pb",
        "dataset_meta_key": "meta.json",
        "filter_agents_threshold": 0.5,
    },

    # Zarr key of the training scenes and the DataLoader options.
    "train_data_loader": {
        "key": "scenes/train.zarr",
        "batch_size": 32,
        "shuffle": True,
        "num_workers": 4,
    },

    # Length of the run and checkpoint cadence; DEBUG shortens the run.
    # No periodic evaluation is configured.
    "train_params": {
        "max_num_steps": 100 if DEBUG else 2000,
        "checkpoint_every_n_steps": 500,
    },
}

In [4]:
# Echo the configuration so the values used for this run are recorded in the cell output.
cfg

{'format_version': 4,
 'model_params': {'model_architecture': 'resnet18',
  'history_num_frames': 10,
  'history_step_size': 1,
  'history_delta_time': 0.1,
  'future_num_frames': 50,
  'future_step_size': 1,
  'future_delta_time': 0.1},
 'raster_params': {'raster_size': [300, 300],
  'pixel_size': [0.5, 0.5],
  'ego_center': [0.25, 0.5],
  'map_type': 'py_semantic',
  'satellite_map_key': 'aerial_map/aerial_map.png',
  'semantic_map_key': 'semantic_map/semantic_map.pb',
  'dataset_meta_key': 'meta.json',
  'filter_agents_threshold': 0.5},
 'train_data_loader': {'key': 'scenes/train.zarr',
  'batch_size': 32,
  'shuffle': True,
  'num_workers': 4},
 'train_params': {'max_num_steps': 2000, 'checkpoint_every_n_steps': 500}}

In [5]:
# Point l5kit at the competition data folder through the L5KIT_DATA_FOLDER
# environment variable. This is done before the data manager is created:
# with None as its argument, LocalDataManager presumably falls back to that
# variable (confirm against the l5kit documentation).
os.environ["L5KIT_DATA_FOLDER"] = DIR_INPUT
dm = LocalDataManager(None)

## Dataset and Dataset iterator

In [6]:
# Data-loader section of the configuration (zarr key, batch size, shuffle flag, worker count).
train_cfg = cfg["train_data_loader"]

# Rasterizer built from the configuration and the data manager.
rasterizer = build_rasterizer(cfg, dm)

# Train dataset: resolve the zarr key through the data manager, open the chunked
# dataset, and wrap it together with the rasterizer in an AgentDataset.
train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
# Printing the dataset shows a summary table (number of scenes, frames, agents, ...).
print(train_dataset)

+------------+------------+------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   16265    |  4039527   | 320124624  |      112.19     |        248.36        |        79.25         |        24.83         |        10.00        |
+------------+------------+------------+-----------------+----------------------+----------------------+----------------------+---------------------+


In [7]:
# Batched iterator over the training dataset; every option comes from the
# "train_data_loader" section of the configuration.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=train_cfg["batch_size"],
    shuffle=train_cfg["shuffle"],
    num_workers=train_cfg["num_workers"],
)

In [8]:
# Show the concrete classes of the opened zarr dataset and of the dataset wrapper.
print(type(train_zarr))    # l5kit.data.zarr_dataset.ChunkedDataset
print(type(train_dataset)) # l5kit.dataset.agent.AgentDataset

<class 'l5kit.data.zarr_dataset.ChunkedDataset'>
<class 'l5kit.dataset.agent.AgentDataset'>


Each item of the dataset contains a rasterised image of the ego vehicle and the agents across frames (key `'image'`),
together with vehicle-level information under the keys `'target_positions'`, `'target_yaws'`, `'history_positions'` and `'history_yaws'`.

## IMAGE of Train dataset

* image_size = (num_in_channels, raster_size_height, raster_size_width)
* num_in_channels = num_history_channels + 3
* num_history_channels = (history_num_frames + 1) * 2 
* i.e. (past frames + current frame) * (one channel for the ego vehicle + one for the agents)

In [9]:
# Shape of the raster of the first sample: (channels, height, width).
print(train_dataset[0]['image'].shape)

(25, 300, 300)


## Train model class

In [10]:
class LyftModel(nn.Module):
    """ResNet-18 based regressor from a multi-channel raster to future (x, y) positions.

    The first convolution of the pretrained backbone is replaced so that it accepts
    the raster's channel count, and the backbone's own classifier (``fc``) is bypassed
    in favour of a small fully connected head.

    Args:
        cfg: Configuration dict. Reads ``cfg["model_params"]["history_num_frames"]``
            (to size the input convolution) and
            ``cfg["model_params"]["future_num_frames"]`` (to size the output layer).
    """

    def __init__(self, cfg: Dict):
        super().__init__()

        self.backbone = resnet18(pretrained=True, progress=True)

        # Two raster channels per frame (history frames + the current one), plus 3 more.
        num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels

        # Same geometry as the pretrained stem, different number of input channels.
        # The replacement layer is newly constructed, so the pretrained conv1
        # weights are not carried over.
        pretrained_conv1 = self.backbone.conv1
        self.backbone.conv1 = nn.Conv2d(
            num_in_channels,
            pretrained_conv1.out_channels,
            kernel_size=pretrained_conv1.kernel_size,
            stride=pretrained_conv1.stride,
            padding=pretrained_conv1.padding,
            bias=False,
        )

        # Width of the pooled backbone features, read from the classifier the backbone
        # ships with instead of being hard-coded (512 for resnet18 and resnet34, 2048
        # for the other resnets), so the head stays correct if the backbone is swapped.
        backbone_out_features = self.backbone.fc.in_features

        # X, Y coords for the future positions. The model emits them flat
        # (B x 2*future_num_frames); callers reshape to B x future_num_frames x 2.
        num_targets = 2 * cfg["model_params"]["future_num_frames"]

        # You can add more layers here.
        self.head = nn.Sequential(
            # nn.Dropout(0.2),
            nn.Linear(in_features=backbone_out_features, out_features=4096),
        )

        self.logit = nn.Linear(4096, out_features=num_targets)

    def forward(self, x):
        """Run the backbone (without its ``fc`` layer), then the head and output layer.

        Args:
            x: Batch of rasters whose channel count matches the replaced ``conv1``.

        Returns:
            Tensor with ``2 * future_num_frames`` values per sample (flattened
            x/y predictions).
        """
        # Stem
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        # Residual stages
        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        # Pool and flatten everything but the batch dimension.
        x = self.backbone.avgpool(x)
        x = torch.flatten(x, 1)

        x = self.head(x)
        x = self.logit(x)

        return x

In [11]:
# Check whether a CUDA device is visible to PyTorch.
torch.cuda.is_available()

True

In [12]:
# ==== INIT MODEL
# Use the first GPU when CUDA is available, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Module.to() moves the parameters in place and returns the module itself.
model = LyftModel(cfg).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Keep the per-element losses: the invalid steps are masked out later, before averaging.
criterion = nn.MSELoss(reduction="none")

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




In [13]:
# Repeat of the CUDA availability check, here after the model has been created.
torch.cuda.is_available()

True

## Training

In [14]:
# ==== TRAIN LOOP
# Runs a fixed number of optimisation steps (not epochs) and keeps the per-step
# losses in `losses_train`.
tr_it = iter(train_dataloader)

progress_bar = tqdm(range(cfg["train_params"]["max_num_steps"]))
losses_train = []

# Loop-invariant setup: nothing inside the loop switches the model to eval mode
# or disables autograd, so this only needs to happen once.
model.train()
torch.set_grad_enabled(True)

for itr in progress_bar:

    # The step budget is independent of the dataset length: when the loader is
    # exhausted, start a new pass over it.
    try:
        data = next(tr_it)
    except StopIteration:
        tr_it = iter(train_dataloader)
        data = next(tr_it)

    # Forward pass
    inputs = data["image"].to(device)
    # Trailing axis added so the availabilities broadcast against the targets' last axis.
    target_availabilities = data["target_availabilities"].unsqueeze(-1).to(device)
    targets = data["target_positions"].to(device)

    # The model emits flat predictions; give them the layout of the targets.
    outputs = model(inputs).reshape(targets.shape)
    loss = criterion(outputs, targets)

    # not all the output steps are valid, but we can filter them out from the loss using availabilities
    # NOTE: the mean is taken over every element, so masked entries contribute
    # zeros to the sum but still count in the denominator.
    loss = loss * target_availabilities
    loss = loss.mean()

    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Read the scalar once per step; it is reused for the history and the progress bar.
    loss_value = loss.item()
    losses_train.append(loss_value)

    # The file name carries the 0-based `itr`, so the checkpoint written after
    # step 500 is `model_state_499.pth`.
    if (itr+1) % cfg['train_params']['checkpoint_every_n_steps'] == 0 and not DEBUG:
        torch.save(model.state_dict(), f'model_state_{itr}.pth')

    # Current loss and the running mean over the last 100 steps.
    progress_bar.set_description(f"loss: {loss_value} loss(avg): {np.mean(losses_train[-100:])}")

loss: 18.389493942260742 loss(avg): 15.574040813446045: 100%|██████████| 2000/2000 [1:36:59<00:00,  2.91s/it]


In [15]:
# Persist the final weights (state dict only) once training has finished.
final_state = model.state_dict()
torch.save(final_state, "model_state_last.pth")