In [None]:
import numpy as np
import os
import psutil
import torch

from torch import nn
from torch.utils.data import DataLoader
from torchvision.models.mobilenet import mobilenet_v2
from tqdm.notebook import tqdm
from typing import Dict

from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.evaluation import write_pred_csv
from l5kit.rasterization import build_rasterizer

In [None]:
# Root data folder: exported as L5KIT_DATA_FOLDER and used to build the path to scenes/mask.npz.
INPUT_DIR = '/kaggle/input/lyft-motion-prediction-autonomous-vehicles'
# Checkpoint file whose state dict is loaded into the model before inference.
WEIGHTS_FILE = '/kaggle/input/lyft-training-mobilenetv2/l5run3_mobilenetv2.pth'

In [None]:
# Single configuration object handed to the rasterizer, the dataset and the model.
cfg = dict(
    format_version=4,
    # Temporal layout of a sample. The model derives its input channel count
    # from history_num_frames and its output width from future_num_frames.
    model_params=dict(
        history_num_frames=10,
        history_step_size=1,
        history_delta_time=0.1,
        future_num_frames=50,
        future_step_size=1,
        future_delta_time=0.1,
    ),
    # Settings consumed by build_rasterizer(cfg, dm).
    raster_params=dict(
        raster_size=[224, 224],
        pixel_size=[0.5, 0.5],
        ego_center=[0.25, 0.5],
        map_type='py_semantic',
        satellite_map_key='aerial_map/aerial_map.png',
        semantic_map_key='semantic_map/semantic_map.pb',
        dataset_meta_key='meta.json',
        filter_agents_threshold=0.5,
    ),
    # 'key' is resolved through the data manager; the remaining entries are
    # forwarded to the test DataLoader.
    test_data_loader=dict(
        key='scenes/test.zarr',
        batch_size=8,
        shuffle=False,
        num_workers=0,
    ),
)

In [None]:
# set env variable for data
# NOTE(review): LocalDataManager(None) presumably falls back to the
# L5KIT_DATA_FOLDER environment variable when no folder is passed, which is why
# the variable is exported first — confirm against the l5kit documentation.
os.environ["L5KIT_DATA_FOLDER"] = INPUT_DIR
dm = LocalDataManager(None)

# Init test dataset

In [None]:
# ===== INIT DATASET
test_cfg = cfg["test_data_loader"]

# Rasterizer
rasterizer = build_rasterizer(cfg, dm)

# Test dataset/dataloader
test_zarr = ChunkedDataset(dm.require(test_cfg["key"])).open()

# np.load returns a lazily-read NpzFile for .npz archives. Reading the mask
# inside a `with` block closes the underlying file handle as soon as the array
# has been materialised instead of leaving it open until garbage collection.
with np.load(f"{INPUT_DIR}/scenes/mask.npz") as mask_archive:
    test_mask = mask_archive["arr_0"]

test_dataset = AgentDataset(cfg, test_zarr, rasterizer, agents_mask=test_mask)
test_dataloader = DataLoader(test_dataset,
                             shuffle=test_cfg["shuffle"],
                             batch_size=test_cfg["batch_size"],
                             num_workers=test_cfg["num_workers"])


# Sanity output: loader object, number of samples, number of batches.
print(test_dataloader)
print(len(test_dataset))
print(len(test_dataloader))

# Define model

In [None]:
class Identity(nn.Module):
    """Parameter-free pass-through layer.

    Used to replace a sub-module whose computation should be skipped while
    keeping the parent module's forward pass intact.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return the input object unchanged."""
        return x
    
class MobilenetV2LSTM(nn.Module):
    """MobileNetV2 image encoder combined with flattened motion-history features.

    The raster image is encoded by a MobileNetV2 whose classifier is replaced
    by a pass-through layer; the resulting feature vector is concatenated with
    the flattened velocity, acceleration and yaw histories and fed to a
    two-layer fully connected head that outputs ``2 * future_num_frames``
    values per sample.

    Despite the class name, no recurrent layer is instantiated: the head is a
    plain MLP.

    Args:
        config: Configuration dict. Reads
            ``config['test_data_loader']['batch_size']``,
            ``config['model_params']['history_num_frames']`` and
            ``config['model_params']['future_num_frames']``.
    """

    def __init__(self, config: Dict):
        super().__init__()
        self.cfg = config
        # Kept as a public attribute for existing callers. forward() does not
        # depend on it, so batches of any size are accepted.
        self.batch_size = self.cfg['test_data_loader']['batch_size']
        self.hist_frames = self.cfg['model_params']['history_num_frames']
        # Width of the concatenated head input: 1280 values expected from the
        # CNN, plus the flattened velocity (2 per frame, hist_frames + 1
        # frames), acceleration (2 per frame, hist_frames frames) and yaw
        # (1 per frame, hist_frames + 1 frames) histories.
        self.fc_infeatures = 1280 + (2 * (self.hist_frames + 1)) + (2 * self.hist_frames) + (self.hist_frames + 1)
        self.num_targets = 2 * self.cfg["model_params"]["future_num_frames"]
        self.cnn = self.build_basecnn()
        # The layer order inside this Sequential determines the state-dict key
        # names (fc1.1.*, fc1.4.*), so it must stay as-is for saved weights to load.
        self.fc1 = nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_features=self.fc_infeatures, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_features=4096, out_features=self.num_targets)
        )

    def forward(self, x, vel, accel, yaw):
        """Predict flattened future offsets for a batch.

        Args:
            x: Image batch passed to the CNN.
            vel: Velocity history; flattened per sample.
            accel: Acceleration history; flattened per sample.
            yaw: Yaw history; flattened per sample.

        The per-sample flattened widths of ``vel``, ``accel`` and ``yaw`` plus
        the CNN feature width must add up to ``self.fc_infeatures``.

        Returns:
            Tensor with one row of ``self.num_targets`` values per sample.
        """
        image_features = self.cnn(x)
        # Flatten every history tensor from its own leading (batch) dimension
        # rather than the configured batch size: the DataLoader's final batch
        # is usually smaller, and reshaping it to `self.batch_size` rows fails.
        motion_features = [
            history.flatten(start_dim=1) for history in (vel, accel, yaw)
        ]
        fused = torch.cat([image_features, *motion_features], dim=1)
        return self.fc1(fused)

    def build_basecnn(self):
        """Build the MobileNetV2 backbone adapted to the rasterizer output.

        The first convolution is replaced so that it accepts
        ``3 + 2 * (history_num_frames + 1)`` input channels, and the classifier
        is replaced by ``Identity`` so the network returns its feature vector.

        Returns:
            The modified, randomly initialised MobileNetV2 module.
        """
        mnet = mobilenet_v2(pretrained=False)

        # change input channels number to match the rasterizer's output
        num_history_channels = (self.cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels

        first_conv = mnet.features[0][0]
        mnet.features[0][0] = nn.Conv2d(
            num_in_channels,
            first_conv.out_channels,
            kernel_size=first_conv.kernel_size,
            stride=first_conv.stride,
            padding=first_conv.padding,
            bias=False,
        )

        mnet.classifier = Identity()

        return mnet

In [None]:
# def build_model(cfg: Dict) -> torch.nn.Module:
#     # build a MobileNetV2 backbone without pre-trained weights (pretrained=False)
#     model = mobilenet_v2(pretrained=False)

#     # change input channels number to match the rasterizer's output
#     num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
#     num_in_channels = 3 + num_history_channels
#     model.features[0][0] = nn.Conv2d(
#         num_in_channels,
#         model.features[0][0].out_channels,
#         kernel_size=model.features[0][0].kernel_size,
#         stride=model.features[0][0].stride,
#         padding=model.features[0][0].padding,
#         bias=False,
#     )
#     # change output size to (X, Y) * number of future states
#     num_targets = 2 * cfg["model_params"]["future_num_frames"]
#     model.classifier[1] = nn.Linear(in_features=model.classifier[1].in_features, out_features=num_targets)

#     return model

In [None]:
# ==== INIT MODEL
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build the network, move it to the selected device, then restore the saved
# weights; map_location loads the checkpoint tensors onto that same device.
model = MobilenetV2LSTM(cfg)
model = model.to(device)
model.load_state_dict(torch.load(WEIGHTS_FILE, map_location=device))

# Prediction

In [None]:
# Inference only: eval() switches off dropout, no_grad() skips autograd bookkeeping.
model.eval()

# Per-batch results, concatenated later when the submission file is written.
future_coords_offsets_pd = []
timestamps = []
agent_ids = []

with torch.no_grad():
    # Wrap the DataLoader itself (not iter(...)) so tqdm can read its length
    # and display the total number of batches.
    pbar = tqdm(test_dataloader)
    for data in pbar:

        im_inputs = data["image"].to(device)
        vel_inputs = data["history_velocities"].to(device)
        accel_inputs = data["history_accels"][:, :-1, :].to(device)  # removing last history frame since we don't have accel for it
        yaw_inputs = data["history_yaws"].to(device)

        # Only the shape of the targets is needed to un-flatten the prediction,
        # so the target tensors are not transferred to the device.
        target_shape = data["target_positions"].shape

        # Forward pass
        outputs = model(im_inputs, vel_inputs, accel_inputs, yaw_inputs).reshape(target_shape)

        future_coords_offsets_pd.append(outputs.cpu().numpy().copy())
        timestamps.append(data["timestamp"].numpy().copy())
        agent_ids.append(data["track_id"].numpy().copy())

        pbar.set_description(f'RAM used: {psutil.virtual_memory().percent}%')

In [None]:
# Merge the per-batch arrays collected during prediction into one array per
# column and write them out as the submission file.
submission_columns = {
    "timestamps": np.concatenate(timestamps),
    "track_ids": np.concatenate(agent_ids),
    "coords": np.concatenate(future_coords_offsets_pd),
}
write_pred_csv('submission.csv', **submission_columns)