In [None]:
!pip install transforms3d
!pip install zarr
!pip install pymap3d

In [None]:
import sys

# Put the l5kit copy shipped as a Kaggle input dataset at the front of the
# import path so that the `l5kit` imports below resolve to it.
L5KIT_SOURCE_DIR = '/kaggle/input/nfl5kit-dev/l5kit'
sys.path.insert(0, L5KIT_SOURCE_DIR)

In [None]:
from l5kit.configs import load_config_data
from l5kit.data import ChunkedDataset, LocalDataManager
from l5kit.dataset import EgoDataset, AgentDataset
from l5kit.rasterization import build_rasterizer

In [None]:
import os
import numpy as np

In [None]:
# Tell l5kit where the competition data lives. The variable is set before the
# data manager is created (presumably LocalDataManager(None) reads it - confirm).
DATA_ROOT = "../input/lyft-motion-prediction-autonomous-vehicles/"
os.environ["L5KIT_DATA_FOLDER"] = DATA_ROOT
dm = LocalDataManager(None)

# Load the agent-motion-prediction configuration file
CONFIG_PATH = "../input/lyftconfigfiles/agent_motion_prediction/agent_motion_config.yaml"
cfg = load_config_data(CONFIG_PATH)

In [None]:
# Overrides applied on top of the values loaded from the YAML config.
# For a quick experiment on the sample data, use instead:
#   max_num_steps = 5000, checkpoint_every_n_steps = 1000, key = 'scenes/sample.zarr'
cfg['model_params'].update({
    'history_num_frames': 10,
    'render_ego_history': True,
})
cfg['train_params'].update({
    'max_num_steps': 25000,
    'checkpoint_every_n_steps': 10000,
})
cfg['train_data_loader'].update({
    'batch_size': 12,
    'num_workers': 4,
    'key': 'scenes/train.zarr',
})

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.mobilenet import mobilenet_v2

In [None]:
class Identity(nn.Module):
    """Pass-through module with no parameters: returns its input unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` exactly as received."""
        return x
    
class MobilenetV2LSTM(nn.Module):
    """MobileNetV2 image encoder combined with motion-history features and an LSTM head.

    Pipeline implemented by ``forward``:
      1. the raster image goes through a MobileNetV2 whose classifier is replaced
         by ``Identity`` (so the CNN returns its pooled feature vector),
      2. the flattened velocity and yaw histories are concatenated to those features,
      3. ``fc1`` projects the concatenation to a 128-dimensional vector,
      4. that vector is fed to the LSTM as a length-128 sequence of scalars,
      5. the final LSTM hidden state is mapped by ``fc2`` to ``num_targets`` outputs.

    Args:
        config: configuration mapping. Reads
            ``config['train_data_loader']['batch_size']``,
            ``config['model_params']['history_num_frames']`` and
            ``config['model_params']['future_num_frames']``.
    """

    def __init__(self, config):
        super().__init__()
        self.cfg = config
        # Kept as an attribute for backward compatibility. forward() derives the
        # batch size from its input instead, so batches of any size are accepted.
        self.batch_size = self.cfg['train_data_loader']['batch_size']
        self.hist_frames = self.cfg['model_params']['history_num_frames']
        # NOTE(review): 1311 looks like 1280 CNN features + 31 flattened
        # velocity/yaw values for history_num_frames=10 - confirm, and update
        # this constant if history_num_frames changes.
        self.fc_infeatures = 1311
        # Two outputs per future frame
        self.num_targets = 2 * self.cfg["model_params"]["future_num_frames"]
        self.cnn = self.build_basecnn()
        self.fc1 = nn.Sequential(
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_features=self.fc_infeatures, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2, inplace=False),
            nn.Linear(in_features=4096, out_features=128)
        )
        # LSTM decoder: consumes the 128 fc1 outputs one scalar per time step
        self.lstm = nn.LSTM(input_size=1, hidden_size=128, batch_first=True)
        self.fc2 = nn.Linear(in_features=128, out_features=self.num_targets)

    def forward(self, x, vel, yaw):
        """Predict the flattened target vector for a batch.

        Args:
            x: image batch fed to the CNN; its first dimension is the batch size.
            vel: velocity history, flattened per sample before concatenation.
            yaw: yaw history, flattened per sample before concatenation.

        Returns:
            Tensor of shape ``(batch, num_targets)``.
        """
        # Use the actual batch size of this input rather than the configured one:
        # the last batch of an epoch (or an inference batch) can be smaller, and
        # reshaping with the configured size would fail or mix samples.
        batch = x.size(0)

        x = self.cnn(x)
        vel = vel.reshape(batch, -1)
        yaw = yaw.reshape(batch, -1)
        x = torch.cat([x, vel, yaw], dim=1)
        x = self.fc1(x)

        # batch_first=True layout: (batch, seq_len=128, input_size=1)
        x = x.view(batch, 128, 1)
        _, (h_n, _) = self.lstm(x)
        # Use the final hidden state rather than the per-step LSTM outputs
        x = self.fc2(h_n.view(batch, 128))

        return x

    def build_basecnn(self):
        """Build the MobileNetV2 backbone adapted to the rasterizer's channel count.

        The first convolution is replaced so that it accepts
        ``3 + 2 * (history_num_frames + 1)`` input channels (the new layer is
        freshly initialised; all other layers keep their pretrained weights),
        and the classifier is replaced by ``Identity``.
        """
        # change input channels number to match the rasterizer's output
        mnet = mobilenet_v2(pretrained=True)
        num_history_channels = (self.cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels
        first_conv = mnet.features[0][0]
        mnet.features[0][0] = nn.Conv2d(
            num_in_channels,
            first_conv.out_channels,
            kernel_size=first_conv.kernel_size,
            stride=first_conv.stride,
            padding=first_conv.padding,
            bias=False,
        )

        mnet.classifier = Identity()

        return mnet

In [None]:
# def forward(data, model, device, criterion):
#     im_inputs = data["image"].to(device)
#     vel_inputs = data["history_velocities"].to(device)
#     accel_inputs = data["history_accels"][:, :-1, :].to(device)  # removing last history frame since we don't have accel for it
#     yaw_inputs = data["history_yaws"].to(device)

#     target_availabilities = data["target_availabilities"].unsqueeze(-1).to(device)
#     targets = data["target_positions"].to(device)

#     # Forward pass
#     outputs = model(im_inputs, vel_inputs, accel_inputs, yaw_inputs).reshape(targets.shape)
#     loss = criterion(outputs, targets)
#     # not all the output steps are valid, but we can filter them out from the loss using availabilities
#     loss = loss * target_availabilities
#     loss = loss.mean()
#     return loss, outputs


def forward(data, model, device, criterion):
    """Run one forward pass and compute the availability-masked loss.

    Args:
        data: batch dict with the keys "image", "history_velocities",
            "history_yaws", "target_availabilities" and "target_positions".
        model: callable taking (image, velocities, yaws) and returning predictions
            that can be reshaped to the shape of the targets.
        device: device every tensor is moved to before use.
        criterion: element-wise loss (it must not reduce, since the result is
            multiplied by the availability mask afterwards).

    Returns:
        Tuple ``(loss, outputs)``: the mean of the masked element-wise loss and
        the predictions reshaped like ``data["target_positions"]``.
    """
    images, velocities, yaws, targets = (
        data[key].to(device)
        for key in ("image", "history_velocities", "history_yaws", "target_positions")
    )
    # Trailing axis added so the mask broadcasts over the last target dimension
    availability_mask = data["target_availabilities"].unsqueeze(-1).to(device)

    predictions = model(images, velocities, yaws)
    outputs = predictions.reshape(targets.shape)

    # Unavailable future steps contribute zero to the loss; the mean is still
    # taken over all elements, available or not.
    masked_loss = criterion(outputs, targets) * availability_mask
    return masked_loss.mean(), outputs

In [None]:
# Rasterizer + training scenes -> agent-centred dataset
rasterizer = build_rasterizer(cfg, dm)
train_cfg = cfg["train_data_loader"]
train_zarr_path = dm.require(train_cfg["key"])
train_zarr = ChunkedDataset(train_zarr_path).open()
train_dataset = AgentDataset(cfg, train_zarr, rasterizer)

# TODO: create training splits for transfer learning --- wait for pytorch update on kaggle kernels
# train1_size = 25000
# trainrem_size = len(train_dataset) - train1_size
# train_dataset, _ = random_split(train_dataset, [train1_size, trainrem_size], generator=torch.Generator().manual_seed(42))

# Loader settings all come from the "train_data_loader" section of the config
train_dataloader = DataLoader(
    train_dataset,
    shuffle=train_cfg["shuffle"],
    batch_size=train_cfg["batch_size"],
    num_workers=train_cfg["num_workers"],
)

# Dataset summary, number of samples, number of batches
for summary in (train_dataset, len(train_dataset), len(train_dataloader)):
    print(summary)

In [None]:
# ==== INIT MODEL
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MobilenetV2LSTM(cfg).to(device)
# optimizer = optim.Adam(model.parameters(), lr=1e-3)
optimizer = optim.AdamW(model.parameters())
criterion = nn.MSELoss(reduction="none")

In [None]:
from tqdm.notebook import tqdm

In [None]:
def train(model, device, train_loader, criterion, optimizer, max_steps, checkpoint_steps):
    """Train ``model`` for at most ``max_steps`` optimisation steps.

    The loader is iterated once: training stops after ``max_steps`` batches or
    when the loader is exhausted, whichever comes first.

    Args:
        model: module to optimise; switched to training mode.
        device: device passed through to ``forward``.
        train_loader: iterable of batches accepted by ``forward``.
        criterion: loss passed through to ``forward``.
        optimizer: optimizer already bound to the model's parameters.
        max_steps: maximum number of batches to process (also the progress-bar total).
        checkpoint_steps: print the current and average loss every this many
            steps (the final step is not printed). No weights are saved here.

    Returns:
        Mean training loss over all processed steps, or NaN when the loader
        yielded no batch.
    """
    # Switch model to training mode. This is necessary for layers like dropout, batchnorm etc
    # which behave differently in training and evaluation mode
    model.train()
    losses_train = []
    # Defined up front so that an empty loader returns NaN instead of raising
    # UnboundLocalError at the final return.
    avg_train_loss = float("nan")

    # Steps are numbered from 1 so they can be compared with max_steps directly
    train_pbar = tqdm(enumerate(train_loader, start=1), desc="Training steps", leave=True, total=max_steps)
    for step, data in train_pbar:
        # Forward pass to calculate loss
        loss, _ = forward(data, model, device, criterion)

        # Reset gradients, backpropagate, then update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Average loss of the iterations so far
        loss_value = loss.item()
        losses_train.append(loss_value)
        avg_train_loss = np.mean(losses_train)

        train_pbar.set_description(f" Avg train loss: {avg_train_loss}")

        if step % checkpoint_steps == 0 and step != max_steps:
            print('Training Loss: {l}, Avg Training Loss: {a}'.format(l=loss_value, a=avg_train_loss))

        if step >= max_steps:
            break

    return avg_train_loss

In [None]:
# Read the training schedule from the config and launch the training run
train_params = cfg['train_params']
max_num_steps = train_params['max_num_steps']
print(max_num_steps)
checkpoint_steps = train_params['checkpoint_every_n_steps']
train_loss = train(
    model,
    device,
    train_dataloader,
    criterion,
    optimizer,
    max_num_steps,
    checkpoint_steps,
)

In [None]:
# Persist only the model's state_dict (the weights), not the module object itself
weights_path = 'testrun_mobilenetv2_withlstm_full_train_25k.pth'
torch.save(model.state_dict(), weights_path)