In [None]:

"""
Max time for:
    - GPU: 30h
    - TPU: 20h
"""
import time

import os
# Create the results directory used by the training cell. Unlike shelling out to
# `mkdir`, this is silent when the directory already exists, so the cell can be
# re-run on a live kernel.
os.makedirs("/kaggle/working/results", exist_ok=True)
import timer_app_ras as timer
# Training time budget in seconds (currently 11 h). An earlier setting was
# 60*60*28; the original note also mentions a 24 h default.
MAX_TRAIN_TIME = 60*60*11
# Reference timestamp taken when the notebook starts.
GPU_START = time.perf_counter()

import installer_app_ras
from dataset_wrapper_app_ras import DatasetWrapper
from model_app_ras import build_resnet50, build_efficientnetb4, build_regnet, dump_model
from rasterizer_app_ras import build_custom_rasterizer
import evaluation_app_ras as eval_util_functions
import logger_app_ras as logging


from sys import stdin
from signal import SIGINT, signal, getsignal
from tempfile import gettempdir
from datetime import datetime

import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from adabound import AdaBound
from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.evaluation.metrics import *
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory

import matplotlib.pyplot as plt
from tqdm import tqdm
import datetime
import traceback
from pathlib import Path



# TPU imports
#from torch_xla.core import xla_model

# Ensemble Class

In [None]:
class EnsemblingModel(nn.Module):
    '''
    Ensemble head that combines the outputs of several member models.

    Every member is evaluated on the same input; the member outputs are laid
    side by side, passed through a ReLU and mapped back to `out_dim` features
    by a single linear layer.

    NOTE(review): the members are kept in a plain Python list, so they are not
          registered as sub-modules. Only the linear layer shows up in
          `parameters()`, and calls such as `.to()`, `.train()` or `.eval()`
          on the ensemble do not reach the members.
    '''

    def __init__(self, device, out_dim, models):
        '''
            Create an instance of the ensembling model

            Params:
                device        PyTorch device every member model is moved to
                out_dim       Number of output features of each model in `models`
                              (also the number of output features of the ensemble)
                models        List of models assumed to share input and output
                              dimensions
        '''
        super().__init__()

        self.device = device

        # NOTE: Keep the caller's list as-is and move each member to the device
        self.models = models
        for member in self.models:
            member.to(device)

        self.relu = nn.ReLU()
        self.output = nn.Linear(
            in_features=out_dim * len(models),
            out_features=out_dim,
        )

    def forward(self, X):
        '''
            Run every member on `X` and combine their outputs.

            Params:
                X             Input batch, passed unchanged to each member
            Returns:
                Output of the linear layer applied to the ReLU of the members'
                outputs, concatenated per sample in member order
        '''
        # NOTE: Stacking on a new dim 1 and flattening from dim 1 puts the
        #       members' features one after another for each sample
        member_outputs = torch.stack(
            [member(X) for member in self.models],
            dim=1,
        )
        features = member_outputs.flatten(start_dim=1)
        return self.output(self.relu(features))

# Build Model (with model_app_ras.py)

In [None]:
def build_model(cfg, build_model, file=None):
    '''
    Derives the model's input/output sizes from the config and delegates the
    actual construction to the given builder.

    Model inputs:   selected history frames(box) + present frame(semantic)
    Model outputs:  target coords(x, y) for each predicted future frame

    Params:
        cfg           config dict; reads 'history_box_frames' and
                      'future_num_frames' from cfg['model_params']
        build_model   builder callable, invoked as
                      build_model((in_channels, out_features), file).
                      Inside this function the name refers to this argument,
                      not to the function itself.
        file          a tuple(path to model, in_channels, out_features),
                      where 'in_channels' and 'out_features' are referring to
                      the model that is being loaded; passed through untouched
    Returns:
        whatever the builder returns (the configured model)
    '''
    model_params = cfg['model_params']

    # NOTE: Input channels = 3 channels of the semantic raster plus 2 per
    #       selected history frame (one agent image and one ego image)
    num_in_channels = 3 + 2 * len(model_params['history_box_frames'])

    # NOTE: Output size = one (x, y) pair for each predicted future frame
    num_targets = 2 * model_params['future_num_frames']

    return build_model((num_in_channels, num_targets), file)

In [None]:
def forward(data, model, device, criterion):
    '''
    Runs one forward pass and computes the availability-masked loss.

    Params:
        data        batch dict; reads "image", "target_availabilities" and
                    "target_positions"
        model       callable applied to the image batch
        device      PyTorch device the batch tensors are moved to
        criterion   loss callable; its result is multiplied element-wise by
                    the availabilities, so it is expected to be unreduced
    Returns:
        tuple(loss, outputs): the mean of the masked loss and the model
        outputs reshaped to the shape of the targets
    '''
    images = data["image"].to(device)
    # Trailing axis added so the availabilities broadcast over the last axis
    # of the per-element loss
    availability_mask = data["target_availabilities"].unsqueeze(-1).to(device)
    ground_truth = data["target_positions"].to(device)

    predictions = model(images).reshape(ground_truth.shape)

    # Not every output step is valid: zero out the unavailable ones, then
    # average over all elements (masked ones included)
    masked_loss = criterion(predictions, ground_truth) * availability_mask
    return masked_loss.mean(), predictions

In [None]:
def get_agent_motion_config():
    '''
    Returns a freshly built configuration dict for the agent motion task.

    The dict has the sections 'model_params', 'raster_params',
    'train_data_loader', 'val_data_loader' and 'train_params' plus a
    'format_version' entry. Each call creates new objects, so callers may
    modify the result without affecting later calls.
    '''
    model_params = {
        'model_architecture': 'efficientnet_b4',
        'history_num_frames': 5,
        'history_box_frames': [0, 1, 2, 4, 8],
        'future_num_frames': 50,
        'step_time': 0.1,
        'render_ego_history': True
    }

    raster_params = {
        'raster_size': [384, 192],
        'pixel_size': [0.25, 0.25],
        'ego_center': [0.25, 0.5],
        'map_type': 'box_semantic_fast',
        'satellite_map_key': 'lyft-motion-prediction-autonomous-vehicles/aerial_map/aerial_map.png',
        'semantic_map_key': 'lyft-motion-prediction-autonomous-vehicles/semantic_map/semantic_map.pb',
        'dataset_meta_key': 'lyft-motion-prediction-autonomous-vehicles/meta.json',
        'filter_agents_threshold': 0.5,
        'disable_traffic_light_faces': False,
        'set_origin_to_bottom': True,
    }

    train_data_loader = {
        # TODO: set to directory with all chunks
        #       (alternative: "lyft-full-training-set/train_full.zarr")
        'key': "lyft-full-chopped",
        'batch_size': 8,
        'shuffle': True,
        'num_workers': 0,
        # TODO: CHANGE IF TRAINING SHOULD BE RESUMED FROM ANOTHER CHUNK
        'split_offset': 0
    }

    val_data_loader = {
        # alternative: 'lyft-motion-prediction-autonomous-vehicles/scenes/validate.zarr'
        'key': "chopped-val-dataset/validate_chopped_100/validate.zarr",
        'batch_size': 18,
        'shuffle': False,
        'num_workers': 2,
    }

    train_params = {
        'checkpoint_every_n_steps': 1000,
        'max_num_steps': 1000,
        'eval_every_n_steps': 10000,
    }

    return {
        'format_version': 4,
        'model_params': model_params,
        'raster_params': raster_params,
        'train_data_loader': train_data_loader,
        'val_data_loader': val_data_loader,
        'train_params': train_params,
    }

In [None]:
# Shared objects for the cells below: the config, a data manager created for
# the Kaggle input directory, and the project's custom rasterizer built from
# both (used here in place of l5kit's build_rasterizer(cfg, dm)).
cfg = get_agent_motion_config()
dm = LocalDataManager('/kaggle/input/')
rasterizer = build_custom_rasterizer(cfg, dm)

# Build trained models for ensembling

In [None]:
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Each member is loaded from a checkpoint described by a
# (path, in_channels, out_features) tuple. The values 13 and 100 equal
# 3 + 2 * 5 history box frames and 2 * 50 future frames of the config.
# TODO: Handle dimensions better

# resnet50 is currently not part of the ensemble. To re-enable it:
# res_50 = build_model(
#         cfg,
#         build_resnet50,
#         ('../input/ensemble-models/resnet50_9h40m.pt', 13, 100),    # TODO: Change for new model
#     ).to(device)
# (and add res_50 to `models` below)

# efficient-net_b4
eff_b4 = build_model(
    cfg,
    build_efficientnetb4,
    ('../input/ensemble-models/efficientnet_b4_22hours.pt', 13, 100),    # TODO: Change for new model
)
eff_b4 = eff_b4.to(device)

# regnet
reg_ = build_model(
    cfg,
    build_regnet,
    ('../input/ensemble-models/model_reg.pt', 13, 100),
)
reg_ = reg_.to(device)

# model list for ensemble
models = [eff_b4, reg_]

# Set up the ensemble model, optimizer, and criterion

In [None]:
# Unreduced MSE: the per-element loss is masked with the target
# availabilities in forward() before it is averaged.
criterion = nn.MSELoss(reduction="none")

# 100 is the number of outputs of each member model (2 coords * 50 frames).
ensemble_model = EnsemblingModel(device, 100, models)
ensemble_model = ensemble_model.to(device)

# Adam over whatever the ensemble reports through parameters().
optimizer = optim.Adam(ensemble_model.parameters(), lr=1e-3)

# Load the training data and train the ensemble model

In [None]:
# Re-import the module on every run: the name `timer` is rebound to a Timer
# instance below, so a second run of this cell needs the module again.
import timer_app_ras as timer

# Initialize logger; the version tag is the start timestamp
now = datetime.datetime.now()
now = now.strftime('%Y-%m-%d_%H:%M:%S')
VERSION = f"{now}"
logging.create_logger(VERSION)
logger = logging.get_logger(VERSION)
logger.info(f"Initialized Logger {VERSION}")

# initialize timer
timer = timer.Timer(logger)
logger.info("Initialized timer")

# start training
logger.info("************** Ensemble Training **************")
cfg = get_agent_motion_config()

train_cfg = cfg['train_data_loader']
num_workers = train_cfg['num_workers']

# Every direct sub-directory of the training key is treated as one chunk.
# NOTE(review): os.walk yields directories in arbitrary order, so what
#               `split_offset` skips depends on that order.
dirs = [
    f"{train_cfg['key']}/{d}"
    for d in next(os.walk(f"/kaggle/input/{train_cfg['key']}"))[1]
]
logger.info(f"all zarrs: {dirs}")
# Number of chunks still to skip; it only counts down, so the skip affects
# the first pass over `dirs` only.
to_skip = train_cfg["split_offset"]

# SET EPOCHS
total_epochs = 1 #5
num_epochs = 0  # number of fully completed epochs

# Files rewritten after every training step for the per-step metrics
gt_train_path = "/kaggle/working/results/gt_train.csv"
pred_path = "/kaggle/working/results/pred_train.csv"
metric_fns = [
    neg_multi_log_likelihood,
    rmse,
    average_displacement_error_oracle,
    average_displacement_error_mean,
    final_displacement_error_oracle,
    final_displacement_error_mean,
]

# NOTE: Setup signal handler for manual interruption. The first SIGINT only
#       sets a flag (checked by the loops below) and re-installs the previous
#       handler, so a second SIGINT behaves as usual.
original_handler = getsignal(SIGINT)
state = {'interrupted': False}
def handler(_signum, _frame):
    state['interrupted'] = True
    signal(SIGINT, original_handler)
signal(SIGINT, handler)

try:
    for _ in range(total_epochs):
        if state['interrupted']:
            break
        logger.info("Start Epoch " + str(num_epochs+1))

        # ==== Train for all chopped subsets
        for train_path in dirs:
            if state['interrupted']:
                break
            if to_skip > 0:
                to_skip -= 1
                continue
            logger.info(f"Start training with {train_path}")
            # NOTE: Load training dataset; the plain AgentDataset is used
            #       without worker processes, the DatasetWrapper with them
            dataset = ChunkedDataset(dm.require(train_path)).open()
            dataset = AgentDataset(
                cfg,
                dataset,
                build_custom_rasterizer(cfg, dm),
                min_frame_history=0,
                min_frame_future=10,
            ) if num_workers <= 0 else DatasetWrapper(
                cfg,
                dm,
                dataset,
                min_frame_history=0,
                min_frame_future=10,
            )
            num_max_batches = np.ceil(
                len(dataset) / train_cfg['batch_size'],
            ).astype(int)
            dataloader_opts = dict(
                shuffle=train_cfg['shuffle'],
                batch_size=train_cfg['batch_size'],
                num_workers=num_workers,
            )
            # prefetch_factor is only passed when worker processes are used
            if num_workers > 0:
                dataloader_opts['prefetch_factor'] = 2 * num_workers
            train_dataloader = DataLoader(
                dataset,
                **dataloader_opts
            )

            # ==== TRAIN in a subset
            tr_it = iter(train_dataloader)
            progress_bar = tqdm(range(cfg["train_params"]["max_num_steps"]))
            losses_train = []

            # max_num_steps optimisation steps per chunk
            for _ in progress_bar:
                if state['interrupted']:
                    progress_bar.close()
                    break
                try:
                    data = next(tr_it)
                except StopIteration:
                    # Chunk exhausted before max_num_steps: start over
                    tr_it = iter(train_dataloader)
                    data = next(tr_it)
                ensemble_model.train()
                torch.set_grad_enabled(True)
                data['image'] = data['image'].float() / 255
                loss, outputs = forward(data, ensemble_model, device, criterion)

                # Backward pass
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                losses_train.append(loss.item())
                progress_bar.set_description(f"loss: {loss.item()} loss(avg): {np.mean(losses_train)}")

                # EVALUATION (runs after every training step)
                eval_util_functions.write_csvs(data, gt_train_path, pred_path, device, outputs)

                # calculate metrics
                metrics = eval_util_functions.compute_metrics_csv(gt_train_path, pred_path, metric_fns)
                # create csv file
                eval_util_functions.save_metrics('results/metrics.csv', loss.item(), np.mean(losses_train), metrics)

            # dump model (also after an interruption, so progress is kept)
            dump_model(ensemble_model, './model_ensemble.pt')

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        if state['interrupted']:
            # The current epoch was cut short; do not count it as completed
            break
        num_epochs += 1
finally:
    # Always put the previous SIGINT handler back so later cells are not
    # affected by the flag-only handler
    signal(SIGINT, original_handler)

# Report the epoch that was in progress when interrupted, otherwise the last
# completed one (num_epochs already counts it).
stopped_epoch = num_epochs + 1 if state['interrupted'] else num_epochs
print(f'Stopped at epoch {stopped_epoch}')
#print(f'Training took {datetime.datetime.now() - start_time}')

if torch.cuda.is_available():
    torch.cuda.empty_cache()

# NOTE: Release memory after training

# # NOTE: Save model to disk

In [None]:
# Display the list of training chunk directories built in the training cell.
dirs

# Evaluation

In [None]:
# ===== LOCATE THE CHOPPED VALIDATION DATASET
# An already chopped dataset is read from the Kaggle input directory instead
# of being generated here. The generating code is kept for reference:
#num_frames_to_chop = 100
#eval_base_path = create_chopped_dataset(dm.require(eval_cfg["key"]), cfg["raster_params"]["filter_agents_threshold"], num_frames_to_chop, cfg["model_params"]["future_num_frames"], MIN_FUTURE_STEPS)
eval_base_path = '/kaggle/input/chopped-val-dataset/validate_chopped_100'

# Fresh config; the validation loader settings are used by the next cell.
cfg = get_agent_motion_config()
eval_cfg = cfg["val_data_loader"]

In [None]:

# Paths inside the chopped validation directory: the zarr (same file name as
# the configured validation key), the agents mask and the ground-truth csv.
eval_zarr_path = str(Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
eval_mask_path = str(Path(eval_base_path) / "mask.npz")
eval_gt_path = str(Path(eval_base_path) / "gt.csv")

eval_zarr = ChunkedDataset(dm.require(eval_zarr_path)).open()
eval_mask = np.load(eval_mask_path)["arr_0"]
# ===== INIT DATASET AND LOAD MASK
eval_dataset = DatasetWrapper(cfg, dm, eval_zarr, agents_mask=eval_mask)

eval_dataloader_opts = dict(
    shuffle=eval_cfg["shuffle"],
    batch_size=eval_cfg["batch_size"],
    num_workers=eval_cfg["num_workers"],
)
# prefetch_factor is only meaningful with worker processes. Pass it under the
# same condition as the training loader so that num_workers == 0 keeps working.
if eval_cfg["num_workers"] > 0:
    eval_dataloader_opts["prefetch_factor"] = 2 * eval_cfg["num_workers"]
eval_dataloader = DataLoader(
    eval_dataset,
    **eval_dataloader_opts
)
print(eval_dataset)

# Visualise Results

In [None]:


# Inference only from here on: switch the ensemble to eval mode and disable
# gradient tracking globally.
ensemble_model.eval()
torch.set_grad_enabled(False)

# Lookup of ground-truth future coordinates, keyed by track id + timestamp
gt_rows = {
    row["track_id"] + row["timestamp"]: row["coord"]
    for row in read_gt_csv(eval_gt_path)
}

eval_ego_dataset = EgoDataset(cfg, eval_dataset.dataset, rasterizer)

# Start at frame 99 (the original note calls this the last frame of scene_0)
# and advance in steps of 1000 frames
for frame_number in range(99, len(eval_zarr.frames), 1000):
    agent_indices = eval_dataset.get_frame_indices(frame_number)
    if len(agent_indices) == 0:
        # Nothing to draw for a frame without agents
        continue

    # AV point-of-view raster of this frame, used as background for both plots
    data_ego = eval_ego_dataset[frame_number]
    im_ego = rasterizer.to_rgb(data_ego["image"].transpose(1, 2, 0)).copy()
    center = np.asarray(cfg["raster_params"]["ego_center"]) * cfg["raster_params"]["raster_size"]

    predicted_positions = []
    target_positions = []

    for v_index in agent_indices:
        data_agent = eval_dataset[v_index]
        # Same input scaling as during training
        data = torch.from_numpy(data_agent["image"]).float() / 255

        # Single-sample batch; the flat output is read as (x, y) pairs
        out_net = ensemble_model(data.unsqueeze(0).to(device))
        out_pos = out_net[0].reshape(-1, 2).detach().cpu().numpy()

        # Predictions as absolute world coordinates
        predicted_positions.append(
            transform_points(out_pos, data_agent["world_from_agent"])
        )

        # Ground truth from the csv, shifted by the agent centroid to get
        # absolute coordinates
        track_id, timestamp = data_agent["track_id"], data_agent["timestamp"]
        gt_key = str(int(track_id)) + str(timestamp)
        target_positions.append(gt_rows[gt_key] + data_agent["centroid"][:2])

    # Convert everything into the AV raster frame so it can be drawn
    raster_from_world = data_ego["raster_from_world"]
    predicted_positions = transform_points(np.concatenate(predicted_positions), raster_from_world)
    target_positions = transform_points(np.concatenate(target_positions), raster_from_world)

    # Predictions on the left, ground truth on the right
    fig, (ax_pred, ax_target) = plt.subplots(1, 2, figsize=(16, 4))

    ax_pred.set_title('Predicted')
    im_ego_pred = im_ego.copy()
    draw_trajectory(im_ego_pred, predicted_positions, PREDICTED_POINTS_COLOR)
    ax_pred.imshow(im_ego_pred)

    ax_target.set_title('Ground truth')
    im_ego_target = im_ego.copy()
    draw_trajectory(im_ego_target, target_positions, TARGET_POINTS_COLOR)
    ax_target.imshow(im_ego_target)

    plt.show()