In [1]:
!pip install torch==1.11.0+cpu torchvision==0.12.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu
!pip install mmcv-full==1.5.3 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.11/index.html
!pip install mmdet==2.25.0 -f https://download.openmmlab.com/mmdet/dist/cpu/torch1.11/index.html

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu
Collecting torch==1.11.0+cpu
  Downloading https://download.pytorch.org/whl/cpu/torch-1.11.0%2Bcpu-cp310-cp310-linux_x86_64.whl (169.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.2/169.2 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torchvision==0.12.0+cpu
  Downloading https://download.pytorch.org/whl/cpu/torchvision-0.12.0%2Bcpu-cp310-cp310-linux_x86_64.whl (14.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.7/14.7 MB[0m [31m45.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: torch, torchvision
  Attempting uninstall: torch
    Found existing installation: torch 2.1.2+cpu
    Uninstalling torch-2.1.2+cpu:
      Successfully uninstalled torch-2.1.2+cpu
  Attempting uninstall: torchvision
    Found existing installation: torchvision 0.16.2+cpu
    Uninstalling torchvision-0.16.2+cpu:
  

In [6]:
import os
import sys
import random
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
sys.path.append('/kaggle/input/transfuser-e2e-scripts')

# torch imports
import torch
import torch.nn as nn

## CARLA dataset

In [7]:
from config import GlobalConfig
from data import CARLA_Data

root_dir = '/kaggle/input/carla-e2e-data/CARLA_data'
config = GlobalConfig(root_dir=root_dir, setting='all')
train_set = CARLA_Data(root=config.train_data, config=config)
val_set = CARLA_Data(root=config.val_data, config=config)
print(f"There are {len(train_set)} samples in training set")

100%|██████████| 2/2 [00:00<00:00,  5.69it/s]
Loading 1546 lidars from 2 folders
100%|██████████| 1/1 [00:00<00:00, 21.30it/s]
Loading 314 lidars from 1 folders
There are 1546 samples in training set


Create pytorch style dataloaders

In [9]:
from torch.utils.data import DataLoader

g_cuda = torch.Generator(device='cpu')
g_cuda.manual_seed(torch.initial_seed())

# We need to seed the workers individually otherwise random processes 
# in the dataloader return the same values across workers!
def seed_worker(worker_id):
    # Torch initial seed is properly set across the different workers,
    # we need to pass it to numpy and random.
    worker_seed = (torch.initial_seed()) % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


dataloader_train = DataLoader(train_set, shuffle=True, batch_size=4, worker_init_fn=seed_worker, generator=g_cuda, num_workers=4)
dataloader_val   = DataLoader(val_set, shuffle=False, batch_size=4, worker_init_fn=seed_worker, generator=g_cuda, num_workers=4)

In [10]:
sample_data = next(iter(dataloader_train))
print(f"sample data is of type {type(sample_data)} and has following keys")

for k,v in sample_data.items():
    print(k, list(v.shape))

sample data is of type <class 'dict'> and has following keys
rgb [4, 3, 160, 704]
bev [4, 160, 160]
depth [4, 160, 704]
semantic [4, 160, 704]
steer [4]
throttle [4]
brake [4]
speed [4]
theta [4]
x_command [4]
y_command [4]
light [4]
target_point [4, 2]
target_point_image [4, 1, 256, 256]
lidar [4, 2, 256, 256]
label [4, 20, 7]
ego_waypoint [4, 4, 2]


## Helper functions

In [11]:
def train_validate_model(model, num_epochs, model_name, optimizer, 
                         device, dataloader_train, dataloader_valid, 
                         lr_scheduler = None, output_path = '.'):

    # initialize placeholders for running values
    train_results = []    
    val_results = []    
    min_val_loss = np.Inf

    # move model to device
    model.to(device)
    
    for epoch in range(num_epochs):
        # Training
        model.train()
        epoch_detailed_train_losses  = {key: 0.0 for key in config.detailed_losses}
        epoch_detailed_train_losses['weighted_loss'] = 0.0
        
        with tqdm(dataloader_train, unit="batch") as tepoch:
            for batch_idx, data in enumerate(tepoch):
                tepoch.set_description(f"Epoch {epoch}")

                # load data to gpu, according to type
                for k in ['rgb', 'depth', 'lidar', 'label', 'ego_waypoint', \
                          'target_point', 'target_point_image', 'speed']:
                    data[k] = data[k].to(device, torch.float32)
                for k in ['semantic', 'bev']:
                    data[k] = data[k].to(device, torch.long)
                
                # forward pass, store losses
                losses = model(data)
                loss = torch.tensor(0.0).to(device, dtype=torch.float32)
                for key, value in losses.items():
                    loss += detailed_weights[key] * value
                    epoch_detailed_train_losses[key] += float(detailed_weights[key] * value.item())
                epoch_detailed_train_losses['weighted_loss'] += float(loss.item())
                
                # backward pass
                optimizer.zero_grad(set_to_none=True)
                loss.backward()
                optimizer.step()
                
                # log losses
                tepoch.set_postfix(loss=loss.item())
                
                if batch_idx == 2:
                    break
                    
            # average losses across batches
            for k,v in epoch_detailed_train_losses.items():
                epoch_detailed_train_losses[k] = v / len(dataloader_train)
            
#         validation_loss, validation_metric = evaluate_model(
#                         model, dataloader_valid, criterion, metric_class, num_classes, device)

        
        train_results.append(epoch_detailed_train_losses)
    
    return train_results, val_results

## Model

In [12]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 


from model import LidarCenterNet
model = LidarCenterNet(config, device, config.backbone, image_architecture='regnety_032', 
                           lidar_architecture='regnety_032', use_velocity=False)
model.to(device);

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print ('Total trainable parameters: ', params)

import torch.optim as optim
optimizer = optim.AdamW(model.parameters(), lr=1e-4)

model.safetensors:   0%|          | 0.00/78.1M [00:00<?, ?B/s]

Total trainable parameters:  168018327


## Training

In [13]:
N_EPOCHS = 3

detailed_weights = {key: config.detailed_losses_weights[idx] for idx, key in enumerate(config.detailed_losses)}

train_results, val_results = train_validate_model(model, num_epochs=N_EPOCHS, model_name='Transfuser_regnet032', 
                                                  optimizer=optimizer,device = device, dataloader_train=dataloader_train, 
                                                  dataloader_valid = dataloader_val)

import pandas as pd
train_results = pd.DataFrame(train_results)
train_results.to_csv(f'Transfuser_regnet032y_{N_EPOCHS}.pth')

Epoch 0:   1%|          | 2/387 [00:38<2:02:35, 19.11s/batch, loss=39.2]
Epoch 1:   1%|          | 2/387 [00:42<2:16:04, 21.21s/batch, loss=47.9]
Epoch 2:   1%|          | 2/387 [00:41<2:13:14, 20.77s/batch, loss=38.8]


In [14]:
train_results

Unnamed: 0,loss_wp,loss_bev,loss_depth,loss_semantic,loss_center_heatmap,loss_wh,loss_offset,loss_yaw_class,loss_yaw_res,loss_velocity,loss_brake,weighted_loss
0,0.012898,0.008486,0.037401,0.015362,0.40359,0.000931,0.000754,0.015047,2.2e-05,0.0,0.0,0.494493
1,0.01388,0.008472,0.036979,0.015286,0.326941,0.000897,0.000702,0.015012,2.2e-05,0.0,0.0,0.41819
2,0.01297,0.008408,0.034747,0.015108,0.333041,0.000872,0.000699,0.014922,1.3e-05,0.0,0.0,0.420781


In [16]:
import plotly.graph_objects as go

# Create traces
fig = go.Figure()

for key in config.detailed_losses + ['weighted_loss']: 
    fig.add_trace(go.Scatter(x=np.arange(len(train_results)), y=train_results[key],
                        mode='lines', name=f'{key.replace("loss_", "")}_TR'))

fig.update_layout(title='Transfuser with regnet032y backbone',width=1200, height=600, 
                  xaxis_title='Epoch', yaxis_title='Loss components')    
fig.show()