In [4]:
import sys
import os
from torch.utils.data import DataLoader, WeightedRandomSampler
import torch.nn as nn
import torch.optim as optim
import torch


# Append the project dir to path
sys.path.append(os.path.join("..", ".."))
from data_pipeline.utils import train_test_split, create_metadata_df, get_sample_weights_of_dataset
from data_pipeline.dataset_xy import CARLADatasetXY

from data_pipeline.data_preprocessing import preprocessing
from models.resnet_rgb.architectures_v3 import Resnet_Baseline_V3, Resnet_Baseline_V3_Dropout
from models.resnet_lidar.lidar_v1 import Resnet_Lidar_V1, Resnet_Lidar_V1_Dropout, Resnet_Lidar_V1_Dropout_2
from models.model_trainer import ModelTrainer

## Choose settings

### Choose data balancing options

In [5]:
# Data-balancing strategy. Leaving both flags False disables balancing entirely.
use_balance_by_loss_weighting = False
use_balance_by_over_under_sampling = False

# The two strategies are mutually exclusive: at most one flag may be enabled.
assert not (use_balance_by_loss_weighting and use_balance_by_over_under_sampling)

### Choose if additional noisy data shall be used for training

In [14]:
# Toggle: include the additional noisy recordings.
use_data_noisy = True

# Folder of the noisy dataset; stays None while the toggle is off.
path_data_noisy = os.path.join("..", "..", "data", "Noise-Dataset") if use_data_noisy else None

### Choose model

In [15]:
# Any model from models/resnet_rgb or models/resnet_lidar can be chosen.
# Instantiate exactly one model here: building a second one that is overwritten
# right away only wastes construction time and hides which model is trained.
# RGB-only alternative: Resnet_Baseline_V3_Dropout(0.25)
model = Resnet_Lidar_V1_Dropout_2()

### Choose training settings

In [16]:
# One un-reduced L1 loss per predicted target. Keep the keys in alphabetical
# order, i.e. the same order as the keys of sample_weights.
loss_fns_dict = {
    target: nn.L1Loss(reduction='none')
    for target in ("brake", "steer", "throttle")
}
# Weighting factor of each target's loss; same alphabetical key order as above.
loss_fn_weights = dict(brake=0.05, steer=0.45, throttle=0.5)

# Optimizer that minimizes the loss: Adam over all parameters of the chosen model
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Number of training epochs
n_epochs = 20

## Create Datasets

In [17]:
# Root folder of the regular (non-noisy) recordings
path_data = os.path.join("..", "..", "data", "data")

# Models whose type representation contains "Lidar" additionally receive the
# "lidar_bev" input; all remaining settings are shared by both model families.
config_xy = {
    "used_inputs": (
        ["rgb", "lidar_bev", "measurements"]
        if "Lidar" in str(type(model))
        else ["rgb", "measurements"]
    ),
    "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
    "y": ["brake", "steer", "throttle"],
    "seq_len": 1,
}


In [18]:
# Device preference: CUDA first, then MPS, then CPU.
# torch.backends.mps.is_available() is used instead of the deprecated torch.has_mps,
# which only reports whether PyTorch was built with MPS support, not whether the
# backend can actually be used on this machine.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
batch_size = 64

# Metadata tables: always for the regular data, for the noisy data only when enabled
df_meta_data = create_metadata_df(path_data, config_xy["used_inputs"])
df_meta_data_noisy = (
    create_metadata_df(path_data_noisy, config_xy["used_inputs"])
    if use_data_noisy
    else None
)

# Town lists handed to train_test_split: Town06 is the only town listed under "test"
train_test_config = dict(
    train=['Town00', 'Town01', 'Town02', 'Town03', 'Town04', 'Town05', 'Town07', 'Town08', 'Town09', 'Town10'],
    test=['Town06'],
)
df_meta_data_train, df_meta_data_test_1, df_meta_data_test_2 = train_test_split(
    df_meta_data,
    towns_intersect=train_test_config,
    df_meta_data_noisy=df_meta_data_noisy,
)

# One dataset per split, all built with the same x/y configuration
dataset_train, dataset_test_1, dataset_test_2 = (
    CARLADatasetXY(df_meta_data=split_df, config=config_xy)
    for split_df in (df_meta_data_train, df_meta_data_test_1, df_meta_data_test_2)
)

In [19]:
# Combined number of samples across the train split and both test splits
total = sum(map(len, (dataset_train, dataset_test_1, dataset_test_2)))

In [23]:
# Fraction of all samples (train + both test splits) that belongs to dataset_test_2
len(dataset_test_2)/total

0.1338574766702883

In [29]:
# Pull the town identifier ("Town" followed by two digits) out of the "dir" column
# of the first test split. The pattern is a raw string so that \d is passed to the
# regex engine verbatim instead of being an invalid string escape sequence.
df_meta_data_test_1["dir"].str.extract(r"(Town\d\d)")

Unnamed: 0,0
0,Town01
1,Town01
2,Town01
3,Town01
4,Town01
...,...
17301,Town04
17302,Town04
17303,Town04
17304,Town04


## Generate sample weights to be passed to ModelTrainer

In [9]:
# Sample weights are only needed when one of the balancing strategies is active
sample_weights = None
if any((use_balance_by_loss_weighting, use_balance_by_over_under_sampling)):
    # Dictionary holding the weights for every y variable; the multilabel option
    # is switched on only for the over-/under-sampling strategy.
    sample_weights = get_sample_weights_of_dataset(
        dataset_train,
        num_bins=10,
        multilabel_option=use_balance_by_over_under_sampling,
    )
    print(sample_weights.keys())

## Create DataLoaders

In [10]:
# Over-/under-sampling draws training samples with replacement according to the
# "multilabel" sample weights. Shuffling is only requested when no sampler is used.
if use_balance_by_over_under_sampling:
    weighted_random_sampler = WeightedRandomSampler(
        weights=sample_weights["multilabel"],
        num_samples=len(dataset_train),
        replacement=True,
    )
else:
    weighted_random_sampler = None
shuffle = weighted_random_sampler is None

dataloader_train = DataLoader(
    dataset_train,
    batch_size=batch_size,
    num_workers=0,
    shuffle=shuffle,
    sampler=weighted_random_sampler,
)
# The two test loaders iterate their datasets in order (no shuffling)
dataloader_test_1, dataloader_test_2 = (
    DataLoader(eval_dataset, batch_size=batch_size, num_workers=0, shuffle=False)
    for eval_dataset in (dataset_test_1, dataset_test_2)
)

## Create ModelTrainer & run it

In [11]:
# Only loss weighting hands the sample weights on to the trainer; otherwise drop them
sample_weights = sample_weights if use_balance_by_loss_weighting else None

In [13]:
# Bundle model, optimizer, losses, data loaders and preprocessing into the trainer.
# NOTE(review): only dataloader_test_2 is passed as the test loader; dataloader_test_1
# is not used by this call — confirm that this is intended.
model_trainer = ModelTrainer(
    model=model,
    optimizer=optimizer,
    loss_fns=loss_fns_dict,
    loss_fn_weights=loss_fn_weights,
    n_epochs=n_epochs,
    dataloader_train=dataloader_train,
    dataloader_test=dataloader_test_2,
    # None unless balancing by loss weighting is enabled
    sample_weights=sample_weights,
    preprocessing=preprocessing,
    )

Model will be trained on: mps


In [14]:
# Start training with the trainer configured above (n_epochs epochs were requested)
model_trainer.run()

Epoch 1



  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch [1/20], Step [0/3367], Loss: 0.2712


KeyboardInterrupt: 