# Model ResNet

https://www.pluralsight.com/guides/introduction-to-resnet

#### TODOS
1. DONE Debugging, does output make sense?
    1. Resize images
    2. preprocessing fixes
    5. replace scaling by proper function
2. try on leaderboard
3. Include Odometry and fuse into heads
    - Speed
    - Location
4. navigation
5. controller
6. Evaluation on Test set, Modularization



## Dependencies

In [5]:
# MODEL STUFF
import torch
import torch.nn as nn
import torch.optim as optim
#import torch.nn.functional as F
import numpy as np
import torchvision
from torchvision import *
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms

# GENERAL STUFF
import time
import copy
import os
import sys
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
sys.path.append("../data_pipeline")
# import data_sampler, dataset
from data_sampler import WeightedSampler
from dataset import CARLADataset#, CARLADatasetMultiProcessing
from utils import train_test_split, create_metadata_df

## Model

In [7]:
class MyResnet(nn.Module):
    
    def __init__(self):
        super().__init__()

        
        # ResNet Architecture with pretrained weights, also bigger resnets available
        self.net = torchvision.models.resnet18(pretrained=True) # weights=True
        num_ftrs = self.net.fc.in_features

        # Top layer of ResNet which you can modify. We choose Identity to use it as Input for all the heads
        self.net.fc = nn.Identity()
        
        # Input Layer fuer cmd, spd
        self.cmd_input = nn.Sequential(
            nn.Linear(7, 7),
            nn.LeakyReLU() # TODO
        )
        
        self.spd_input = nn.Sequential(
            nn.Linear(1, 1),
            nn.LeakyReLU() # TODO
        )
        
        # Regression Heads for Throttle, Brake and Steering
        self.thr_head = nn.Sequential(
            nn.Linear(num_ftrs+8, 1),
            nn.Sigmoid() # [0,1] Range Output
        )
        
        self.brk_head = nn.Sequential(
            nn.Linear(num_ftrs+8, 1),
            nn.Sigmoid() # [0,1] Range Output
        )
        
        self.str_head = nn.Sequential(
            nn.Linear(num_ftrs+8, 1),
            nn.Tanh() # [-1,1] Range Output
        )

    # Forward Pass of the Model
    def forward(self, rgb, cmd, spd):
        rgb = self.net(rgb) # BRG
        cmd = self.cmd_input(cmd)
        spd = self.spd_input(spd)
        
        x = torch.cat((rgb, cmd, spd),1)
        
        #x = self.net.fc(x)
        return self.thr_head(x), self.str_head(x), self.brk_head(x) # 3 Outputs since we have 3 Heads

## Data Loaders, Data Sets

In [12]:
# path_data = "../data/Dataset Ege/Dataset Ege 1"
path_data = os.path.join("..", "data", "data")

# train_path = "../input" #data must be shared publically, doing local for now
# test_path = "../input"

config = {"used_inputs": ["rgb","measurements"], 
        "used_measurements": ["speed", "steer", "throttle", "brake", "command"],
        "seq_len": 1
        }

df_meta_data = create_metadata_df(path_data, config["used_inputs"])
df_meta_data_train, df_meta_data_test = train_test_split(df_meta_data, towns={"train": ["Town04", "Town06"], "test": ["Town05"]})
# df_meta_data_train, df_meta_data_test = df_meta_data, df_meta_data

train_dataset = CARLADataset(root_dir=path_data, df_meta_data=df_meta_data_train, config=config)
test_dataset = CARLADataset(root_dir=path_data, df_meta_data=df_meta_data_test, config=config)

# weighted_sampler = WeightedSampler(dataset=train_dataset)

batch_size = 64
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [13]:
len(train_dataloader)

13

In [14]:
len(test_dataloader)

13

In [15]:
mean = torch.tensor([79.6657, 81.5673, 105.6161])
std = torch.tensor([66.8309, 60.1001, 66.2220])


transform_norm = transforms.Compose([
    transforms.Normalize(mean, std)
])


## Training

In [21]:
# Initialise Model (GPU or CPU)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'mps' if torch.has_mps else 'cpu')
print(device)
net = MyResnet()
net.to(device)

mps




MyResnet(
  (net): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

In [22]:
def to_cuda_if_possible(data):
    return data.to(device) if device else data

In [23]:
def forward_pass(data):
    # further preprocessing
    start_prep = time.time()
    X_rgb = torch.squeeze(transform_norm(data["rgb"])).float()
    labels = data["command"]
    # labels = torch.where(labels == -1, torch.tensor(0), labels).to(torch.int64) # Replace by -1 by 0
    labels = torch.where(labels == -1, torch.tensor(0, dtype=labels.dtype), labels).to(torch.int64) # Replace by -1 by 0
    # Convert the labels to a one hot encoded tensor
    one_hot = torch.nn.functional.one_hot(labels, num_classes=7)
    X_cmd = torch.squeeze(one_hot).float()
    X_spd = ((data["speed"]-speed_mean)/speed_std).float()
    
    Y_throttle = data["throttle"].float()
    Y_steer = data["steer"].float()
    Y_brake = data["brake"].float()
    end_prep = time.time()
    time_prep = end_prep - start_prep
    # move to GPU
    start_trans_cuda = time.time()
    X_rgb = to_cuda_if_possible(X_rgb)
    X_cmd = to_cuda_if_possible(X_cmd)
    X_spd = to_cuda_if_possible(X_spd)
    
    Y_throttle = to_cuda_if_possible(Y_throttle)
    Y_steer = to_cuda_if_possible(Y_steer)
    Y_brake = to_cuda_if_possible(Y_brake)
    end_trans_cuda = time.time()
    time_trans_cuda = end_trans_cuda - start_trans_cuda
    # compute outputs
    start_forward = time.time()
    optimizer.zero_grad()
    Y_hat = net(X_rgb, X_cmd, X_spd)
    end_forward = time.time()
    time_forward = end_forward - start_forward
    start_trans_cuda_2 = time.time()
    Y_hat_throttle = to_cuda_if_possible(Y_hat[0])
    Y_hat_steer = to_cuda_if_possible(Y_hat[1])
    Y_hat_brake = to_cuda_if_possible(Y_hat[2])

    # get labels from data
    Y_throttle = to_cuda_if_possible(data["throttle"].float())
    Y_steer = to_cuda_if_possible(data["steer"].float())
    Y_brake = to_cuda_if_possible(data["brake"].float())
    end_trans_cuda_2 = time.time()
    time_trans_cuda_2 = end_trans_cuda_2 - start_trans_cuda_2

    # Calculate Loss
    start_loss = time.time()
    loss_throttle = 0.7*criterion(Y_hat_throttle, Y_throttle)
    loss_steer = 0.2*criterion(Y_hat_steer, Y_steer)
    loss_brake = 0.1*criterion(Y_hat_brake, Y_brake)
    loss = sum([loss_throttle, loss_steer, loss_brake])
    end_loss = time.time()
    time_loss = end_loss - start_loss
    return loss, time_prep, time_trans_cuda, time_forward, time_trans_cuda_2, time_loss

In [24]:
# Loss and Optimizer
criterion = nn.L1Loss() # Easy to interpret #nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001) #optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

## Model Trainer Prototype

In [25]:
"""
Checking time consumption in training pipeline:
- Data Loading: cannot directly measure it because done in for loop itself and other stuff in for loop
- Data Preprocessing
- Training step
- Validation step

"""

'\nChecking time consumption in training pipeline:\n- Data Loading: cannot directly measure it because done in for loop itself and other stuff in for loop\n- Data Preprocessing\n- Training step\n- Validation step\n\n'

In [26]:
speed_mean = 2.250456762830466
speed_std = 0.30215840254891313

In [92]:
times_prep, times_trans_cuda, times_forward, times_trans_cuda_2, times_loss, times_backprop, times_val, times_epoch, times_monitoring = [], [], [], [], [], [], [], [], []

In [93]:
%%time

n_epochs = 1
print_every = 200
valid_loss_min = np.Inf
val_loss = []
train_loss = []
total_step = len(train_dataloader)

validate = False

for epoch in range(1, n_epochs+1):
    start_epoch = time.time()
    
    running_loss = 0.0
    print(f'Epoch {epoch}\n')
    
    # Work through batches
    for batch_idx, data in enumerate(train_dataloader):
        
        loss, time_prep, time_trans_cuda, time_forward, time_trans_cuda_2, time_loss = forward_pass(data)
        times_prep.append(time_prep)
        times_trans_cuda.append(time_trans_cuda)
        times_forward.append(time_forward)
        times_trans_cuda_2.append(time_trans_cuda_2)
        times_loss.append(time_loss)

        start_backprop = time.time()
        # Backprop
        loss.backward()
        optimizer.step()
        end_backprop = time.time()
        time_backprop = end_backprop - start_backprop
        times_backprop.append(time_backprop)
        
        start_monitoring = time.time()
        running_loss += loss.item()
        if (batch_idx) % print_every is 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch, n_epochs, batch_idx, total_step, loss.item()))
        end_monitoring = time.time()
        time_monitoring = end_monitoring - start_monitoring
        times_monitoring.append(time_monitoring)
        
    # Epoch finished, evaluate network and save if network_learned
    train_loss.append(running_loss/total_step)
    print(f'\ntrain-loss: {np.mean(train_loss):.4f},') # TODO SOLVE NAN ISSUES
    batch_loss = 0

    
    # Evaluation on Test set, skipped for now
    
    start_val = time.time()
    if validate:
        with torch.no_grad():
            net.eval()
            
            for batch_idx, data in enumerate(test_dataloader):
                
                loss, time_prep, time_trans_cuda, time_forward, time_trans_cuda_2, time_loss = forward_pass(data)
                
                batch_loss += loss.item()
            val_loss.append(batch_loss/len(test_dataloader))
            #network_learned = batch_loss < valid_loss_min
            print(f'validation loss: {np.mean(val_loss):.4f}, \n') # TODO SOLVE NAN ISSUES

            
            if False:#network_learned:
                valid_loss_min = batch_loss
                torch.save(net.state_dict(), 'resnet.pt')
                print('Improvement-Detected, save-model')
    end_val = time.time()
    time_val = end_val - start_val
    times_val.append(time_val)

    # Back to training
    net.train()
    end_epoch = time.time()
    time_epoch = end_epoch - start_epoch
    times_epoch.append(time_epoch)
    
    



Epoch 1

Epoch [1/1], Step [0/13], Loss: 0.0993

train-loss: 0.0829,
CPU times: user 1.35 s, sys: 657 ms, total: 2.01 s
Wall time: 4.48 s


In [94]:
import pandas as pd

df_speed_stats = pd.DataFrame({
"times_prep" : times_prep, 
"times_trans_cuda" : times_trans_cuda, 
"times_forward": times_forward, 
"times_trans_cuda_2" : times_trans_cuda_2, 
"times_loss" : times_loss, 
"times_backprop" : times_backprop, 
"times_monitoring" : times_monitoring, 
})
df_speed_stats = df_speed_stats.sum().to_frame().T
df_speed_stats["time_val"] = times_val[0]
df_speed_stats["time_untracked"] = times_epoch[0] - df_speed_stats.sum().sum()
df_speed_stats = df_speed_stats.T
df_speed_stats.columns = ["time_sec"]
df_speed_stats["time_%"] = df_speed_stats["time_sec"] / df_speed_stats["time_sec"].sum()
df_speed_stats = df_speed_stats.sort_values(by="time_%", ascending=False)

In [95]:
df_speed_stats

Unnamed: 0,time_sec,time_%
times_monitoring,2.222558,0.49639
time_untracked,0.816905,0.182449
times_trans_cuda_2,0.726366,0.162228
times_backprop,0.492207,0.10993
times_forward,0.098928,0.022095
times_prep,0.070115,0.01566
times_trans_cuda,0.038943,0.008698
times_loss,0.011417,0.00255
time_val,0.0,0.0


In [31]:
df_speed_stats.to_pickle("df_speed_stats_second.pkl")

In [None]:
# after 16 min 400/1514 batches are finished --> ~ 74min for an epoch (Town 04/05)
# 56GB (entire set as Moritz trained)/ (20GB train set --> Town04/05): 2.8 * 74min = 207 min (3.5 hours)


### Test predictions

In [60]:
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
iterator = iter(test_dataloader)
#print(next(iter(test_dataloader)).keys())

In [61]:
data = next(iterator)
#data

In [62]:
X_rgb = torch.squeeze(transform_norm(data["rgb"])).float().to(device)
labels = data["command"]
labels = torch.where(labels == -1, torch.tensor(0), labels).to(torch.int64) # Replace by -1 by 0
# Convert the labels to a one hot encoded tensor
one_hot = torch.nn.functional.one_hot(labels, num_classes=7).to(device)
X_cmd = torch.squeeze(one_hot).float().to(device)
X_spd = ((data["speed"]-speed_mean)/speed_std).float().to(device)

target_ = (data["throttle"], data["steer"], data["brake"])
with torch.no_grad():
    net.eval()
    outputs_ = net(X_rgb, X_cmd, X_spd)

RuntimeError: expected scalar type long int but found double

In [60]:
# Durchschnittlicher abs. fehler
for i in [0,1,2]:
    print(np.mean(abs(outputs_[i].cpu().numpy()-target_[i].cpu().numpy())))

0.29738437256956357

0.017281432621530257

0.016537449466326848


Bias Variance

In [53]:
# Variance 

for i in [0,1,2]:
    outputs = (outputs_[i].cpu().numpy())
    #print(outputs)
    mean_outputs = np.mean(outputs_[i].cpu().numpy())
    #print(mean_outputs)
    diff = (outputs-mean_outputs)**2
    #print(diff)
    value = np.mean(diff)
    print(value)

0.013398256

0.00063567644

0.008254821


In [54]:
# Bias
for i in [0,1,2]:
    targets = (target_[i].cpu().numpy())
    #print(outputs)
    mean_outputs = np.mean(outputs_[i].cpu().numpy())
    #print(mean_outputs)
    diff = outputs-mean_outputs
    #print(diff)
    value = np.mean(diff)
    print(value)

-0.2074748

0.010485157

4.656613e-10


In [None]:
"""
for i in [0,1,2]:
    print(np.mean(abs(target_[i].cpu().numpy())))
    print(np.std(abs(target_[i].cpu().numpy())))
"""

In [None]:
i =0

In [None]:
print(np.round(outputs_[i].cpu().numpy(),1))

In [None]:
print(np.round(target_[i].cpu().numpy(),1))

### IMG Processing

BGR is now standard FOR carla agent and training

In [None]:
import cv2
idx, batch = next(enumerate(test_dataloader))
print(batch["rgb"].shape)

In [None]:
img = batch["rgb"][0]#.shape
img = img.numpy().astype(np.uint8).reshape(160,960,3)
print(img.shape)

#img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # NUR HIER, NICHT IN CARLA AGENT
print(img.shape)
print(type(img))
transform = transforms.Compose([transforms.ToPILImage()])

tensor = transform(img)

#print(type(tensor))

tensor.show()

#torch.tensor(tensor)

TEST Normalization

In [None]:
tensor = transform_norm(torch.squeeze(data["rgb"],1))

In [None]:
tensor = torch.squeeze(transform_norm(data["rgb"])).float()

In [None]:
for i in range(64):
    print(np.mean(tensor.numpy()[i], axis = (1,2)))

In [None]:
np.mean(tensor.numpy(), axis = (0,2,3))

### adding Navigation and speed

In [None]:
test_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
iterator = iter(test_dataloader)



In [None]:
data = next(iterator)
#data["speed"]
#data["command"]

Command

In [None]:
import torch

# Assume labels is a 1D tensor with values from 0 to 6

labels = data["command"]
labels = torch.where(labels == -1, torch.tensor(0), labels) # Replace by -1 by 0
labels = labels.to(torch.int64)

# Convert the labels to a one hot encoded tensor
one_hot = torch.nn.functional.one_hot(labels, num_classes=7)
one_hot = torch.squeeze(one_hot)

print(one_hot.shape)

Speed

In [12]:
# calc mean over trainingsset
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
print(len(train_dataloader))
iterator = iter(train_dataloader)

7347


In [14]:
i = 0
summe = []
for batch_idx, data in enumerate(train_dataloader):
    #print(data)
    if i % 100 == 0:
        print(i)
    summe.append(np.mean(data["speed"].numpy()))
    if i >= 1000:
        break
    i += 1


0

100

200

300

400

500

600

700

800

900

1000


In [None]:
#print(summe)

In [15]:
print(np.mean(summe)) # 2.2078979146598274
print(np.std(summe)) # 0.22455625005948113
speed_mean = np.mean(summe)
speed_std = np.std(summe)

2.250456762830466

0.30215840254891313


In [None]:
batch = next(iterator)
#print(np.round(batch["speed"].numpy(),2))

In [None]:
(batch["speed"]-speed_mean)/speed_std

### Vanishing/Exploding Gradients

In [None]:
"""
for name, param in net.thr_head.named_parameters():
    if param.requires_grad:
        print(name, param.data.cpu().numpy())
"""

In [57]:
for name, param in net.spd_input.named_parameters():
    if param.requires_grad:
        print(name, np.max(abs(param.data.cpu().numpy())))

0.weight 1.216921

0.bias 0.0005766605


## Saving and Loading

Not suited for leaderboard agents

In [None]:
#torch.save(net, 'rgb_resnet.pth')

In [None]:
#net = torch.load('rgb_resnet.pth')

suited for leaderboard agents

In [20]:
#torch.save(net.state_dict(), "rgb_resnet_cmd_spd.pth")

In [None]:
#net = MyResnet()
#net.load_state_dict(torch.load("rgb_resnet_cmd_spd.pth"))
#net.cuda()

## Testing Time

In [None]:
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
idx, X = next(enumerate(test_dataloader))
img = transform_norm(X["rgb"])
img.shape

In [None]:
torch.squeeze(img,1).shape

In [None]:
# ohne preprocessing ca 16-17 sekunden. Mit preprocessing ca 37 sekunden ~2gb
# 24.12: 44 batches -> preprocessing 26 sec, training & preprocessing 69 sec
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
print(len(test_dataloader))
at = time.time()
for batch_idx, data in enumerate(test_dataloader):
    #print(batch_idx)
    data_ = torch.squeeze(transform_norm(data["rgb"]),1).float()
    #print(data_.shape)
et = time.time()
print(et-at)
