# Model ResNet

https://www.pluralsight.com/guides/introduction-to-resnet

#### TODOS
1. DONE Debugging, does output make sense?
    1. Resize images
    2. preprocessing fixes
    5. replace scaling by proper function
2. try on leaderboard
3. Include Odometry and fuse into heads
    - Speed
    - Location
4. navigation
5. controller
6. Evaluation on Test set, Modularization



## Dependencies

In [2]:
# MODEL STUFF
import torch
import torch.nn as nn
import torch.optim as optim
#import torch.nn.functional as F
import numpy as np
import torchvision
from torchvision import *
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms

# GENERAL STUFF
import time
import copy
import os
import sys
sys.path.insert(1, 'C:/Users/morit/OneDrive/UNI/Master/WS22/APP-RAS/Programming/data_pipeline') # TODO

# DATA ENGINEERING
from data_sampler import WeightedSampler
from dataset import CARLADataset#, CARLADatasetMultiProcessing

## Model

In [3]:
class MyResnet(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        # ResNet Architecture with pretrained weights, also bigger resnets available
        self.net = torchvision.models.resnet18(weights=True)
        num_ftrs = self.net.fc.in_features

        # Top layer of ResNet which you can modify. We choose Identity to use it as Input for all the heads
        self.net.fc = nn.Identity()
        
        
        # Regression Heads for Throttle, Brake and Steering
        self.thr_head = nn.Sequential(
            nn.Linear(num_ftrs, 1),
            nn.Sigmoid() # [0,1] Range Output
        )
        
        self.brk_head = nn.Sequential(
            nn.Linear(num_ftrs, 1),
            nn.Sigmoid() # [0,1] Range Output
        )
        
        self.str_head = nn.Sequential(
            nn.Linear(num_ftrs, 1),
            nn.Tanh() # [-1,1] Range Output
        )

    # Forward Pass of the Model
    def forward(self, x):
        x = self.net(x)
        #x = self.net.fc(x)
        return self.thr_head(x), self.str_head(x), self.brk_head(x) # 3 Outputs since we have 3 Heads

## Data Loaders, Data Sets

In [4]:
#path_ege_data = os.path.join("..", "..", "data", "Dataset Ege")
path_ege_data = "D:/data/int_r_dataset_23_11/Routes_routes_30mshortroutes_Town01_Scenario9_Seed5000"

config = {"used_inputs": ["rgb","measurements"], 
        "used_measurements": ["speed", "steer", "throttle", "brake"],
        "seq_len": 1
        }

train_dataset = CARLADataset(root_dir=path_ege_data, config=config, transform=None)
weighted_sampler = WeightedSampler(dataset=train_dataset)

batch_size = 64
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

### Perprocessing Workarounds

In [5]:
def normalize_batch(tensors):
    liste = []
    for tensor in tensors:
        tensor = preprocess(tensor )#* 1/255
        liste.append(tensor)
    return torch.stack(liste)

In [6]:
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

img = next(iter(train_dataloader))["rgb"].transpose(1,4)[:,:,:,:,0].float()
img.shape

torch.Size([64, 3, 160, 960])

In [7]:
normalize_batch(img).shape#[0,:,:,:]

torch.Size([64, 3, 160, 960])

In [8]:
np.mean(normalize_batch(img).numpy(), axis =(0,2,3))

array([0.18263042, 0.22423622, 0.45745656], dtype=float32)

## Training

In [8]:
# Initialise Model (GPU or CPU)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = MyResnet().cuda() if device else net
net



MyResnet(
  (net): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

In [9]:
def to_cuda_if_possible(data):
    return data.to(device) if device else data

In [10]:
# Loss and Optimizer
criterion = nn.L1Loss() # Easy to interpret #nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001) #optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [11]:
%%time

n_epochs = 10
#print_every = 10
valid_loss_min = np.Inf
val_loss = []
train_loss = []
total_step = len(train_dataloader)

run = True

for epoch in range(1, n_epochs+1):
    
    running_loss = 0.0
    print(f'Epoch {epoch}\n')
    
    # Work through batches
    for batch_idx, data in enumerate(train_dataloader): #data: (['idx', 'rgb', 'speed', 'steer', 'throttle', 'brake'])

        # further preprocessing
        data_ = normalize_batch(data["rgb"].transpose(1,4)[:,:,:,:,0]).float() # HOTFIX
        data["throttle"]=data["throttle"].float()
        data["steer"]=data["steer"].float()
        data["brake"]=data["brake"].float()

        # move to GPU
        data_ = data_.to(device)
        
        data["throttle"] = to_cuda_if_possible(data["throttle"])
        data["steer"] = to_cuda_if_possible(data["steer"])
        data["brake"] = to_cuda_if_possible(data["brake"])
        
        # compute outputs
        optimizer.zero_grad()
        
        outputs = net(data_)
        output_throttle = to_cuda_if_possible(outputs[0])
        output_steer = to_cuda_if_possible(outputs[1])
        output_brake = to_cuda_if_possible(outputs[2])
        
        loss_throttle = criterion(output_throttle, data["throttle"].float())
        loss_steer = criterion(output_steer, data["steer"].float())
        loss_brake = criterion(output_brake, data["brake"].float())
        
        
        loss = sum([3*loss_throttle, loss_steer, loss_brake])/5
        
        # Backprop
        loss.backward()
        
        optimizer.step()

        running_loss += loss.item()
        if (batch_idx) % batch_size == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch, n_epochs, batch_idx, total_step, loss.item()))
        
    # Epoch finished, evaluate network and save if network_learned
    train_loss.append(running_loss/total_step)
    print(f'\ntrain-loss: {np.mean(train_loss):.4f},')
    batch_loss = 0

    
    # Evaluation on Test set, skipped for now
    """
    with torch.no_grad():
        net.eval()
        for data_t, target_t in (test_dataloader):
            data_t, target_t = data_t.to(device), target_t.to(device)
            outputs_t = net(data_t)
            loss_t = criterion(outputs_t, target_t)
            batch_loss += loss_t.item()
        val_loss.append(batch_loss/len(test_dataloader))
        #network_learned = batch_loss < valid_loss_min
        print(f'validation loss: {np.mean(val_loss):.4f}, \n')

        
        if False:#network_learned:
            valid_loss_min = batch_loss
            torch.save(net.state_dict(), 'resnet.pt')
            print('Improvement-Detected, save-model')
    """

    net.train()
    

Epoch 1

Epoch [1/10], Step [0/21], Loss: 0.4260

train-loss: 0.3623,
Epoch 2

Epoch [2/10], Step [0/21], Loss: 0.2531

train-loss: 0.2869,
Epoch 3

Epoch [3/10], Step [0/21], Loss: 0.1711

train-loss: 0.2356,
Epoch 4

Epoch [4/10], Step [0/21], Loss: 0.0882

train-loss: 0.1976,
Epoch 5

Epoch [5/10], Step [0/21], Loss: 0.0578

train-loss: 0.1710,
Epoch 6

Epoch [6/10], Step [0/21], Loss: 0.0690

train-loss: 0.1520,
Epoch 7

Epoch [7/10], Step [0/21], Loss: 0.0638

train-loss: 0.1381,
Epoch 8

Epoch [8/10], Step [0/21], Loss: 0.0427

train-loss: 0.1264,
Epoch 9

Epoch [9/10], Step [0/21], Loss: 0.0418

train-loss: 0.1169,
Epoch 10

Epoch [10/10], Step [0/21], Loss: 0.0393

train-loss: 0.1091,
Wall time: 8min 48s


### Test predictions

In [9]:
test_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
print(next(iter(test_dataloader)).keys())


dict_keys(['idx', 'rgb', 'speed', 'steer', 'throttle', 'brake'])


In [10]:
idx, X = next(enumerate(test_dataloader))

In [11]:
data_ = normalize_batch(X["rgb"].transpose(1,4)[:,:,:,:,0]).float().to(device)
target_ = (X["throttle"],X["steer"],X["brake"])

NameError: name 'device' is not defined

In [52]:
from torchvision import transforms

img = X["rgb"].transpose(1,4)[0,:,:,:,0]

# Define a transformation to normalize the tensor
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Apply the transformation to the tensor
normalized_img = img#/255#normalize(img)

# Convert the tensor to a PIL image
img_pil = transforms.ToPILImage()(normalized_img)

# Display the image
img_pil.show()


In [45]:
print(img.shape)

torch.Size([3, 160, 960])


In [53]:
transforms.ToPILImage(img)
img_pil.show()

In [31]:
np.shape(data_)

torch.Size([64, 3, 160, 960])

In [32]:
with torch.no_grad():
    #net.eval()
    outputs_ = net(data_)

In [33]:
i =0

In [34]:
print(np.round(outputs_[i].cpu().numpy(),1))



[[0.8]
 [0.7]
 [0.8]
 [0.8]
 [0.8]
 [0.8]
 [0.1]
 [0.7]
 [0.1]
 [0.8]
 [0.7]
 [0. ]
 [0.7]
 [0.1]
 [0. ]
 [0.8]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0.8]
 [0.6]
 [0.7]
 [0. ]
 [0.7]
 [0.1]
 [0. ]
 [0. ]
 [0.7]
 [0. ]
 [0. ]
 [0.7]
 [0.3]
 [0. ]
 [0.6]
 [0.7]
 [0.1]
 [0.7]
 [0. ]
 [0.7]
 [0.7]
 [0.1]
 [0.8]
 [0.1]
 [0.1]
 [0.8]
 [0.7]
 [0.8]
 [0.8]
 [0.7]
 [0. ]
 [0.8]
 [0.7]
 [0.7]
 [0. ]
 [0.5]
 [0.7]
 [0. ]]


In [35]:
print(np.round(target_[i].cpu().numpy(),1))

[[0.8]
 [0.8]
 [0.8]
 [0.8]
 [0.8]
 [0.8]
 [0.1]
 [0.8]
 [0.1]
 [0.8]
 [0.8]
 [0.1]
 [0.8]
 [0.2]
 [0. ]
 [0.8]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0. ]
 [0.8]
 [0.8]
 [0.8]
 [0.1]
 [0.8]
 [0.1]
 [0. ]
 [0. ]
 [0.8]
 [0. ]
 [0. ]
 [0.8]
 [0.2]
 [0. ]
 [0.8]
 [0.8]
 [0.2]
 [0.8]
 [0.1]
 [0.8]
 [0.7]
 [0.1]
 [0.8]
 [0.1]
 [0.1]
 [0.8]
 [0.8]
 [0.8]
 [0.8]
 [0.8]
 [0. ]
 [0.8]
 [0.8]
 [0.8]
 [0.1]
 [0.5]
 [0.8]
 [0.1]]


In [20]:
np.mean(abs(outputs_[i].cpu().numpy()-target_[i].cpu().numpy()))

0.033931069319581744

### Vanishing/Exploding Gradients

In [38]:
"""
for name, param in net.thr_head.named_parameters():
    if param.requires_grad:
        print(name, param.data.cpu().numpy())
"""

'\nfor name, param in net.thr_head.named_parameters():\n    if param.requires_grad:\n        print(name, param.data.cpu().numpy())\n'

In [37]:
for name, param in net.thr_head.named_parameters():
    if param.requires_grad:
        print(name, np.sum(param.data.cpu().numpy()))

0.weight -2.3901873
0.bias 0.006542502


## Saving and Loading

Not suited for leaderboard agents

In [22]:
#torch.save(net, 'rgb_resnet.pth')

In [23]:
#net = torch.load('rgb_resnet.pth')

In [24]:
#net = torch.load('rgb_resnet.pth')

suited for leaderboard agents

In [25]:
torch.save(net.state_dict(), "rgb_resnet.pth")

In [26]:
#net = MyResnet()
#net.load_state_dict(torch.load("rgb_resnet.pth"))
#net.cuda()

## Testing Time

In [27]:
# ohne preprocessing ca 16-17 sekunden. Mit preprocessing ca 37 sekunden
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
at = time.time()
for batch_idx, data in enumerate(train_dataloader):
    data_ = normalize_batch(data["rgb"].transpose(1,4)[:,:,:,:,0]).float()
et = time.time()
print(et-at)


18.26608109474182
