In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import sys
sys.path.append(f'/media/beans/ssd/bespoke')

from models import EffNet
from constants import *
from imports import *
from rw_dataloader import RealWorldDataloader
from blender_dataloader import BlenderDataloader
from train_utils import run_epoch, eval_rw

torch.__version__, torch.cuda.device_count(), torch.cuda.get_device_name(torch.cuda.current_device())



('1.10.0', 2, 'NVIDIA GeForce RTX 3090')

In [2]:
import wandb

# Ask wandb for a fresh random run id and display it as the cell output.
run_id = wandb.util.generate_id()
run_id

'1j1qlfob'

In [3]:
# Attach to the "carla" project under a fixed, hard-coded run id; resume="allow"
# lets wandb continue that run if it already exists.
# NOTE(review): the id is a literal here, not the `run_id` generated in the
# previous cell -- confirm that resuming this specific run is intended.
wandb.init(
    project="carla",
    id='3qdtsae1',
    resume="allow",
)

[34m[1mwandb[0m: Currently logged in as: [33mrgilman33[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.19 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [4]:
# Build the model on an EfficientNet-B3 architecture and move it to `device`.
# Author's note: 13M params, 11.6M without RNN.
m = EffNet(model_arch="efficientnet_b3").to(device)

# Total parameter count, expressed in thousands (displayed as the cell output).
sum(p.numel() for p in m.parameters()) / 1000

13332.809

In [5]:
# Display the repr of the model's backbone sub-module to inspect its layer structure.
m.backbone

EfficientNet(
  (conv_stem): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
        (bn1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
 

In [6]:
# Restore weights from the base checkpoint file.
# strict=False means missing or unexpected keys are reported in the returned
# result (shown as the cell output) instead of raising -- check that output.
init_weights_path = f"{BESPOKE_ROOT}/models/m.torch"
m.load_state_dict(torch.load(init_weights_path), strict=False)

<All keys matched successfully>

In [7]:
# Adam over all model parameters at a fixed learning rate of 1e-4.
opt = torch.optim.Adam(params=m.parameters(), lr=1e-4)

# Gradient scaler for CUDA automatic mixed precision; handed to run_epoch below.
scaler = torch.cuda.amp.GradScaler()

In [8]:
# Batch size passed to the training dataloader.
BS = 32

# Training dataloader, built with the "trn" path stem.
dataloader_trn = BlenderDataloader(bs=BS, path_stem="trn")

In [9]:
%%time
# Construction of the real-world dataloader is currently disabled, so this cell
# only times an empty body.
# NOTE(review): with the line below commented out, this cell does not define
# `rw_dataloader`; any cell that reads that name must get it from elsewhere.
#rw_dataloader = RealWorldDataloader()

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 5.48 µs


In [10]:
# Disabled: the wandb.watch hook on the model (log="all") is turned off for this run.
#wandb.watch(m, log="all")

In [11]:
# First epoch index of the training loop below (range(START_E, n_epochs)).
START_E = 11

# Tag embedded in every checkpoint filename: m_{model_stem}_e{epoch}.torch
model_stem = "6.24"

In [None]:
n_epochs = 100
log_wandb = True

# Main training loop: epochs START_E .. n_epochs - 1.
# Each epoch runs one training pass via run_epoch and then writes the model's
# state_dict to a per-epoch checkpoint file.
for e in range(START_E, n_epochs):
    print(f"EPOCH {e}\n")

    # One training pass (1280 updates per epoch), optionally logging to wandb.
    run_epoch(
        dataloader_trn,
        m,
        opt=opt,
        scaler=scaler,
        train=True,
        backwards=True,
        log_wandb=log_wandb,
        wandb=wandb,
        updates_per_epoch=1280,
    )

    print("\n\n Eval...")
    # Real-world evaluation is currently disabled inside the loop.
    #eval_rw(rw_dataloader, m, wandb) # will log directly to wandb

    # Save a model checkpoint every epoch (weights only).
    checkpoint_path = f"{BESPOKE_ROOT}/models/m_{model_stem}_e{e}.torch"
    torch.save(m.state_dict(), checkpoint_path)
    

EPOCH 11

{'trn_control_loss': 0.00308829, 'consistency losses/trn_steer_cost': 0.00240685, 'consistency losses/trn_te_loss': 0.00827922, 'consistency losses/trn_torque_delta_loss': 95.99121094, 'logistical/obs_consumed_per_second': 234.5511811, 'logistical/obs_generated_per_second': 114.45669291, 'logistical/data_consumption_ratio': 2.05146131, 'logistical/manual_train_pause': 0.0, 'consistency losses/trn_torque_loss': 302.85005734, 'logistical/max_param': 16.23103142, 'logistical/lr': 0.0001}
{'logistical/obs_consumed_per_second': 231.4921875, 'logistical/obs_generated_per_second': 115.6015625, 'logistical/data_consumption_ratio': 2.0020659, 'logistical/manual_train_pause': 0.0, 'trn_control_loss': 0.00267515, 'consistency losses/trn_steer_cost': 0.00259026, 'consistency losses/trn_te_loss': 0.00935436, 'consistency losses/trn_torque_loss': 343.45183486, 'consistency losses/trn_torque_delta_loss': 97.77807617, 'logistical/max_param': 16.23187256, 'logistical/lr': 0.0001}
{'logistical

{'logistical/obs_consumed_per_second': 236.6953125, 'logistical/obs_generated_per_second': 109.3515625, 'logistical/data_consumption_ratio': 2.17136882, 'logistical/manual_train_pause': 0.0, 'trn_control_loss': 0.00290977, 'consistency losses/trn_steer_cost': 0.00284009, 'consistency losses/trn_te_loss': 0.00982807, 'consistency losses/trn_torque_loss': 344.85697115, 'consistency losses/trn_torque_delta_loss': 99.83618164, 'logistical/max_param': 16.23436928, 'logistical/lr': 0.0001}
{'logistical/obs_consumed_per_second': 232.125, 'logistical/obs_generated_per_second': 109.6484375, 'logistical/data_consumption_ratio': 2.12172517, 'logistical/manual_train_pause': 0.0, 'trn_control_loss': 0.00307555, 'consistency losses/trn_steer_cost': 0.00268383, 'consistency losses/trn_te_loss': 0.00935336, 'consistency losses/trn_torque_loss': 359.99871926, 'consistency losses/trn_torque_delta_loss': 99.85473633, 'logistical/max_param': 16.23239708, 'logistical/lr': 0.0001}
{'logistical/obs_consumed_

{'logistical/obs_consumed_per_second': 243.2109375, 'logistical/obs_generated_per_second': 111.0859375, 'logistical/data_consumption_ratio': 2.19382115, 'logistical/manual_train_pause': 0.0, 'trn_control_loss': 0.00266843, 'consistency losses/trn_steer_cost': 0.00260206, 'consistency losses/trn_te_loss': 0.00914223, 'consistency losses/trn_torque_loss': 326.46448864, 'consistency losses/trn_torque_delta_loss': 98.95996094, 'logistical/max_param': 16.23562622, 'logistical/lr': 0.0001}
{'logistical/obs_consumed_per_second': 241.25, 'logistical/obs_generated_per_second': 115.0703125, 'logistical/data_consumption_ratio': 2.09597947, 'logistical/manual_train_pause': 0.0, 'trn_control_loss': 0.00300542, 'consistency losses/trn_steer_cost': 0.0025301, 'consistency losses/trn_te_loss': 0.00870798, 'consistency losses/trn_torque_loss': 323.43478261, 'consistency losses/trn_torque_delta_loss': 96.60009766, 'logistical/max_param': 16.23660851, 'logistical/lr': 0.0001}
{'logistical/obs_consumed_pe

In [10]:
%%time

eval_rw(rw_dataloader, m, wandb) # will log directly to wandb



CPU times: user 14.3 s, sys: 417 ms, total: 14.7 s
Wall time: 14 s


In [13]:
# Manual checkpoint of the current weights, written outside the training loop.
# NOTE(review): the epoch number 24 is hard-coded in the filename rather than
# taken from the loop variable -- confirm it matches the epoch actually reached.
manual_ckpt_path = f"{BESPOKE_ROOT}/models/m_{model_stem}_e24.torch"
torch.save(m.state_dict(), manual_ckpt_path)