In [1]:
import math
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import deque 

from model import HDDriveDQN, HDMapSensorDQN, hd_net_args
from agent import MemoryBufferSimple
import random
import itertools
from srunner.tools import dotdict

In [2]:
# Load one recorded environment state (a dict keyed by sensor name) from disk.
# NOTE(review): pickle.load can execute arbitrary code contained in the file —
# only open 'state.pickle' if it comes from a trusted source.
with open('state.pickle', 'rb') as fp:
    state = pickle.load(fp)
state.keys()

dict_keys(['frame', 'accelerometer', 'gyroscope', 'compass', 'gnss', 'velocity', 'hd_map', 'front_rgb'])

In [3]:
# NOTE(review): h_size is not referenced by any other cell visible in this notebook.
h_size = 128
# Sensor encoder under test. Presumably (3,96,96) is the (C,H,W) image shape the
# network expects — TODO confirm the meaning of the `7` and `p` arguments in model.py.
hd_sensor_model = HDMapSensorDQN((3,96,96), 7, p=0.3)
def npImg_to_tensor(x):
    """Turn one channel-last image array into a float32 tensor of shape (1, C, rows, cols)."""
    channel_first = x.transpose(2, 0, 1)  # (rows, cols, C) -> (C, rows, cols)
    with_batch_axis = np.expand_dims(channel_first, axis=0)
    return torch.tensor(with_batch_axis, dtype=torch.float32)

In [4]:
# Image inputs: channel-first float tensors with a leading batch axis of 1.
bev_X, front_X = (npImg_to_tensor(state[key]) for key in ('hd_map', 'front_rgb'))

# Inertial readings: float32 tensors, again with a leading batch axis of 1.
acc_X, comp_X, gyro_X = (
    torch.tensor(state[key], dtype=torch.float32)[None]
    for key in ('accelerometer', 'compass', 'gyroscope')
)

# Velocity keeps whatever dtype torch infers from the stored value.
vel_X = torch.tensor(state['velocity'])[None]

# Display every input's shape as the cell output.
tuple(t.shape for t in (bev_X, front_X, acc_X, gyro_X, comp_X, vel_X))

(torch.Size([1, 3, 500, 400]),
 torch.Size([1, 3, 500, 400]),
 torch.Size([1, 3]),
 torch.Size([1, 3]),
 torch.Size([1, 1]),
 torch.Size([1]))

In [5]:
# Smoke-test the sensor encoder on a batch made of 32 copies of the single loaded state.
# NOTE(review): the recorded output of this cell is a RuntimeError
# (32x49600 cannot be multiplied with 2304x128). The images built in the previous
# cell are 500x400 while the model was constructed with (3,96,96), so the inputs
# presumably have to be resized/cropped to 96x96 before this call — TODO confirm.
bev_X_h, front_X_h, ego_X_h, vel_X_h = hd_sensor_model(bev_X.repeat(32,1,1,1), front_X.repeat(32,1,1,1), acc_X.repeat(32,1), comp_X.repeat(32,1), gyro_X.repeat(32,1), vel_X.repeat(32))
bev_X_h.shape, front_X_h.shape, ego_X_h.shape, vel_X_h.shape

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x49600 and 2304x128)

In [None]:
# Build the full driving network from the argument bundle imported from model.py.
# NOTE(review): this cell has no execution count (In [None]) although later cells
# use `hd_drive_net` and `args`; it must be run for a Restart & Run All to succeed.
args = hd_net_args
hd_drive_net = HDDriveDQN(args)

In [7]:
# Shape check: tile the single image to 4 x 32 copies along two new leading axes.
# NOTE(review): the recorded output shows 96x96 images, whereas the earlier shape
# display showed 500x400 — `state` presumably changed between runs; TODO confirm.
bev_X.repeat(4,32,1,1,1).shape

torch.Size([4, 32, 3, 96, 96])

In [8]:
# Forward pass through the full network with every input tiled to
# (args.n_frames, 32, ...); the call returns four outputs, unpacked below.
# NOTE(review): the two torch.Size lines in the recorded output are not produced by
# this cell's code — presumably debug prints inside the model; TODO confirm.
steering_q_vals, throttle_q_vals, brake_q_vals, t_or_b_vals = hd_drive_net(bev_X.repeat(args.n_frames,32,1,1,1), 
             front_X.repeat(args.n_frames, 32,1,1,1), 
             acc_X.repeat(args.n_frames, 32,1), 
             comp_X.repeat(args.n_frames, 32,1), 
             gyro_X.repeat(args.n_frames, 32,1), 
             vel_X.repeat(args.n_frames, 32))

torch.Size([1, 32, 128])
torch.Size([4, 32, 128])


In [9]:
# Display the shapes of the steering, throttle and brake outputs of the forward pass.
steering_q_vals.shape, throttle_q_vals.shape, brake_q_vals.shape

(torch.Size([32, 11]), torch.Size([32, 11]), torch.Size([32, 11]))

In [10]:
def _count_params(module):
    """Return the total number of scalar parameters held by ``module``."""
    return sum(p.numel() for p in module.parameters())


# One count per sub-network of the driving model.
sensor_params = _count_params(hd_drive_net.sensor_net)
fusion_params = _count_params(hd_drive_net.fusion_net)
temporal_params = _count_params(hd_drive_net.temporal_net)
throttle_params = _count_params(hd_drive_net.throttle_net)
steering_params = _count_params(hd_drive_net.steering_net)
brake_params = _count_params(hd_drive_net.brake_net)
t_or_b_params = _count_params(hd_drive_net.t_or_b_net)

# Displayed as the cell output, in the same order as before.
(
    sensor_params,
    fusion_params,
    temporal_params,
    throttle_params,
    steering_params,
    brake_params,
    t_or_b_params,
)

(750784, 661632, 661632, 1419, 1419, 1419, 258)

In [11]:
# Total number of scalar parameters across the whole driving network.
sum(p.numel() for p in hd_drive_net.parameters())

2078563

In [12]:
class MemoryBufferSeparated(torch.utils.data.Dataset):
    """Replay buffer that keeps experiences grouped by episode.

    Every sample served by ``__getitem__`` is a window of ``num_frames + 1``
    consecutive states taken from a single *finished* episode, so a window
    never straddles an episode boundary. The newest episode is treated as
    still being recorded: it is never sampled and never flushed.

    An episode needs at least ``num_frames + 2`` experiences to be sampleable
    (``num_frames + 1`` states plus the action/reward entry one step further);
    shorter episodes stay in storage but are skipped when sampling.

    When more than ``max_buffer_sz`` experiences are stored, the oldest
    finished episodes are dropped until about 10% of the capacity, and at
    least one episode, has been freed.

    Args:
        num_frames: number of stacked frames that make up one network input.
        max_buffer_sz: maximum number of stored experiences before flushing.
        return_dones: when True, ``__getitem__`` additionally returns the done
            flag as a fourth element. Defaults to False so that samples keep
            the ``(states, action, reward)`` layout.
    """

    def __init__(self, num_frames, max_buffer_sz=25_000, return_dones=False):
        self.avg_ep_len = None  # not used by any method of this class
        self.ep_lengths = deque()
        self.max_buffer_size = max_buffer_sz
        self.ep_states = deque()
        self.ep_actions = deque()
        self.ep_rewards = deque()
        self.ep_dones = deque()
        self.active_ep_idx = -1  # not used by any method of this class
        self.num_frames = num_frames
        self.return_dones = return_dones

    def _finished_lengths(self):
        """Lengths of all episodes except the newest (still active) one."""
        return list(self.ep_lengths)[:-1]

    def _sampleable_episodes(self):
        """Indices of finished episodes long enough to provide one full window."""
        min_len = self.num_frames + 2
        return [i for i, n in enumerate(self._finished_lengths()) if n >= min_len]

    def __len__(self):
        """Number of distinct windows that ``__getitem__`` can currently draw.

        Returns 0 while nothing can be sampled, so a DataLoader built on an
        under-filled buffer yields no batches instead of failing.
        """
        return sum(max(0, n - self.num_frames - 1) for n in self._finished_lengths())

    def _start_episode(self):
        """Open a fresh, empty episode at the right end of every deque."""
        self.ep_lengths.append(0)
        self.ep_states.append([])
        self.ep_actions.append([])
        self.ep_rewards.append([])
        self.ep_dones.append([])

    def add_experience(self, state, action, reward, done, new_episode=False):
        """Append one experience to the active episode.

        States are at t+1, so ``__getitem__`` selects actions and rewards one
        step forward. An episode is opened automatically when the buffer is
        empty, so the first call does not have to pass ``new_episode=True``.
        """
        if new_episode or not self.ep_lengths:
            self._start_episode()

        self.ep_states[-1].append(state)
        self.ep_actions[-1].append(action)
        self.ep_rewards[-1].append(reward)
        self.ep_dones[-1].append(done)
        self.ep_lengths[-1] += 1

        if self.max_buffer_size < sum(self.ep_lengths):
            self.flush_experiences()

    def flush_experiences(self, per=0.1):
        """Drop the oldest finished episodes to free about ``per`` of the capacity.

        At least one episode is dropped when a finished one exists. The active
        (newest) episode is never dropped, because ``add_experience`` keeps
        appending to it.
        """
        to_flush = max(1, int(per * self.max_buffer_size))
        flushed = 0
        while flushed < to_flush and len(self.ep_lengths) > 1:
            flushed += self.ep_lengths.popleft()
            self.ep_states.popleft()
            self.ep_actions.popleft()
            self.ep_rewards.popleft()
            self.ep_dones.popleft()

    def _np_img_to_tensor(self, img):
        """Stack channel-last frames into a float32 tensor of shape (frames, C, rows, cols).

        No batch axis is added here; the DataLoader's collate step adds it.
        """
        img = np.array(img)  # frames x rows x cols x C
        return torch.tensor(img.transpose(0, 3, 1, 2), dtype=torch.float32)

    def _process_states(self, state):
        """Convert a sequence of per-frame state dicts into per-sensor tensors.

        This function should be passed into the class as it could depend on
        the sensor setup. This configuration works for the HDSensor setup.

        Args:
            state: sequence of state dicts (a single dict is treated as a
                one-frame sequence), each holding the keys 'hd_map',
                'front_rgb', 'accelerometer', 'compass', 'gyroscope' and
                'velocity'.

        Returns:
            Tuple ``(bev, front, acc, comp, gyro, vel)`` of float32 tensors
            whose first axis indexes the frames.
        """
        frames = [state] if isinstance(state, dict) else list(state)

        def stack(key):
            # Stack through numpy first; torch.tensor on a list of arrays is slow.
            values = np.stack([np.asarray(frame[key]) for frame in frames])
            return torch.tensor(values, dtype=torch.float32)

        bev_X = self._np_img_to_tensor([frame['hd_map'] for frame in frames])
        front_X = self._np_img_to_tensor([frame['front_rgb'] for frame in frames])
        acc_X = stack('accelerometer')
        comp_X = stack('compass')
        gyr_X = stack('gyroscope')
        vel_X = stack('velocity')

        return bev_X, front_X, acc_X, comp_X, gyr_X, vel_X

    def __getitem__(self, idx):
        """Draw a random window; ``idx`` is ignored.

        Returns:
            ``(states, action, reward)``, or ``(states, action, reward, done)``
            when the buffer was built with ``return_dones=True``. ``states`` is
            the tuple produced by ``_process_states`` for ``num_frames + 1``
            consecutive frames.

        Raises:
            IndexError: if no finished episode is long enough to sample from.
        """
        candidates = self._sampleable_episodes()
        if not candidates:
            raise IndexError(
                "no finished episode with at least num_frames + 2 experiences to sample from"
            )
        # Never sample the current episode, it may still be growing.
        ep_idx = random.choice(candidates)
        # -1 because the entry at end_idx is read below as well.
        t_idx = random.randrange(self.ep_lengths[ep_idx] - self.num_frames - 1)
        end_idx = t_idx + self.num_frames + 1  # +1 as we need the next state as well
        states = self._process_states(self.ep_states[ep_idx][t_idx:end_idx])
        # NOTE(review): action/reward are read at end_idx, one past the last
        # state of the window — confirm this matches the storage convention
        # used by the caller of add_experience.
        action = torch.tensor(self.ep_actions[ep_idx][end_idx], dtype=torch.int64)
        reward = torch.tensor(self.ep_rewards[ep_idx][end_idx], dtype=torch.float32)
        if self.return_dones:
            done = torch.tensor(self.ep_dones[ep_idx][end_idx], dtype=torch.float32)
            return states, action, reward, done
        return states, action, reward

In [108]:
# NOTE(review): this instantiates MemoryBufferSimple (imported from agent), not the
# MemoryBufferSeparated class defined earlier in this notebook.
buffer = MemoryBufferSimple(num_frames=4, max_buffer_sz=100)

In [116]:
# Explicit dunder call; returns the same value as len(buffer).
buffer.__len__()

95

In [117]:
# Number of samples the buffer reports to a DataLoader.
len(buffer)

95

In [114]:
# Fill the buffer with n entries: the same loaded `state` object each time, a dummy
# 3-component action, a reward of 1.0 and done=False.
# NOTE(review): this cell's execution count (114) is lower than that of the len()
# cells above (116, 117), so it was run before them despite appearing later.
n = 100
for i in range(n):
    buffer.add_experience(state, [0,0,0], 1.0, False)

In [109]:
# Wrap the buffer in a DataLoader with batches of 16; all other options are defaults.
data_loader = torch.utils.data.DataLoader(buffer, batch_size=16)

In [115]:
# Pull the first batch from a fresh iterator over the loader.
x = next(iter(data_loader))

In [128]:
# NOTE(review): DEVICE is not referenced by any other cell visible in this notebook.
DEVICE = 'cpu'
# A batch unpacks into (states, actions, rewards); states is itself a sequence of tensors.
state_batch, action_batch, reward_batch = x
state_batch[0].shape, action_batch.shape, reward_batch.shape

(torch.Size([16, 5, 3, 96, 96]), torch.Size([16, 3]), torch.Size([16]))

In [97]:
# Request a batch 30 times. iter(data_loader) is re-created on every pass, so each
# pass asks for the first batch of a new iterator; the results are discarded.
for i in range(30):
    next(iter(data_loader))

In [126]:
# Display the first state tensor of the batch together with its shape.
# NOTE(review): the recorded output shows a torch.Size in both positions, which this
# expression would not produce (the first element is the tensor itself) — presumably
# the output was recorded from an earlier version of this cell; TODO re-run.
x[0][0], x[0][0].shape

(torch.Size([16, 5, 3, 96, 96]), torch.Size([16, 5, 3, 96, 96]))

In [18]:
# Device on which the first state tensor of the batch lives.
x[0][0].device

device(type='cpu')

In [19]:
# Swap axes 0 and 1 of every state tensor, presumably from (batch, frames, ...) to
# (frames, batch, ...) — TODO confirm against the model's expected layout.
# NOTE(review): transpose_ mutates the tensors in `x` in place, so re-running this
# cell swaps the axes back; the cells below rely on it having run exactly once.
for state_tensor in x[0]:
    state_tensor.transpose_(0,1)
    print(state_tensor.shape)

torch.Size([5, 5, 3, 96, 96])
torch.Size([5, 5, 3, 96, 96])
torch.Size([5, 5, 3])
torch.Size([5, 5, 1])
torch.Size([5, 5, 3])
torch.Size([5, 5])


In [23]:
# Drop the last entry along axis 0 of each of the six state tensors, then run the
# driving network on what remains.
cur_state = tuple(x[0][k][:-1] for k in range(6))
s_q, t_q, b_q, t_or_b_q = hd_drive_net(*cur_state)

# Display the shape of each of the four outputs.
tuple(q.shape for q in (s_q, t_q, b_q, t_or_b_q))

torch.Size([1, 5, 128])
torch.Size([4, 5, 128])


(torch.Size([5, 11]),
 torch.Size([5, 11]),
 torch.Size([5, 11]),
 torch.Size([5, 2]))

In [122]:
# Shape of the first tensor in cur_state after the last axis-0 entry was dropped.
cur_state[0].shape

torch.Size([4, 5, 3, 96, 96])

In [21]:
# Print the shape of every tensor in cur_state.
for state_tensor in cur_state:
    print(state_tensor.shape)

torch.Size([4, 5, 3, 96, 96])
torch.Size([4, 5, 3, 96, 96])
torch.Size([4, 5, 3])
torch.Size([4, 5, 1])
torch.Size([4, 5, 3])
torch.Size([4, 5])
