# Reinforcement Learning: Min Distance RL with CARLA CL (Larger Model) 13/01/22

At the recommednation of one of the authors of the paper I will try to adapt the following [tutorial](https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html) to work acheive the same results as in the paper.

Notes: Pygame only runs on python 3.7, pytorch must be install direclty into the env

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

import carla
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count
from PIL import Image

from torch.utils.tensorboard import SummaryWriter

import gym
import gym_carla

# set up matplotlib
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [20]:
# parameters for the gym_carla environment
params = {
    'number_of_vehicles': 0,
    'number_of_walkers': 0,
    'display_size': 256,  # screen size of bird-eye render
    'max_past_step': 1,  # the number of past steps to draw
    'dt': 0.1,  # time interval between two frames
    'discrete': True,  # whether to use discrete control space
    'discrete_acc': [2.0],  # discrete value of accelerations
    'discrete_steer': [-0.3, 0.0, 0.3],  # discrete value of steering angles
    'continuous_accel_range': [-3.0, 3.0],  # continuous acceleration range
    'continuous_steer_range': [-0.3, 0.3],  # continuous steering angle range
    'ego_vehicle_filter': 'vehicle.lincoln*',  # filter for defining ego vehicle
    'port': 3000,  # connection port
    'town': 'Town04',  # which town to simulate
    'task_mode': 'random',  # mode of the task, [random, roundabout (only for Town03)]
    'max_time_episode': 500,  # maximum timesteps per episode
    'max_waypt': 12,  # maximum number of waypoints
    'obs_range': 32,  # observation range (meter)
    'lidar_bin': 0.125,  # bin size of lidar sensor (meter)
    'd_behind': 12,  # distance behind the ego vehicle (meter)
    'out_lane_thres': 1.8,  # threshold for out of lane
    'desired_speed': 3,  # desired speed (m/s)
    'max_ego_spawn_times': 20,  # maximum times to spawn ego vehicle
    'display_route': True,  # whether to render the desired route
    'pixor_size': 64,  # size of the pixor labels
    'pixor': False,  # whether to output PIXOR observation
    'routes':{'Town04':{'H':[264,234]}},
    'weather':'ClearNoon',
    'obs_size':128
}

#{'Town04':{'E':[0,301,334,120,75,51],
#                                'M':[191,131,197,210,371,348,141,320],
#                                'H':[251,161,264,234,167,182]
#                                }
#                    },

# Set gym-carla environment
env = gym.make('carla-v0', params=params)

connecting to Carla server...
Carla server connected!
WeatherParameters(cloudiness=5.000000, cloudiness=5.000000, precipitation=0.000000, precipitation_deposits=0.000000, wind_intensity=10.000000, sun_azimuth_angle=-1.000000, sun_altitude_angle=45.000000, fog_density=2.000000, fog_distance=0.750000, fog_falloff=0.100000, wetness=0.000000, scattering_intensity=1.000000, mie_scattering_scale=0.030000, rayleigh_scattering_scale=0.033100)


In [3]:

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))

class ReplayMemory(object):

    def __init__(self, capacity):
        self.memory = deque([],maxlen=capacity)

    def push(self, *args):
        """Save a transition"""
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    
    def __len__(self):
        return len(self.memory)


In [4]:
class PerceptionNet(nn.Module):

    def __init__(self):
        super(PerceptionNet,self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=4, stride=2, padding=1)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        
        self.conv3 = nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        
        self.conv4 = nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        
        self.conv5 = nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1)
        self.bn5 = nn.BatchNorm2d(512)
        
        self.conv6 = nn.Conv2d(512, 64, kernel_size=4, stride=1)
        
        self.conv7 = torch.nn.ConvTranspose2d(64,512, kernel_size =4, stride=1)
        self.bn6 = nn.BatchNorm2d(512)
        
        self.conv8 = torch.nn.ConvTranspose2d(512,256, kernel_size =4, stride=2, padding=1)
        self.bn7 = nn.BatchNorm2d(256)
        
        self.conv9 = torch.nn.ConvTranspose2d(256,128, kernel_size =4, stride=2, padding=1)
        self.bn8 = nn.BatchNorm2d(128)
        
        self.conv10 = torch.nn.ConvTranspose2d(128,64, kernel_size =4, stride=2, padding=1)
        self.bn9 = nn.BatchNorm2d(64)
        
        self.conv11 = torch.nn.ConvTranspose2d(64,32, kernel_size =4, stride=2, padding=1)
        self.bn10 = nn.BatchNorm2d(32)
        
        self.conv12 = torch.nn.ConvTranspose2d(32,23, kernel_size =4, stride=2,padding=1)
        
            
    def encode(self, x):
        x = F.leaky_relu(self.conv1(x),negative_slope=0.02)
        x = F.leaky_relu(self.bn2(self.conv2(x)),negative_slope=0.02)
        x = F.leaky_relu(self.bn3(self.conv3(x)),negative_slope=0.02)
        x = F.leaky_relu(self.bn4(self.conv4(x)),negative_slope=0.02)
        x = F.leaky_relu(self.bn5(self.conv5(x)),negative_slope=0.02)
        return self.conv6(x)
    
    def decode(self, x):
        x = F.leaky_relu(self.bn6(self.conv7(x)),negative_slope=0.02)
        x = F.leaky_relu(self.bn7(self.conv8(x)),negative_slope=0.02)
        x = F.leaky_relu(self.bn8(self.conv9(x)),negative_slope=0.02)
        x = F.leaky_relu(self.bn9(self.conv10(x)),negative_slope=0.02)
        x = F.leaky_relu(self.bn10(self.conv11(x)),negative_slope=0.02)
        return F.log_softmax(self.conv12(x),dim=1)
    
    def forward(self, x):
        x = x.to(device, dtype=torch.float32)
        latent = self.encode(x)
        out = self.decode(latent)
        return out, latent

In [5]:
class Full_DQN(nn.Module):

    def __init__(self, outputs,input_size):
        super(Full_DQN, self).__init__()
        
        self.lin1 = nn.Linear(input_size+9,100)
        self.lin2 = nn.Linear(100,50)
        self.lin3 = nn.Linear(50,25)
        self.lin4 = nn.Linear(25,15)
        self.lin5 = nn.Linear(15,8)
        self.lin6 = nn.Linear(8,3)

    def forward(self, x):
        x = x.to(device)
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = F.relu(self.lin3(x))
        x = F.relu(self.lin4(x))
        x = F.relu(self.lin5(x))
        x = self.lin6(x)
        
        return x.view(x.size(0), -1)

In [6]:
def initalize_weights(layer):
    if isinstance(layer, torch.nn.Linear) or isinstance(layer,torch.nn.Linear):
        nn.init.kaiming_uniform_(layer.weight.data,nonlinearity='relu')

In [7]:
BATCH_SIZE = 512
GAMMA = 0.999
EPS_START = 0.5
EPS_END = 0.05
EPS_DECAY = 100000
TARGET_UPDATE = 256

writer = SummaryWriter()

n_actions = env.action_space.n

policy_net = Full_DQN(n_actions,64).to(device)
#policy_net.load_state_dict(torch.load('./model_params_CL/Full_model_4.final'))
policy_net.apply(initalize_weights)
policy_net.eval()

target_net = Full_DQN(n_actions,64).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

model = PerceptionNet()
model.to(device)
model.load_state_dict(torch.load('./AE_params/model_3.best'))
model.eval()
#don't want to update AE model 
for param in model.parameters():
    param.requires_grad = False

# Model <= 7
optimizer = optim.RMSprop(policy_net.parameters(),lr=0.005)

#Model 8 & from 2022 
#optimizer = optim.Adam(policy_net.parameters(),lr=0.005)


memory = ReplayMemory(7500)


steps_done = 0


def select_action(state):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if sample > eps_threshold:
        with torch.no_grad():
            #arg max select the idex of the largest value and view changes shape from (1,) to (1,1)
            #try test net
            return policy_net(state.float()).argmax().view(1,1)
    else:
        return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)


episode_durations = []

In [39]:
def optimize_model(input_size):
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
    # detailed explanation). This converts batch-array of Transitions
    # to Transition of batch-arrays.
    batch = Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    # (a final state would've been the one after which simulation ended)
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state
                                                if s is not None])
    
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)
    #reshape state_batch for nn
  
    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of actions taken. These are the actions which would've been taken
    # for each batch state according to policy_net
    
    # selects column of output that was selceted 
    state_action_values = policy_net(torch.reshape(state_batch,(BATCH_SIZE,1,input_size)).float()).gather(1,action_batch)
    
    
    # Compute V(s_{t+1}) for all next states.
    # Expected values of actions for non_final_next_states are computed based
    # on the "older" target_net; selecting their best reward with max(1)[0].
    # This is merged based on the mask, such that we'll have either the expected
    # state value or 0 in case the state was final.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    next_state_values[non_final_mask] = target_net(torch.reshape(non_final_next_states,
                                        (list(non_final_next_states.shape)[0],1,input_size)).float()).max(1)[0].detach()
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    # Compute Huber loss
    criterion = nn.SmoothL1Loss()
    loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))


    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    #gradient clipping
    for param in policy_net.parameters():
        param.grad.data.clamp(-1, 1)
    optimizer.step()
    
    return loss

In [40]:
num_episodes = 1000
steps_done = 0
use_fixed_idx = 0
env.use_fixed = 'H'
levels = ['H','H','H']
next_lvl = 300
min_overall_loss = 1000
input_size = 73
for i_episode in range(num_episodes):
    eps_loss = []
    min_loss = 100
    rewards = 0
    # Initialize the environment and state
    
    obs = env.reset()
    #ego_dir retirves the distance and angle from vehicle to nearest waypoint
    ego_location = env.ego.get_location()
    ego_dir = gym_carla.envs.misc.get_lane_dis(env.waypoints,ego_location.x,ego_location.y)

    #pos gets a distanc d and array w which has to be seperated out in below line
    ego_pos = np.asarray((ego_dir[0],ego_dir[1][0],ego_dir[1][1]),dtype=np.float32)
    state = np.concatenate((ego_pos,np.zeros(6)))
    state = torch.tensor(state).reshape(1,9,1,1)

    new_obs = torch.tensor(obs['camera'])
    new_obs = new_obs.permute(2,0,1).reshape(1,3,128,128)
    
    _,latent_space = model(new_obs)
    state = torch.cat((state,latent_space.cpu()),1).reshape(1,input_size)


    episode_loss = loss = 1000
    for t in count():
        # Select and perform an action
        action = select_action(state)
        obs, reward, done, info  = env.step(action.item())
        rewards += reward
        reward = torch.tensor([reward], device=device)

        if not done:
            
            #pos gets a distanc d and array w which has to be seperated out in below line
            pos = np.asarray((info['position'][0],info['position'][1][0],info['position'][1][1]))
            ang = np.asarray(info['angular_vel'])
            acc = np.asarray(info['acceleration'])
            steer = np.asarray(info['steer'])
            next_state = np.concatenate((pos, ang, acc, steer), axis=None)
            
            new_obs = torch.tensor(obs['camera'])
            new_obs = new_obs.permute(2,0,1).reshape(1,3,128,128)
            _,latent_space = model(new_obs)
            info_state = torch.tensor(next_state).reshape(1,9,1,1)
            next_state = torch.cat((info_state,latent_space.cpu()),1).reshape(1,input_size)
        else:
            next_state = None

        memory.push(state, action, next_state, reward)
        
        state = next_state

        # Perform one step of the optimization (on the policy network)
        loss = optimize_model(input_size)
        if loss:
            if loss < min_loss:
                min_loss = loss
            eps_loss.append(loss)
        else:
            eps_loss.append(1)
        
        if done:
            episode_durations.append(t + 1)
            avg_loss = sum(eps_loss)/len(eps_loss)
            print('{}:{},{}:{}'.format('avg loss', avg_loss, 'Episode', i_episode))
            writer.add_scalar("Avg Loss/train", avg_loss, i_episode)
            writer.add_scalar("Min Loss/train", min_loss, i_episode)
            break
            
    # Update the target network
    if steps_done % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())
        
    len_episode = t+1
    writer.add_scalar("Lenght/Epoch", len_episode, i_episode)
    writer.add_scalar("Reward/Episode", rewards, i_episode)
    
    #save model if better than previous episode
    if avg_loss < min_overall_loss:
        min_overall_loss = avg_loss
        torch.save(target_net.state_dict(), './model_params_CL/Full_model_7.best')
    

print('Complete')
print('Steps Done: ', steps_done)
torch.save(target_net.state_dict(), './model_params_CL/Full_model_7.final')

writer.flush()
writer.close()
env.close()

avg loss:1.0,Episode:0




avg loss:1.0,Episode:1




avg loss:1.0,Episode:2




avg loss:1.0,Episode:3




avg loss:1.0,Episode:4




avg loss:1.0,Episode:5




avg loss:1.0,Episode:6




avg loss:2.716680162222764,Episode:7




avg loss:0.7526470758046286,Episode:8




avg loss:0.49436207840169344,Episode:9




avg loss:0.37935918977459543,Episode:10




avg loss:0.321481724893237,Episode:11




avg loss:0.31534784181891373,Episode:12




avg loss:0.29834861554673314,Episode:13




avg loss:0.3434214369142932,Episode:14




avg loss:0.3005883565539396,Episode:15




avg loss:0.2868512967110258,Episode:16




avg loss:0.25106933943548976,Episode:17




avg loss:0.23845744865854462,Episode:18




avg loss:0.23722826737728075,Episode:19




avg loss:0.23708925175857473,Episode:20




avg loss:0.22217732717305774,Episode:21




avg loss:0.2170470475654731,Episode:22




avg loss:0.24602116013366995,Episode:23




avg loss:0.21400050218726568,Episode:24




avg loss:0.22469745673741787,Episode:25




avg loss:0.2166313912785532,Episode:26




avg loss:0.19849995091771136,Episode:27




avg loss:0.20616109382575826,Episode:28




avg loss:0.20690220177029756,Episode:29




avg loss:0.18326493370948077,Episode:30




avg loss:0.2126519847284583,Episode:31




avg loss:0.19779186372578117,Episode:32




avg loss:0.1930851764938951,Episode:33




avg loss:0.1952756064524364,Episode:34




avg loss:0.20051632299873415,Episode:35




avg loss:0.1975015162179052,Episode:36




avg loss:0.17853760509037891,Episode:37




avg loss:0.20777109271021474,Episode:38




avg loss:0.18855287666104065,Episode:39




avg loss:0.20180467307543096,Episode:40




avg loss:0.20256009367908195,Episode:41




avg loss:0.197363288629703,Episode:42




avg loss:0.20028936133439507,Episode:43




avg loss:0.1820714991299975,Episode:44




avg loss:0.19480902640768594,Episode:45




avg loss:0.18898234604997033,Episode:46




avg loss:0.20188147998580913,Episode:47




avg loss:0.19603345022593185,Episode:48




avg loss:0.2006676650567935,Episode:49




avg loss:0.21074975399320098,Episode:50




avg loss:0.2147241465692884,Episode:51




avg loss:0.19075488489009496,Episode:52




avg loss:0.2164244310767984,Episode:53




avg loss:0.19746398307489368,Episode:54




avg loss:0.19340686882402014,Episode:55




avg loss:0.2079956079179986,Episode:56




avg loss:0.2016807400503435,Episode:57




avg loss:0.198486474805398,Episode:58




avg loss:0.1856142937545202,Episode:59




avg loss:0.20086567867592423,Episode:60




avg loss:0.19576272057792676,Episode:61




avg loss:0.18938169245420775,Episode:62




avg loss:0.19150293684235098,Episode:63




avg loss:0.20170682015971747,Episode:64




avg loss:0.1836231898316157,Episode:65




avg loss:0.1869498557131382,Episode:66




avg loss:0.1921414402338315,Episode:67




avg loss:0.19338089887232565,Episode:68




avg loss:0.19689799679188102,Episode:69




avg loss:0.2036438441054732,Episode:70




avg loss:0.20693365564974833,Episode:71




avg loss:0.19312469231839957,Episode:72




avg loss:0.1838778623286115,Episode:73




avg loss:0.20490357677351836,Episode:74




avg loss:0.21516222955043238,Episode:75




avg loss:0.20682222986458798,Episode:76




avg loss:0.2128301660840738,Episode:77




avg loss:0.20524172154473522,Episode:78




avg loss:0.20295337797764496,Episode:79




avg loss:0.1918794515414792,Episode:80




avg loss:0.21026882021279122,Episode:81




avg loss:0.21034996799429417,Episode:82




avg loss:0.2014397642327522,Episode:83




avg loss:0.20875633772058153,Episode:84




avg loss:0.17848224963145315,Episode:85




avg loss:0.2059109485978779,Episode:86




avg loss:0.19858443504077627,Episode:87




avg loss:0.1954924635668834,Episode:88




avg loss:0.1879036976683633,Episode:89




avg loss:0.20300859410360636,Episode:90




avg loss:0.20815653015124727,Episode:91




avg loss:0.19500247120816577,Episode:92




avg loss:0.18736982448243722,Episode:93




avg loss:0.18495873748482616,Episode:94




avg loss:0.19783044417229323,Episode:95




avg loss:0.18859104437481095,Episode:96




avg loss:0.1937114445768698,Episode:97




avg loss:0.1954479300411855,Episode:98




avg loss:0.1902089138021679,Episode:99




avg loss:0.1769798211709983,Episode:100




avg loss:0.20867971939154933,Episode:101




avg loss:0.19880283131487944,Episode:102




avg loss:0.20063428327423977,Episode:103




avg loss:0.20551485316091797,Episode:104




avg loss:0.20517525345553891,Episode:105




avg loss:0.2045665583836406,Episode:106




avg loss:0.21006703146142153,Episode:107




avg loss:0.2227335410485973,Episode:108




avg loss:0.2009040553851218,Episode:109




avg loss:0.21147602870692503,Episode:110




avg loss:0.21449699979868922,Episode:111




avg loss:0.2093251784970039,Episode:112




avg loss:0.20189925104241666,Episode:113




avg loss:0.20556674888425996,Episode:114




avg loss:0.2109176450505758,Episode:115




avg loss:0.20620611700594169,Episode:116




avg loss:0.2063850920414937,Episode:117




avg loss:0.1928620681466333,Episode:118




avg loss:0.22079392847147658,Episode:119




avg loss:0.20937271167679702,Episode:120




avg loss:0.19113685913892034,Episode:121




avg loss:0.2150737174778652,Episode:122




avg loss:0.21823624757630128,Episode:123




avg loss:0.21329204146511518,Episode:124




avg loss:0.21600510048327679,Episode:125




avg loss:0.22080294649818585,Episode:126




avg loss:0.21139160353865133,Episode:127




avg loss:0.22077582025422882,Episode:128




avg loss:0.21180589205018846,Episode:129




avg loss:0.2298368416611809,Episode:130




avg loss:0.21156401784202705,Episode:131




avg loss:0.20997693629016703,Episode:132




avg loss:0.2289373880829411,Episode:133




avg loss:0.21398046447845434,Episode:134




avg loss:0.21280439429131873,Episode:135




avg loss:0.22661290945160464,Episode:136




avg loss:0.22061889660309106,Episode:137




avg loss:0.233464611402515,Episode:138




avg loss:0.20755004634281443,Episode:139




avg loss:0.22459144044106685,Episode:140




avg loss:0.2372448824520014,Episode:141




avg loss:0.2174433474333702,Episode:142




avg loss:0.23083430746793387,Episode:143




avg loss:0.21719127444517203,Episode:144




avg loss:0.22824733432179972,Episode:145




avg loss:0.236407785435406,Episode:146




avg loss:0.22498814397431774,Episode:147




avg loss:0.22149227065863042,Episode:148




avg loss:0.2136931934868787,Episode:149




avg loss:0.2280938751554082,Episode:150




avg loss:0.21891231417927723,Episode:151




avg loss:0.22279757786675658,Episode:152




avg loss:0.21203250805917512,Episode:153




avg loss:0.2198750302626692,Episode:154




avg loss:0.23128336664796828,Episode:155




avg loss:0.2256251338909106,Episode:156




avg loss:0.21595058003284107,Episode:157




avg loss:0.224655510664128,Episode:158




avg loss:0.2163528000223797,Episode:159




avg loss:0.22081196624346025,Episode:160




avg loss:0.22382230605530787,Episode:161




avg loss:0.22322735159418283,Episode:162




avg loss:0.21785375732037215,Episode:163




avg loss:0.22901184624699755,Episode:164




avg loss:0.22538671300076263,Episode:165




avg loss:0.2189144010989726,Episode:166




avg loss:0.23250600302185692,Episode:167




avg loss:0.21993995174978945,Episode:168




avg loss:0.2217761132722358,Episode:169




avg loss:0.22306757919868692,Episode:170




avg loss:0.23017520256016813,Episode:171




avg loss:0.22094335881113059,Episode:172




avg loss:0.22021602003649587,Episode:173




avg loss:0.22914714747441073,Episode:174




avg loss:0.22191604489337496,Episode:175




avg loss:0.22376750403931306,Episode:176




avg loss:0.21260518359052244,Episode:177




avg loss:0.222177437462964,Episode:178




avg loss:0.21179566911036035,Episode:179




avg loss:0.23095342984776923,Episode:180




avg loss:0.21663507460134426,Episode:181




avg loss:0.21350330226000422,Episode:182




avg loss:0.21344440837103776,Episode:183




avg loss:0.2197966098199738,Episode:184




avg loss:0.20614741668485048,Episode:185




avg loss:0.22012231870575297,Episode:186




avg loss:0.21713066068695872,Episode:187




avg loss:0.21151455189074558,Episode:188




avg loss:0.2060442923746613,Episode:189




avg loss:0.21745205769842935,Episode:190




avg loss:0.19903545604503622,Episode:191




avg loss:0.22090228124867956,Episode:192




avg loss:0.19832384613167245,Episode:193




avg loss:0.20672318606872175,Episode:194




avg loss:0.2093943215893916,Episode:195




avg loss:0.21958203776877339,Episode:196




avg loss:0.20746216840830006,Episode:197




avg loss:0.20631485750706005,Episode:198




avg loss:0.20713805421033354,Episode:199




avg loss:0.22274825813959745,Episode:200




avg loss:0.2149517764037632,Episode:201




avg loss:0.2146732188509983,Episode:202




avg loss:0.21408593831461448,Episode:203




avg loss:0.19665524450470065,Episode:204




avg loss:0.2111020573964131,Episode:205




avg loss:0.2013546931964224,Episode:206




avg loss:0.20759539109448105,Episode:207




avg loss:0.20899319404191585,Episode:208




avg loss:0.21089175658998532,Episode:209




avg loss:0.20889030563831454,Episode:210




avg loss:0.19897792390549865,Episode:211




avg loss:0.2091350105413847,Episode:212




avg loss:0.22701085021691397,Episode:213




avg loss:0.2143464523673045,Episode:214




avg loss:0.21759372538393795,Episode:215




avg loss:0.19941787309482467,Episode:216




avg loss:0.21287757348315917,Episode:217




avg loss:0.2169661023551896,Episode:218




avg loss:0.20110124680584746,Episode:219




avg loss:0.20836094073188824,Episode:220




avg loss:0.20935278621632367,Episode:221




avg loss:0.210731768981364,Episode:222




avg loss:0.22019597508435093,Episode:223




avg loss:0.21581938281758276,Episode:224




avg loss:0.2021972900527762,Episode:225




avg loss:0.20196320862971248,Episode:226




avg loss:0.2141948100035784,Episode:227




avg loss:0.22319098725718767,Episode:228




avg loss:0.20120841324071073,Episode:229




avg loss:0.21259269348464632,Episode:230




avg loss:0.20549955316919308,Episode:231




avg loss:0.1967403196390243,Episode:232




avg loss:0.21443342906966964,Episode:233




avg loss:0.20783626802169458,Episode:234




avg loss:0.20846378624441644,Episode:235




avg loss:0.21141283639800196,Episode:236




avg loss:0.21958337055880042,Episode:237




avg loss:0.20304328022359538,Episode:238




avg loss:0.20806492466758844,Episode:239




avg loss:0.20711769328646043,Episode:240




avg loss:0.21763845159434517,Episode:241




avg loss:0.21255805370133474,Episode:242




avg loss:0.2201549920315042,Episode:243




avg loss:0.22595620457065324,Episode:244




avg loss:0.21513237315102463,Episode:245




avg loss:0.21974287837799814,Episode:246




avg loss:0.2146227497083949,Episode:247




avg loss:0.2463255107584023,Episode:248




avg loss:0.2159318041252998,Episode:249




avg loss:0.21328937483058358,Episode:250




avg loss:0.21712759398074805,Episode:251




avg loss:0.22330435122210368,Episode:252




avg loss:0.2048315566650794,Episode:253




avg loss:0.23140581810300775,Episode:254




avg loss:0.19746938568985625,Episode:255




avg loss:0.20381606072516648,Episode:256




avg loss:0.2061225601376162,Episode:257




avg loss:0.20100506431716314,Episode:258




avg loss:0.21300423254501322,Episode:259




avg loss:0.208528356220271,Episode:260




avg loss:0.20979757495753698,Episode:261




avg loss:0.19851316928547288,Episode:262




avg loss:0.2062875546345231,Episode:263




avg loss:0.19105759063558403,Episode:264




avg loss:0.20576939163652277,Episode:265




avg loss:0.21281474114296897,Episode:266




avg loss:0.1996226791768923,Episode:267




avg loss:0.20426664958274304,Episode:268




avg loss:0.19450750332512906,Episode:269




avg loss:0.19881125525971238,Episode:270




avg loss:0.19858206009609713,Episode:271




avg loss:0.20207225615110969,Episode:272




avg loss:0.20359236720636353,Episode:273




avg loss:0.1974380338743804,Episode:274




avg loss:0.2099113477670983,Episode:275




avg loss:0.19502178201242837,Episode:276




avg loss:0.19474573427405495,Episode:277




avg loss:0.20106020196365904,Episode:278




avg loss:0.19785490182321958,Episode:279




avg loss:0.19808618733783992,Episode:280




avg loss:0.20431292229966838,Episode:281




avg loss:0.20379304207503462,Episode:282




avg loss:0.19379938992006318,Episode:283




avg loss:0.203474911114169,Episode:284




avg loss:0.198579425411012,Episode:285




avg loss:0.20438274595335565,Episode:286




avg loss:0.20779695777317966,Episode:287




avg loss:0.20367536654678725,Episode:288




avg loss:0.20054874146248794,Episode:289




avg loss:0.19547165817921816,Episode:290




avg loss:0.21301124304584151,Episode:291




avg loss:0.19577716830170253,Episode:292




avg loss:0.20599197311908282,Episode:293




avg loss:0.1980260007638439,Episode:294




avg loss:0.19443726122025023,Episode:295




avg loss:0.1913212597625647,Episode:296




avg loss:0.1991789254495108,Episode:297




avg loss:0.2065449080200723,Episode:298




avg loss:0.2160513439574297,Episode:299




avg loss:0.19598760674038462,Episode:300




avg loss:0.22357494828130756,Episode:301




avg loss:0.1976555004084783,Episode:302




avg loss:0.2007262804183424,Episode:303




avg loss:0.20644401181951497,Episode:304




avg loss:0.20937770160641045,Episode:305




avg loss:0.20484540446070046,Episode:306




avg loss:0.20701429402194285,Episode:307




avg loss:0.19411622089957956,Episode:308




avg loss:0.2076720225979993,Episode:309




avg loss:0.20038992567796257,Episode:310




avg loss:0.2062237313184163,Episode:311




avg loss:0.205933865805029,Episode:312




avg loss:0.2059984250904965,Episode:313




avg loss:0.2050003716499341,Episode:314




avg loss:0.6308194368799319,Episode:315




avg loss:0.4670324283480668,Episode:316




avg loss:0.4191630437281687,Episode:317




avg loss:0.39935948825163436,Episode:318




avg loss:0.37046423481545937,Episode:319




avg loss:0.34332364535606685,Episode:320




avg loss:0.3279502556238529,Episode:321




avg loss:0.32369360951329523,Episode:322




avg loss:0.30936825287123354,Episode:323




avg loss:0.31010912405159924,Episode:324




avg loss:0.2983036098458835,Episode:325




avg loss:0.3186809941855602,Episode:326




avg loss:0.3030873501788933,Episode:327




avg loss:0.28445174430408726,Episode:328




avg loss:0.2826252861426234,Episode:329




avg loss:0.278482667922184,Episode:330




avg loss:0.27659860877441,Episode:331




avg loss:0.29392006148698296,Episode:332




avg loss:0.2795135699438763,Episode:333




avg loss:0.28621487628180475,Episode:334




avg loss:0.27737014285876754,Episode:335




avg loss:0.26733425196461813,Episode:336




avg loss:0.2677748307962934,Episode:337




avg loss:0.2666795339362041,Episode:338




avg loss:0.2628734134906229,Episode:339




avg loss:0.272202848316636,Episode:340




avg loss:0.24324303464718913,Episode:341




avg loss:0.26308242153537853,Episode:342




avg loss:0.2565262146974187,Episode:343




avg loss:0.2504734433208155,Episode:344




avg loss:0.2522698728196311,Episode:345




avg loss:0.25095679045219754,Episode:346




avg loss:0.25669859680478363,Episode:347




avg loss:0.25761097366727487,Episode:348




avg loss:0.2490445393983311,Episode:349




avg loss:0.26574554124384514,Episode:350




avg loss:0.2568337951356447,Episode:351




avg loss:0.2615051191316025,Episode:352




RuntimeError: rpc::rpc_error during call in function spawn_actor

### Visualise Models

In [None]:
target_net.eval()

In [None]:
torch.save(target_net.state_dict(), './model_params_CL/model_10.final')

In [21]:
vis_net = Full_DQN(n_actions,64).to(device)
vis_net.load_state_dict(torch.load('./model_params_CL/Full_model_6.best'))
vis_net.eval()



Full_DQN(
  (lin1): Linear(in_features=73, out_features=100, bias=True)
  (lin2): Linear(in_features=100, out_features=50, bias=True)
  (lin3): Linear(in_features=50, out_features=25, bias=True)
  (lin4): Linear(in_features=25, out_features=15, bias=True)
  (lin5): Linear(in_features=15, out_features=8, bias=True)
  (lin6): Linear(in_features=8, out_features=3, bias=True)
)

In [22]:
num_episodes = 8
env.use_fixed = 'H'
env.route_idx = 1
levels = ['E','M','H']
next_lvl = 4
min_overall_loss = 1000
input_size = 73
for i_episode in range(num_episodes):
    eps_loss = []
    rewards = []
    # Initialize the environment and state
    obs = env.reset()
    #ego_dir retirves the distance and angle from vehicle to nearest waypoint
    ego_location = env.ego.get_location()
    ego_dir = gym_carla.envs.misc.get_lane_dis(env.waypoints,ego_location.x,ego_location.y)
    #pos gets a distanc d and array w which has to be seperated out in below line
    ego_pos = np.asarray((ego_dir[0],ego_dir[1][0],ego_dir[1][1]),dtype=np.float32)
    state = np.concatenate((ego_pos,np.zeros(6)))
    state = torch.tensor(state).reshape(1,9,1,1)

    new_obs = torch.tensor(obs['camera'])
    new_obs = new_obs.permute(2,0,1).reshape(1,3,128,128)
    
    _,latent_space = model(new_obs)
    state = torch.cat((state,latent_space.cpu()),1).reshape(1,input_size)
    # Resize, and add a batch dimension (BCHW)
    loss = episode_loss = 1000
    for t in count():
        # Select and perform an action
        
        with torch.no_grad():
            action = vis_net(state.float()).argmax().view(1,1)
            obs, reward, done, info  = env.step(action.item())
            rewards.append(reward)
            reward = torch.tensor([reward], device=device)

            #pos gets a distanc d and array w which has to be seperated out in below line
            pos = np.asarray((info['position'][0],info['position'][1][0],info['position'][1][1]))
            ang = np.asarray(info['angular_vel'])
            acc = np.asarray(info['acceleration'])
            steer = np.asarray(info['steer'])
            next_state = np.concatenate((pos, ang, acc, steer), axis=None)
            
            new_obs = torch.tensor(obs['camera'])
            new_obs = new_obs.permute(2,0,1).reshape(1,3,128,128)
            _,latent_space = model(new_obs)
            info_state = torch.tensor(next_state).reshape(1,9,1,1)
            next_state = torch.cat((info_state,latent_space.cpu()),1).reshape(1,input_size)

        state = next_state

        if done:
            print('########')
            episode_durations.append(t + 1)
            break


[264, 234]


ArgumentError: Python argument types in
    Vehicle.get_wheel_steer_angle(Vehicle)
did not match C++ signature:
    get_wheel_steer_angle(carla::client::Vehicle {lvalue}, carla::rpc::VehicleWheelLocation wheel_location)