In [1]:
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T   
import glob
import os
import sys
import psutil
from tqdm import tqdm_notebook as tqdm
try:
    sys.path.append(glob.glob('../carla/dist/carla-*%d.%d-%s.egg' % (
        sys.version_info.major,
        sys.version_info.minor,
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    pass
import carla
import random
import time
import numpy as np
import cv2
#import open3d as o3d
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import math
SECONDS_PER_EPISODE = 12

In [2]:
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython: from IPython import display

In [3]:
def xxx():
    env.world.wait_for_tick()
    for x in list(env.world.get_actors()):
        if x.type_id == 'vehicle.tesla.model3' or x.type_id == 'sensor.lidar.ray_cast' or x.type_id == 'sensor.other.collision':
            x.destroy()
            
def lidar_line(points,degree,width):
    angle = degree*(2*np.pi)/360
    points_l = points
    points_l = points_l[np.logical_and(points_l[:,2] > -1.75, points_l[:,2] < 1000)] #z
    points_l = points_l[np.logical_and(np.tan(angle)*points_l[:,0]+width*np.sqrt(1+np.tan(angle)**2)>=points_l[:,1], np.tan(angle)*points_l[:,0]-width*np.sqrt(1+np.tan(angle)**2)<=points_l[:,1])] #y
    if 180>degree >0:
        points_l = points_l[np.logical_and(points_l[:,1]>0, points_l[:,1]<1000)] #y>0
    if 180<degree<360:
        points_l = points_l[np.logical_and(points_l[:,1]<0, points_l[:,1] > -1000)] #x
    if degree == 0 or degree == 360:
        points_l = points_l[np.logical_and(points_l[:,0]>0,points_l[:,0] <1000 )] #x
    if degree == 180:
        points_l = points_l[np.logical_and(points_l[:,0] >-1000 , points_l[:,0]<0 )]
    return  points_l

In [4]:
class QValues():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    @staticmethod
    def get_current(policy_net, states, actions):
        return policy_net(states).gather(dim=1, index=actions.unsqueeze(-1))
    @staticmethod        
    def get_next(target_net, next_states):                
        final_state_locations = next_states.flatten(start_dim=1) \
        .max(dim=1)[0].eq(0).type(torch.bool)
        non_final_state_locations = (final_state_locations == False)
        non_final_states = next_states[non_final_state_locations]
        batch_size = next_states.shape[0]
        values = torch.zeros(batch_size).to(QValues.device)
        values[non_final_state_locations] = target_net(non_final_states).max(dim=1)[0].detach()
        return values

In [51]:
class DQN(nn.Module):
    
    def __init__(self,state_size):
        super().__init__()
            
        self.fc1 = nn.Linear(in_features=state_size, out_features=24)   
        self.fc2 = nn.Linear(in_features=24, out_features=32)
        self.out = nn.Linear(in_features=32, out_features=2)
        
        
    def forward(self, t):
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))
        t = self.out(t)
        return t
    
class ReplayMemory():
    
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.push_count = 0
        
    def push(self, experience):
        if len(self.memory) < self.capacity:
            self.memory.append(experience)
        else:
            self.memory[self.push_count % self.capacity] = experience
        self.push_count += 1
        
    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)
    
    def can_provide_sample(self, batch_size):
        return len(self.memory) >= batch_size
    
class EpsilonGreedyStrategy():
    def __init__(self, start, end, decay):
        self.start = start
        self.end = end
        self.decay = decay
        
    def get_exploration_rate(self, current_step):
        return self.end + (self.start - self.end) * \
            math.exp(-1. * current_step * self.decay)
    
class Agent():
    def __init__(self, strategy, num_actions,device):
        self.current_step = 0
        self.strategy = strategy
        self.num_actions = num_actions
        self.device = device
        
    def select_action(self, state, policy_net):
        rate = self.strategy.get_exploration_rate(self.current_step)
        self.current_step += 1

        if rate > random.random():
            action = random.randrange(self.num_actions)
            print(action)
            return torch.tensor([action]).to(self.device) # explore      
        else:
            with torch.no_grad():
                print(policy_net(state).argmax().to(self.device))
                return policy_net(state).argmax().to(self.device) # exploit

In [6]:
Experience = namedtuple(
    'Experience',
    ('state', 'action', 'next_state', 'reward')
)

In [7]:
batch_size = 20
gamma = 0.999
eps_start = 1
eps_end = 0.01
eps_decay = 0.001
target_update = 10
memory_size = 100000
lr = 0.001
num_episodes = 1000

num_action=2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
strategy = EpsilonGreedyStrategy(eps_start, eps_end, eps_decay)
agent = Agent(strategy, num_action , device)
memory = ReplayMemory(memory_size)

state_size=3


policy_net = DQN(state_size).to(device)
target_net = DQN(state_size).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()
optimizer = optim.Adam(params=policy_net.parameters(), lr=lr)

In [8]:
class CarEnv:
    #BRAKE_AMT = 1.0
    actor_list = []
    collision_hist = []
    pt_cloud = []
    pt_cloud_filtered = []
    
    def __init__(self):
        self.client = carla.Client('localhost', 2000)
        self.client.set_timeout(2.0)
        self.world = self.client.get_world()
        blueprint_library = self.world.get_blueprint_library()
        self.model_3 = blueprint_library.filter('model3')[0]
        self.truck_2 = blueprint_library.filter('carlamotors')[0]
        #settings = self.world.get_settings()
        #settings.no_rendering_mode = True
        #self.world.apply_settings(settings)
                     
    def reset(self):
        self.collision_hist = []
        self.actor_list = []
        self.pt_cloud = []
        self.pt_cloud_filtered = []
        place=random.uniform(110,150)
        ##print('Location: ',str(place))
        #transform = carla.Transform(carla.Location(-120,place,3),carla.Rotation(0,-90,0))
        transform = carla.Transform(carla.Location(246,-36,3),carla.Rotation(0,-90,0))        
        self.flag = 0
        self.vehicle = self.world.spawn_actor(self.model_3, transform)
        self.flag = 1
        
        self.actor_list.append(self.vehicle)
     

        self.lidar_sensor = self.world.get_blueprint_library().find('sensor.lidar.ray_cast')
        self.lidar_sensor.set_attribute('points_per_second', '100000')
        self.lidar_sensor.set_attribute('channels', '32')
        self.lidar_sensor.set_attribute('range', '10000')
        self.lidar_sensor.set_attribute('upper_fov', '10')
        self.lidar_sensor.set_attribute('lower_fov', '-10')
        self.lidar_sensor.set_attribute('rotation_frequency', '60')
        
        transform = carla.Transform(carla.Location(x=0, z=1.9))
        time.sleep(0.01)

        self.sensor = self.world.spawn_actor(self.lidar_sensor, transform, attach_to=self.vehicle)
     
        self.actor_list.append(self.sensor)
        self.sensor.listen(lambda data: self.process_lidar(data))

        self.vehicle.apply_control(carla.VehicleControl(throttle=1, brake=0.0))
        self.episode_start = time.time()
   
        time.sleep(0.4) # sleep to get things started and to not detect a collision when the car spawns/falls from sky.
        
        transform2 = carla.Transform(carla.Location(x=2.5, z=0.7))
        colsensor = self.world.get_blueprint_library().find('sensor.other.collision')
    
        #time.sleep(0.1)
        self.colsensor = self.world.spawn_actor(colsensor, transform2, attach_to=self.vehicle)
   
        self.actor_list.append(self.colsensor)
        self.colsensor.listen(lambda event: self.collision_data(event))

        while self.distance_to_obstacle_f is None:
            time.sleep(0.01)

        self.episode_start = time.time()
        
        self.vehicle.apply_control(carla.VehicleControl(throttle=1, brake=0.0))

        xx = self.distance_to_obstacle_f
        yy = self.distance_to_obstacle_r
        zz = self.distance_to_obstacle_l
   
        
        state_=np.array([xx,yy,zz])
        return state_

    def collision_data(self, event):
        self.collision_hist.append(event)

    def process_lidar(self, raw):
        points = np.frombuffer(raw.raw_data, dtype=np.dtype('f4'))
        points = np.reshape(points, (int(points.shape[0] / 3), 3))*np.array([1,-1,-1])
        
        lidar_f = lidar_line(points,90,2)
        lidar_r = lidar_line(points,45,2)
        lidar_l = lidar_line(points,135,2)

        if len(lidar_f) == 0:
            pass
        else:
            self.distance_to_obstacle_f = min(lidar_f[:,1])-2.247148275375366
        
        if len(lidar_r) == 0:
            pass
        else:
            self.distance_to_obstacle_r = np.sqrt(min(lidar_r[:,0]**2 + lidar_r[:,1]**2))
        
        if len(lidar_l) == 0:
            pass
        else:
            self.distance_to_obstacle_l = np.sqrt(min(lidar_l[:,0]**2 + lidar_l[:,1]**2))
    

    def step(self, action):
        sleepy=0.1
        if action == 0:
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, brake=0.0, steer = 0.3))
            time.sleep(sleepy)
            reward = 0.1
        elif action == 1:
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, brake=0.0, steer = -0.3))
            time.sleep(sleepy)
            reward =0.1

        
        if len(self.collision_hist) != 0:
            done = True
            reward = -10
        else :
            done=False
            reward=0.01
            
        if self.episode_start + SECONDS_PER_EPISODE < time.time():
            done = True
            
        xx = self.distance_to_obstacle_f
        yy = self.distance_to_obstacle_r
        zz = self.distance_to_obstacle_l
        state_=np.array([xx,yy,zz])
            
        return state_, reward, done, None
    


In [42]:
def extract_tensors(experiences):
    # Convert batch of Experiences to Experience of batches
    batch = Experience(*zip(*experiences))

    t1 = torch.cat(batch.state)
    t2 = torch.cat(batch.action)
    t3 = torch.cat(batch.reward)
    t4 = torch.cat(batch.next_state)

    return (t1,t2,t3,t4)

In [52]:
episode_durations = []
env = CarEnv()
xxx()
for episode in range(num_episodes):
    state = env.reset()
    state = torch.from_numpy(state).float().to(device)
    print(episode)
    
    for timestep in count():
        action = agent.select_action(state, policy_net)
        
        next_state, reward, done, _ = env.step(action)
        memory.push(Experience(state, action[0], next_state, reward))
        
        state = torch.from_numpy(next_state).float().to(device)
        
        if memory.can_provide_sample(batch_size):
            print('TRAIN')
            experiences = memory.sample(batch_size)
            states, actions, rewards, next_states = extract_tensors(experiences)
    
            current_q_values = QValues.get_current(policy_net, states, actions)
            next_q_values = QValues.get_next(target_net, next_states)
            target_q_values = (next_q_values * gamma) + rewards

            loss = F.mse_loss(current_q_values, target_q_values.unsqueeze(1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(loss)
        
            
        if done:
            episode_durations.append(timestep)
            #plot(episode_durations, 100)
            for actor in env.actor_list:
                actor.destroy()
            break
            
    if episode % target_update == 0:
        target_net.load_state_dict(policy_net.state_dict())
        


0
TRAIN


RuntimeError: zero-dimensional tensor (at position 0) cannot be concatenated

In [None]:
xxx()

In [44]:
experiences


[Experience(state=tensor([22.5353,  7.6613,  6.1497], device='cuda:0'), action=tensor([1], device='cuda:0'), next_state=array([22.53486276,  7.66132332,  6.1496472 ]), reward=0.01),
 Experience(state=tensor([-0.8140, 23.6337,  2.4432], device='cuda:0'), action=tensor([0], device='cuda:0'), next_state=array([-2.24196219,  1.57889511,  1.57889511]), reward=-10),
 Experience(state=tensor([22.5829,  7.6601,  6.1505], device='cuda:0'), action=tensor([1], device='cuda:0'), next_state=array([22.54787469,  7.66092366,  6.14993414]), reward=0.01),
 Experience(state=tensor([15.8860,  9.6127,  5.3174], device='cuda:0'), action=tensor([0], device='cuda:0'), next_state=array([ 5.15769887, 11.33385551,  4.4246896 ]), reward=0.01),
 Experience(state=tensor([22.6202,  7.6595,  6.1509], device='cuda:0'), action=tensor([1], device='cuda:0'), next_state=array([22.54787469,  7.66092366,  6.14993414]), reward=0.01),
 Experience(state=tensor([22.5374,  7.6611,  6.1498], device='cuda:0'), action=tensor([1], 

In [26]:
batch = Experience(*zip(*experiences))

In [53]:
torch.cat(0,1,2)

TypeError: cat() takes from 1 to 2 positional arguments but 3 were given

In [56]:
torch.cat(batch.state)

tensor([21.3053,  7.5203,  6.2569, 22.5606,  7.6607,  6.1501, 15.8860,  9.6127,
         5.3174,  7.7565,  5.5869, 12.1333, 22.5353,  7.6613,  6.1497, 20.8647,
         8.1245,  5.8071,  5.1577, 11.3339,  4.4247, 22.4091,  7.6668,  6.1432,
        22.6202,  7.6595,  6.1509, 22.1705,  7.8470,  5.9745, 22.5829,  7.6601,
         6.1505, 22.5374,  7.6611,  6.1498, 22.5353,  7.6613,  6.1497, 22.5374,
         7.6611,  6.1498, -0.8140, 23.6337,  2.4432, 19.6463,  6.9926,  6.8442,
        22.5349,  7.6613,  6.1496, 22.5353,  7.6613,  6.1497,  5.1493, 11.2286,
         3.6714,  1.4239,  3.8718, 32.1056], device='cuda:0')

In [54]:
torch.cat(batch.reward)

TypeError: expected Tensor as element 0 in argument 0, but got float

In [39]:
batch.reward

(0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 0.01,
 -10,
 0.01,
 0.01,
 0.01,
 -10,
 -10)