In [2]:
import numpy as np
import pybullet as p
import time
import gym
import gym_pybullet_mobilerobot
from gym import spaces
from gym.utils import seeding

import os
import json
import random
import time
# import sys
# sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
import torch
import torch.nn.functional as F
import gc
import torch.nn as nn
import math
from collections import deque
#---Directory Path---#
dirPath = os.path.dirname(os.path.realpath(__file__))
#---Functions to make network updates---#

NameError: name '__file__' is not defined

In [4]:
def soft_update(target, source, tau):
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(target_param.data*(1.0 - tau)+ param.data*tau)

def hard_update(target,source):
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)

#---Ornstein-Uhlenbeck Noise for action---#

class ActionNoise:
    # Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
    def __init__(self, action_dim, mu=0, theta=0.15, sigma=0.2):
        self.action_dim = action_dim
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.X = np.ones(self.action_dim)*self.mu

    def reset(self):
        self.X = np.ones(self.action_dim)*self.mu

    def sample(self):
        dx = self.theta*(self.mu - self.X)
        dx = dx + self.sigma*np.random.randn(len(self.X))
        self.X = self.X + dx
        print('aqu2i' + str(self.X))
        return self.X

#---Critic--#

EPS = 0.003
def fanin_init(size, fanin=None):
    fanin = fanin or size[0]
    v = 1./np.sqrt(fanin)
    return torch.Tensor(size).uniform_(-v,v)

class Critic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Critic, self).__init__()

        self.state_dim = state_dim = state_dim
        self.action_dim = action_dim

        self.fc1 = nn.Linear(state_dim, 256)
        self.fc1.weight.data = fanin_init(self.fc1.weight.data.size())

        self.fa1 = nn.Linear(action_dim, 256)
        self.fa1.weight.data = fanin_init(self.fa1.weight.data.size())

        self.fca1 = nn.Linear(512, 512)
        self.fca1.weight.data = fanin_init(self.fca1.weight.data.size())

        self.fca2 = nn.Linear(512, 1)
        self.fca2.weight.data.uniform_(-EPS, EPS)

    def forward(self, state, action):
        xs = torch.relu(self.fc1(state))
        xa = torch.relu(self.fa1(action))
        x = torch.cat((xs,xa), dim=1)
        x = torch.relu(self.fca1(x))
        vs = self.fca2(x)
        return vs

#---Actor---#

class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, action_limit_v, action_limit_w):
        super(Actor, self).__init__()
        self.state_dim = state_dim = state_dim
        self.action_dim = action_dim
        self.action_limit_v = action_limit_v
        self.action_limit_w = action_limit_w

        self.fa1 = nn.Linear(state_dim, 512)
        self.fa1.weight.data = fanin_init(self.fa1.weight.data.size())

        self.fa2 = nn.Linear(512, 512)
        self.fa2.weight.data = fanin_init(self.fa2.weight.data.size())

        self.fa3 = nn.Linear(512, action_dim)
        self.fa3.weight.data.uniform_(-EPS,EPS)

    def forward(self, state):
        x = torch.relu(self.fa1(state))
        x = torch.relu(self.fa2(x))
        action = self.fa3(x)
        if state.shape == torch.Size([14]):
            action[0] = torch.sigmoid(action[0])*self.action_limit_v
            action[1] = torch.tanh(action[1])*self.action_limit_w
        else:
            action[:,0] = torch.sigmoid(action[:,0])*self.action_limit_v
            action[:,1] = torch.tanh(action[:,1])*self.action_limit_w
        return action

#---Memory Buffer---#

class MemoryBuffer:
    def __init__(self, size):
        self.buffer = deque(maxlen=size)
        self.maxSize = size
        self.len = 0

    def sample(self, count):
        batch = []
        count = min(count, self.len)
        batch = random.sample(self.buffer, count)

        s_array = np.float32([array[0] for array in batch])
        a_array = np.float32([array[1] for array in batch])
        r_array = np.float32([array[2] for array in batch])
        new_s_array = np.float32([array[3] for array in batch])

        return s_array, a_array, r_array, new_s_array

    def len(self):
        return self.len

    def add(self, s, a, r, new_s):
        transition = (s, a, r, new_s)
        self.len += 1
        if self.len > self.maxSize:
            self.len = self.maxSize
        self.buffer.append(transition)


In [5]:
#---Where the train is made---#

BATCH_SIZE = 128
LEARNING_RATE = 0.001
GAMMA = 0.99
TAU = 0.001

class Trainer:

    def __init__(self, state_dim, action_dim, action_limit_v, action_limit_w, ram):

        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_limit_v = action_limit_v
        self.action_limit_w = action_limit_w
        #print('w',self.action_limit_w)
        self.ram = ram
        #self.iter = 0
        self.noise = ActionNoise(self.action_dim)

        self.actor = Actor(self.state_dim, self.action_dim, self.action_limit_v, self.action_limit_w)
        self.target_actor = Actor(self.state_dim, self.action_dim, self.action_limit_v, self.action_limit_w)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), LEARNING_RATE)

        self.critic = Critic(self.state_dim, self.action_dim)
        self.target_critic = Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), LEARNING_RATE)

        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)

    def get_exploitation_action(self,state):
        state = torch.from_numpy(state)
        action = self.target_actor.forward(state).detach()
        #print('actionploi', action)
        return action.data.numpy()

    def get_exploration_action(self, state):
        state = torch.from_numpy(state)
        action = self.actor.forward(state).detach()
        #noise = self.noise.sample()
        #print('noisea', noise)
        #noise[0] = noise[0]*self.action_limit_v
        #noise[1] = noise[1]*self.action_limit_w
        #print('noise', noise)
        new_action = action.data.numpy() #+ noise
        #print('action_no', new_action)
        return new_action

    def optimizer(self):
        s_sample, a_sample, r_sample, new_s_sample = ram.sample(BATCH_SIZE)

        s_sample = torch.from_numpy(s_sample)
        a_sample = torch.from_numpy(a_sample)
        r_sample = torch.from_numpy(r_sample)
        new_s_sample = torch.from_numpy(new_s_sample)

        #-------------- optimize critic

        a_target = self.target_actor.forward(new_s_sample).detach()
        next_value = torch.squeeze(self.target_critic.forward(new_s_sample, a_target).detach())
        # y_exp = r _ gamma*Q'(s', P'(s'))
        y_expected = r_sample + GAMMA*next_value
        # y_pred = Q(s,a)
        y_predicted = torch.squeeze(self.critic.forward(s_sample, a_sample))

        #----------------------------
        loss_critic = F.smooth_l1_loss(y_predicted, y_expected)

        self.critic_optimizer.zero_grad()
        loss_critic.backward()
        self.critic_optimizer.step()

        #------------ optimize actor
        pred_a_sample = self.actor.forward(s_sample)
        loss_actor = -1*torch.sum(self.critic.forward(s_sample, pred_a_sample))

        self.actor_optimizer.zero_grad()
        loss_actor.backward()
        self.actor_optimizer.step()

        soft_update(self.target_actor, self.actor, TAU)
        soft_update(self.target_critic, self.critic, TAU)

    def save_models(self, episode_count):
        torch.save(self.target_actor.state_dict(), '/Users/sarathmenon/Downloads/TD3-master/project_exp_models/'+str(episode_count)+'_actor.pt')
        torch.save(self.target_critic.state_dict(), '/Users/sarathmenon/Downloads/TD3-master/project_exp_models/'+str(episode_count)+'_critic.pt')
        print('****Models saved***')

    def load_models(self, episode_count):
        self.actor.load_state_dict(torch.load('/Users/sarathmenon/Downloads/TD3-master/project_exp_models/'+str(episode_count)+'_actor.pt'))
        self.critic.load_state_dict(torch.load('/Users/sarathmenon/Downloads/TD3-master/project_exp_models/'+str(episode_count)+'_critic.pt'))
        hard_update(self.target_actor, self.actor)
        hard_update(self.target_critic, self.critic)
        print('***Models load***')

#---Run agent---#

is_training = True

if is_training:
    exploration_rate = 1
    max_exploration_rate = 1
    min_exploration_rate = 0.05
else:
    exploration_rate = 0.05
    max_exploration_rate = 0.05
    min_exploration_rate = 0.05

exploration_decay_rate = 0.001

MAX_EPISODES = 10001
MAX_STEPS = 1000          # Important parameter
MAX_BUFFER = 100000
rewards_all_episodes = []

STATE_DIMENSION = 14
ACTION_DIMENSION = 2
ACTION_V_MAX = 2.0 # m/s
ACTION_W_MAX = 1.0# rad/s

if is_training:
    var_v = ACTION_V_MAX*0.20
    var_w = ACTION_W_MAX*2*0.20
else:
    var_v = ACTION_V_MAX*0.10
    var_w = ACTION_W_MAX*0.10

print('State Dimensions: ' + str(STATE_DIMENSION))
print('Action Dimensions: ' + str(ACTION_DIMENSION))
print('Action Max: ' + str(ACTION_V_MAX) + ' m/s and ' + str(ACTION_W_MAX) + ' rad/s')
ram = MemoryBuffer(MAX_BUFFER)
trainer = Trainer(STATE_DIMENSION, ACTION_DIMENSION, ACTION_V_MAX, ACTION_W_MAX, ram)
episode_count = 160
trainer.load_models(episode_count)

State Dimensions: 14
Action Dimensions: 2
Action Max: 2.0 m/s and 1.0 rad/s
***Models load***


In [6]:
if __name__ == '__main__':
    env = gym.make("Mobilerobot_Pybullet-v0")

    start_time = time.time()
    past_action = np.array([0.,0.])

    for ep in range(MAX_EPISODES):
        ep+=episode_count
        done = False
        state = env.reset()
        print('Episode: ' + str(ep))

        rewards_current_episode = 0
        for step in range(MAX_STEPS):
            #print(step)
            state = np.float32(state)
            #action = trainer.get_exploration_action(state)
            #print('actionA:', action)

            #action[0] = np.clip(np.random.normal(action[0], var_v), 0., ACTION_V_MAX)
            #action[1] = np.clip(np.random.normal(action[1], var_w), -ACTION_W_MAX, ACTION_W_MAX)
            #print('actionD', action)

            if is_training:
                #if ep%2 == 0:
                #    action = trainer.get_exploitation_action(state)
                #    #print('aa', action)
                #else:
                action = trainer.get_exploration_action(state)
                #    #print('aa',action)
                action[0] = np.clip(np.random.normal(action[0], var_v), 0., ACTION_V_MAX)
                action[1] = np.clip(np.random.normal(action[1], var_w), -ACTION_W_MAX, ACTION_W_MAX)
                #action[0] = np.clip(action[0], 0., ACTION_V_MAX)
                #action[1] = np.clip(action[1], -ACTION_W_MAX, ACTION_W_MAX)
            #print('af', action)

            #exploration_rate_threshold = random.uniform(0., 1.)
            #if exploration_rate_threshold > exploration_rate:
            #    action = trainer.get_exploration_action(state)
            #else:
            #    action = np.array([np.random.uniform(0,0.22), np.random.uniform(-1,1)])

            if not is_training:
                #print('nor')
                action = trainer.get_exploitation_action(state)
            #print('state',state)
            #print('action',action)
            #print('ap',past_action)
            print('Linear:',action[0],'Angular:',action[1])
            action = env.differential_drive(action)
            next_state, reward, done, _ = env.step(action)#, past_action)
            print('episode_count:',ep,'action', action,'r',reward)
            # past_action = action

            rewards_current_episode += reward
            next_state = np.float32(next_state)
            ram.add(state, action, reward, next_state)
            state = next_state

            #action = np.array([np.random.uniform(0.,0.15), np.random.uniform(-0.5, 0.5)])
            #print('r: ' + str(reward) + ' and done: ' + str(done))

            if ram.len >= 2*MAX_STEPS and is_training:
                var_v = max([var_v*0.99999, 0.10*ACTION_V_MAX])
                var_w = max([var_w*0.99999, 0.10*ACTION_W_MAX])
                trainer.optimizer()
            #if is_training:
            #    trainer.optimizer()

            if done or step == MAX_STEPS-1:
                print('reward per ep: ' + str(rewards_current_episode))
                print('explore_v: ' + str(var_v) + ' and explore_w: ' + str(var_w))
                rewards_all_episodes.append(rewards_current_episode)
                m, s = divmod(int(time.time() - start_time), 60)
                h, m = divmod(m, 60)
                break
        exploration_rate = (min_exploration_rate +
                (max_exploration_rate - min_exploration_rate)* np.exp(-exploration_decay_rate*ep))
        gc.collect()
        #print('exp:', exploration_rate)
        if ep%10 == 0:
            trainer.save_models(ep)

print('Completed Training')

1234: /Users/sarathmenon/Downloads/gym_pybullet_mobilerobot/gym_pybullet_mobilerobot/envs/data/wall.urdf
Episode: 160
Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r 393.49164787025705
Linear: 0.0 Angular: -0.63662535
yes
episode_count: 160 action [-6.3662535  6.3662535] r -8.0
Linear: 0.5364777 Angular: -0.4131098
yes
episode_count: 160 action [1.23367876 9.49587464] r -8.0
Linear: 0.04231024 Angular: -1.0
yes
episode_count: 160 action [-9.57689762 10.42310238] r -8.0
Linear: 0.5222706 Angular: -1.0
yes
episode_count: 160 action [-4.7772938  15.22270679] r -8.0
Linear: 0.031041699 Angular: -1.0
yes
episode_count: 160 action [-9.68958318 10.31041741] r -8.0
Linear: 0.44213066 Angular: -0.54430366
yes
episode_count: 160 action [-1.02173001  9.8643434 ] r -8.0
Linear: 0.5875429 Angular: -1.0
yes
episode_count: 160 action [-4.12457108 15.87542892] r -8.0
Linear: 0.16075706 Angular: -0.3447336
yes
episode_count: 160 action [-1.83976531  5.05490661] r -8.0
Linear: 0.0 

Linear: 0.6630479 Angular: 0.1136212
yes
episode_count: 160 action [7.76669085 5.49426734] r -8.0
Linear: 0.0 Angular: 0.7209717
yes
episode_count: 160 action [ 7.20971704 -7.20971704] r -8.0
Linear: 0.40550527 Angular: -0.35641342
yes
episode_count: 160 action [0.49091846 7.61918664] r -8.0
Linear: 0.0 Angular: 0.3651417
yes
episode_count: 160 action [ 3.6514169 -3.6514169] r -8.0
Linear: 0.0 Angular: -0.6046883
yes
episode_count: 160 action [-6.04688287  6.04688287] r -8.0
Linear: 0.0 Angular: 0.046606712
yes
episode_count: 160 action [ 0.46606712 -0.46606712] r -8.0
Linear: 0.5125793 Angular: -0.18750508
yes
episode_count: 160 action [3.25074255 7.00084388] r -8.0
Linear: 0.0 Angular: -0.51210445
yes
episode_count: 160 action [-5.12104452  5.12104452] r -8.0
Linear: 0.0 Angular: -0.14043929
yes
episode_count: 160 action [-1.40439287  1.40439287] r -8.0
Linear: 0.0 Angular: 0.5210485
yes
episode_count: 160 action [ 5.21048486 -5.21048486] r -8.0
Linear: 0.16834296 Angular: -0.1394875

Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r 0.11911260262249534
Linear: 0.37886682 Angular: -0.96545863
yes
episode_count: 160 action [-5.8659184  13.44325423] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 160 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 0.21002525
yes
episode_count: 160 action [ 2.10025251 -2.10025251] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 160 action [ 10. -10.] r -8.0
Linear: 0.3460913 Angular: 0.6687316
yes
episode_count: 160 action [10.14822841 -3.2264027 ] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 160 action [ 10. -10.] r -8.0
Linear: 0.14475065 Angular: 1.0
yes
episode_count: 160 action [11.44750595 -8.55249345] r -8.0
Linear: 0.11555475 Angular: 0.83798975
yes
episode_count: 160 action [ 9.53544497 -7.22434998] r -8.0
Linear: 0.0 Angular: 0.50837433
yes
episode_count: 160 action [ 5.08374333 -5.08374333] r -8.0
Linear: 0.0 Angular: 0.74911
yes
episode_count: 160 action [ 7.49109983 -7.49109983] r -8.0
Linear

Linear: 0.0 Angular: 1.0
yes
episode_count: 160 action [ 10. -10.] r -8.0
Linear: 0.12565145 Angular: 0.9728865
yes
episode_count: 160 action [10.98537922 -8.47235084] r -8.0
Linear: 0.061033115 Angular: 0.999326
yes
episode_count: 160 action [10.6035912  -9.38292861] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 160 action [ 10. -10.] r -8.0
Linear: 0.25218448 Angular: 0.8570943
yes
episode_count: 160 action [11.09278798 -6.04909778] r -8.0
Linear: 0.2754003 Angular: -1.0
yes
episode_count: 160 action [-7.24599719 12.75400281] r -8.0
Linear: 0.75298554 Angular: -1.0
yes
episode_count: 160 action [-2.47014463 17.52985477] r -8.0
Linear: 0.0 Angular: -0.82122993
yes
episode_count: 160 action [-8.21229935  8.21229935] r -8.0
Linear: 0.5170892 Angular: -0.8094108
yes
episode_count: 160 action [-2.92321622 13.26499939] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r -8.0
Linear: 0.031737704 Angular: -0.7494366
yes
episode_count: 160 action [-7.17698932  7.

Linear: 0.0 Angular: 0.2753389
yes
episode_count: 160 action [ 2.75338888 -2.75338888] r -8.0
Linear: 0.0 Angular: 0.6284546
yes
episode_count: 160 action [ 6.28454626 -6.28454626] r 0.013469650893860319
Linear: 0.22444892 Angular: 1.0
yes
episode_count: 160 action [12.24448919 -7.75551081] r -8.0
Linear: 0.16059467 Angular: 1.0
yes
episode_count: 160 action [11.60594702 -8.39405298] r 0.012034947572470145
Linear: 0.0 Angular: 0.80527526
yes
episode_count: 160 action [ 8.05275261 -8.05275261] r 0.034512758562144086
Linear: 0.280641 Angular: 1.0
yes
episode_count: 160 action [12.8064096 -7.1935904] r -8.0
Linear: 0.0 Angular: 0.7345228
yes
episode_count: 160 action [ 7.3452282 -7.3452282] r -8.0
Linear: 0.0 Angular: -0.92474145
yes
episode_count: 160 action [-9.24741447  9.24741447] r 0.08558703570060722
Linear: 0.18863955 Angular: -1.0
yes
episode_count: 160 action [-8.11360478 11.88639522] r 0.1020334479963303
Linear: 0.35321465 Angular: -1.0
yes
episode_count: 160 action [-6.46785378

Linear: 0.44368342 Angular: -1.0
yes
episode_count: 160 action [-5.56316614 14.43683386] r 0.11633016683125774
Linear: 0.07802294 Angular: -1.0
yes
episode_count: 160 action [-9.21977043 10.78022957] r 0.11948315249870056
Linear: 0.4510583 Angular: -0.963284
yes
episode_count: 160 action [-5.12225747 14.14342284] r -8.0
Linear: 0.0 Angular: -0.6568926
yes
episode_count: 160 action [-6.56892598  6.56892598] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 160 action [ 10. -10.] r -8.0
Linear: 0.15976644 Angular: 0.9289811
yes
episode_count: 160 action [10.88747501 -7.6921469 ] r -8.0
Linear: 0.0 Angular: 0.9947691
yes
episode_count: 160 action [ 9.94769096 -9.94769096] r -8.0
Linear: 0.61488307 Angular: 1.0
yes
episode_count: 160 action [16.14883065 -3.85116935] r -8.0
Linear: 0.3090821 Angular: 0.7474767
yes
episode_count: 160 action [10.56558847 -4.38394606] r -8.0
Linear: 0.079772495 Angular: -1.0
yes
episode_count: 160 action [-9.20227528 10.79772472] r -8.0
Linear: 0.0 Angular: -

Linear: 0.20766115 Angular: 0.20924678
yes
episode_count: 160 action [ 4.16907936 -0.01585633] r -8.0
Linear: 0.49744427 Angular: 0.9213358
yes
episode_count: 160 action [14.18780088 -4.23891544] r -8.0
Linear: 0.4064413 Angular: 0.7551261
yes
episode_count: 160 action [11.6156745  -3.48684818] r -8.0
Linear: 0.49763358 Angular: -1.0
yes
episode_count: 160 action [-5.02366424 14.97633576] r -8.0
Linear: 0.0 Angular: -0.92266107
yes
episode_count: 160 action [-9.22661066  9.22661066] r -8.0
Linear: 0.0 Angular: -0.7373294
yes
episode_count: 160 action [-7.37329423  7.37329423] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r -8.0
Linear: 0.0 Angular: 0.7400945
yes
episode_count: 160 action [ 7.40094483 -7.40094483] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 160 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 0.56291294
yes
episode_count: 160 action [ 5.62912941 -5.62912941] r -8.0
Linear: 0.93264824 Angular: 1.0
yes
episode_count: 160 action [19.326481

Linear: 0.0 Angular: 0.47271943
yes
episode_count: 160 action [ 4.72719431 -4.72719431] r -8.0
Linear: 0.51710355 Angular: 0.78693545
yes
episode_count: 160 action [13.04039001 -2.69831896] r -8.0
Linear: 0.042153362 Angular: 1.0
yes
episode_count: 160 action [10.42153358 -9.57846642] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 160 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 0.9271079
yes
episode_count: 160 action [ 9.27107871 -9.27107871] r -8.0
Linear: 0.38252112 Angular: -1.0
yes
episode_count: 160 action [-6.17478848 13.82521152] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r -8.0
Linear: 0.6886347 Angular: -1.0
yes
episode_count: 160 action [-3.11365306 16.88634634] r -8.0
Linear: 0.17479073 Angular: 0.72550803
yes
episo

Linear: 0.0 Angular: 0.4876232
yes
episode_count: 160 action [ 4.87623215 -4.87623215] r -8.0
Linear: 0.10315961 Angular: -1.0
yes
episode_count: 160 action [-8.96840394 11.03159666] r -8.0
Linear: 0.58955675 Angular: -1.0
yes
episode_count: 160 action [-4.10443246 15.89556694] r 0.031132224489605065
Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r 0.07936107037820328
Linear: 0.0 Angular: -0.311913
yes
episode_count: 160 action [-3.11913013  3.11913013] r 0.1675328324883374
Linear: 0.2777291 Angular: -0.8682541
yes
episode_count: 160 action [-5.90525031 11.45983219] r 0.07164061712496839
Linear: 0.21382807 Angular: 0.34195095
yes
episode_count: 160 action [ 5.5577904  -1.28122881] r 0.017436504581613832
Linear: 0.0 Angular: 0.6865476
yes
episode_count: 160 action [ 6.86547577 -6.86547577] r -8.0
Linear: 0.027600188 Angular: 1.0
yes
episode_count: 160 action [10.27600169 -9.72399831] r -8.0
Linear: 0.0 Angular: 0.39874324
yes
episode_count: 160 action [ 3.98743242 -

Linear: 0.0 Angular: -1.0
yes
episode_count: 160 action [-10.  10.] r -8.0
Linear: 0.012839672 Angular: -1.0
yes
episode_count: 160 action [-9.87160325 10.12839675] r 0.009446824648340346
Linear: 0.0 Angular: -0.5698265
yes
episode_count: 160 action [-5.69826484  5.69826484] r -8.0
Linear: 0.07813047 Angular: -1.0
yes
episode_count: 160 action [-9.21869516 10.78130484] r -8.0
Linear: 0.008154476 Angular: -1.0
yes
episode_count: 160 action [-9.91845548 10.08154511] r 0.015102417282841785
Linear: 0.0 Angular: 0.16190171
yes
episode_count: 160 action [ 1.61901712 -1.61901712] r 0.024095467905249635
Linear: 0.3798574 Angular: 0.79073465
yes
episode_count: 160 action [11.7059207  -4.10877258] r 0.03162127211231436
Linear: 0.48597625 Angular: 0.9686938
yes
episode_count: 160 action [14.54670072 -4.82717544] r 0.10206048572143356
Linear: 0.4245116 Angular: 0.8845454
yes
episode_count: 160 action [13.09056997 -4.60033774] r 0.05263398887065396
Linear: 0.0 Angular: 1.0
yes
episode_count: 160 ac

Linear: 0.06271907 Angular: 1.0
yes
episode_count: 161 action [10.62719107 -9.37280953] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 161 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 161 action [ 10. -10.] r -8.0
Linear: 0.08173599 Angular: 0.9185092
yes
episode_count: 161 action [10.00245214 -8.36773217] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 161 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 0.7991344
yes
episode_count: 161 action [ 7.99134374 -7.99134374] r -8.0
Linear: 0.0 Angular: 0.66391754
yes
episode_count: 161 action [ 6.63917542 -6.63917542] r -8.0
Linear: 0.33988348 Angular: -1.0
yes
episode_count: 161 action [-6.60116553 13.39883447] r -8.0
Linear: 0.3608476 Angular: -0.6803166
yes
episode_count: 161 action [-3.19469035 10.4116416 ] r -8.0
Linear: 0.0 Angular: -0.8929546
yes
episode_count: 161 action [-8.92954588  8.92954588] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 161 action [-10.  10.] r -8.0
Linear: 0.0 Angular: -0.99

Linear: 0.0 Angular: 0.43053904
yes
episode_count: 161 action [ 4.30539042 -4.30539042] r -8.0
Linear: 0.46600884 Angular: 0.70384544
yes
episode_count: 161 action [11.69854283 -2.37836599] r -8.0
Linear: 0.6123211 Angular: 0.9094455
yes
episode_count: 161 action [15.21766663 -2.97124445] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 161 action [ 10. -10.] r -8.0
Linear: 0.3589339 Angular: 0.42504638
yes
episode_count: 161 action [ 7.8398025  -0.66112489] r -8.0
Linear: 0.0 Angular: 0.5960737
yes
episode_count: 161 action [ 5.96073687 -5.96073687] r -8.0
Linear: 0.01221876 Angular: 0.7191137
yes
episode_count: 161 action [ 7.31332481 -7.06894934] r -8.0
Linear: 0.32063276 Angular: 1.0
yes
episode_count: 161 action [13.20632696 -6.79367244] r -8.0
Linear: 0.01578677 Angular: 1.0
yes
episode_count: 161 action [10.15786767 -9.84213233] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 161 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 0.949472
yes
episode_count: 161 action [ 9.49472

Linear: 0.016763667 Angular: 0.6863988
yes
episode_count: 161 action [ 7.03162491 -6.69635117] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 161 action [ 10. -10.] r -8.0
Linear: 0.27464744 Angular: 0.58516395
yes
episode_count: 161 action [ 8.59811425 -3.10516506] r -8.0
Linear: 0.0 Angular: 0.52546155
yes
episode_count: 161 action [ 5.25461555 -5.25461555] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 161 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 161 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 0.69869286
yes
episode_count: 161 action [ 6.98692858 -6.98692858] r 0.008675529821289274
Linear: 0.07674952 Angular: -1.0
yes
episode_count: 161 action [-9.23250496 10.76749563] r -8.0
Linear: 0.0 Angular: -0.6659619
yes
episode_count: 161 action [-6.65961921  6.65961921] r -8.0
Linear: 0.0 Angular: -0.38352463
yes
episode_count: 161 action [-3.83524626  3.83524626] r -8.0
Linear: 0.01921563 Angular: -0.9641534
yes
episode_count: 161 action [-9.44937766

Linear: 0.608565 Angular: 0.9473341
yes
episode_count: 162 action [15.55899143 -3.38769138] r 149.44780549735057
Linear: 0.32732686 Angular: 1.0
yes
episode_count: 162 action [13.27326894 -6.72673106] r 0.01554349341468253
Linear: 0.0 Angular: 0.9484018
yes
episode_count: 162 action [ 9.48401809 -9.48401809] r 0.07065147287685924
Linear: 0.46705988 Angular: 0.83490986
yes
episode_count: 162 action [13.01969767 -3.67849976] r -8.0
Linear: 0.0 Angular: 0.20508361
yes
episode_count: 162 action [ 2.05083609 -2.05083609] r -8.0
Linear: 0.20021473 Angular: 0.8989535
yes
episode_count: 162 action [10.99168181 -6.98738754] r -8.0
Linear: 0.08452476 Angular: 0.8503785
yes
episode_count: 162 action [ 9.34903264 -7.65853763] r -8.0
Linear: 0.0 Angular: 0.6048716
yes
episode_count: 162 action [ 6.04871571 -6.04871571] r -8.0
Linear: 0.20296207 Angular: 1.0
yes
episode_count: 162 action [12.02962041 -7.97037959] r -8.0
Linear: 0.0 Angular: 0.84066606
yes
episode_count: 162 action [ 8.40666056 -8.40

episode_count: 162 action [11.45321369 -8.54678631] r -8.0
Linear: 0.0 Angular: 0.3308974
yes
episode_count: 162 action [ 3.30897391 -3.30897391] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 162 action [-10.  10.] r -8.0
Linear: 0.437413 Angular: -0.8850712
yes
episode_count: 162 action [-4.47658211 13.22484255] r -8.0
Linear: 0.0 Angular: -0.88187206
yes
episode_count: 162 action [-8.81872058  8.81872058] r 0.03941088359598055
Linear: 0.74321944 Angular: -0.7143953
yes
episode_count: 162 action [ 0.28824151 14.5761466 ] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 162 action [ 10. -10.] r -8.0
Linear: 0.26165143 Angular: 0.38785285
yes
episode_count: 162 action [ 6.49504304 -1.26201421] r -8.0
Linear: 0.0 Angular: -0.94888896
yes
episode_count: 162 action [-9.48888958  9.48888958] r -8.0
Linear: 0.32408795 Angular: -1.0
yes
episode_count: 162 action [-6.75912023 13.24087977] r -8.0
Linear: 0.0 Angular: 0.8560235
yes
episode_count: 162 action [ 8.5602349 -8.5602349] r -8.0

Linear: 0.0 Angular: 1.0
yes
episode_count: 162 action [ 10. -10.] r 0.1802292218203716
Linear: 1.06897 Angular: -1.0
yes
episode_count: 162 action [ 0.68969965 20.68969965] r -8.0
Linear: 0.0 Angular: -0.26009873
yes
episode_count: 162 action [-2.60098726  2.60098726] r -8.0
Linear: 0.40670803 Angular: -0.72965705
yes
episode_count: 162 action [-3.22949022 11.36365056] r -8.0
Linear: 0.5198512 Angular: -0.7738831
yes
episode_count: 162 action [-2.54031897 12.93734312] r -8.0
Linear: 0.25298744 Angular: -0.99085504
yes
episode_count: 162 action [-7.37867594 12.43842483] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 162 action [-10.  10.] r -8.0
Linear: 0.37013182 Angular: 1.0
yes
episode_count: 162 action [13.7013185 -6.2986815] r -8.0
Linear: 0.21381955 Angular: 0.5121978
yes
episode_count: 162 action [ 7.26017356 -2.98378229] r -8.0
Linear: 0.50919837 Angular: 0.93023056
yes
episode_count: 162 action [14.39428926 -4.2103219 ] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 1

Linear: 0.2382249 Angular: -0.8641253
yes
episode_count: 162 action [-6.25900388 11.02350235] r -8.0
Linear: 0.16761312 Angular: -0.500094
yes
episode_count: 162 action [-3.32480878  6.67707086] r -8.0
Linear: 0.0884205 Angular: 0.959321
yes
episode_count: 162 action [10.47741532 -8.70900512] r -8.0
Linear: 0.18754636 Angular: 0.6223489
yes
episode_count: 162 action [ 8.09895277 -4.34802532] r -8.0
Linear: 0.9660709 Angular: 0.8120587
yes
episode_count: 162 action [17.78129578  1.54012203] r -8.0
Linear: 0.34582093 Angular: 1.0
yes
episode_count: 162 action [13.45820904 -6.54179096] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 162 action [ 10. -10.] r -8.0
Linear: 0.4295322 Angular: -1.0
yes
episode_count: 162 action [-5.7046783 14.2953217] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 162 action [-10.  10.] r -8.0
Linear: 0.0 Angular: -0.8488213
yes
episode_count: 162 action [-8.48821282  8.48821282] r -8.0
Linear: 0.28175768 Angular: -0.609387
yes
episode_count: 162 actio

Linear: 0.43663883 Angular: 1.0
yes
episode_count: 162 action [14.36638832 -5.63361168] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 162 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 0.35820806
yes
episode_count: 162 action [ 3.5820806 -3.5820806] r -8.0
Linear: 0.0 Angular: -1.0
yes
episode_count: 162 action [-10.  10.] r -8.0
Linear: 0.2889375 Angular: -0.84093785
yes
episode_count: 162 action [-5.52000344 11.29875422] r -8.0
Linear: 0.5221649 Angular: -0.36225823
yes
episode_count: 162 action [1.59906656 8.84423137] r -8.0
Linear: 0.2942649 Angular: -0.48980755
yes
episode_count: 162 action [-1.95542634  7.84072459] r -8.0
Linear: 0.0 Angular: -0.7707432
yes
episode_count: 162 action [-7.70743191  7.70743191] r -8.0
Linear: 0.31073323 Angular: 0.3236614
yes
episode_count: 162 action [ 6.34394646 -0.12928158] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 162 action [ 10. -10.] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 162 action [ 10. -10.] r -8.0
Linear: 0.0 Ang

Linear: 0.0 Angular: -1.0
yes
episode_count: 162 action [-10.  10.] r -8.0
Linear: 0.0 Angular: -0.6668192
yes
episode_count: 162 action [-6.66819215  6.66819215] r -8.0
Linear: 0.0 Angular: 0.7889852
yes
episode_count: 162 action [ 7.88985193 -7.88985193] r 0.07949092021297677
Linear: 0.09218503 Angular: 1.0
yes
episode_count: 162 action [10.9218502 -9.0781498] r 0.04359573689278751
Linear: 0.17502913 Angular: 0.5044119
yes
episode_count: 162 action [ 6.79440975 -3.29382747] r 0.0011068805303660056
Linear: 0.58414507 Angular: 0.97620964
yes
episode_count: 162 action [15.6035471  -3.92064571] r -8.0
Linear: 0.0 Angular: 0.79799944
yes
episode_count: 162 action [ 7.97999442 -7.97999442] r -8.0
Linear: 0.119919986 Angular: 1.0
yes
episode_count: 162 action [11.19920015 -8.80079985] r -8.0
Linear: 0.0 Angular: -0.23438016
yes
episode_count: 162 action [-2.34380156  2.34380156] r -8.0
Linear: 0.681418 Angular: -1.0
yes
episode_count: 162 action [-3.18581998 16.81417942] r -8.0
Linear: 0.08

episode_count: 162 action [-6.4997685 13.5002315] r -8.0
Linear: 0.42945772 Angular: -0.50605226
yes
episode_count: 162 action [-0.76594532  9.3550998 ] r -8.0
Linear: 0.07865403 Angular: 0.4120226
yes
episode_count: 162 action [ 4.90676612 -3.3336857 ] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 162 action [ 10. -10.] r -8.0
Linear: 0.3028721 Angular: 1.0
yes
episode_count: 162 action [13.02872062 -6.97127938] r -8.0
Linear: 0.0 Angular: -0.9695035
yes
episode_count: 162 action [-9.69503522  9.69503522] r -8.0
Linear: 0.53256774 Angular: -1.0
yes
episode_count: 162 action [-4.67432261 15.32567739] r -8.0
Linear: 0.0 Angular: -0.9757831
yes
episode_count: 162 action [-9.7578311  9.7578311] r -8.0
Linear: 0.38424766 Angular: -1.0
yes
episode_count: 162 action [-6.15752339 13.84247661] r -8.0
Linear: 0.10543604 Angular: 0.14942604
yes
episode_count: 162 action [ 2.5486207  -0.43990001] r -8.0
Linear: 0.0 Angular: 1.0
yes
episode_count: 162 action [ 10. -10.] r -8.0


KeyboardInterrupt: 