Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[vss_gym_3v3] implement OU noise for non-controlled agents #18

Merged
merged 1 commit into from
Feb 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions envs/rc_gym/Utils/Utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import math
import numpy as np
import gym


# Math methods
Expand Down Expand Up @@ -68,3 +69,24 @@ def normVt(vt):

def roundTo5(x, base=5):
return int(base * round(float(x) / base))

# Based on the OpenAI baselines implementation of Ornstein-Uhlenbeck noise
class OrnsteinUhlenbeckAction(object):
    """Ornstein-Uhlenbeck process that generates temporally correlated
    samples inside a gym Box action space.

    mu is the center of the action space and sigma is half the distance
    from the center to the high bound, so samples tend to stay within
    the space without being clipped.
    """

    def __init__(self, action_space, theta=.17, dt=0.032, x0=None):
        # theta: mean-reversion rate; dt: integration step (env time step);
        # x0: optional initial state (defaults to zeros on reset()).
        self.theta = theta
        self.mu = (action_space.high + action_space.low) / 2
        self.sigma = (action_space.high - self.mu) / 2
        self.dt = dt
        self.x0 = x0
        self.reset()

    def sample(self):
        """Advance the process one step and return the new state."""
        x = self.x_prev \
            + self.theta * (self.mu - self.x_prev) * self.dt \
            + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape)
        self.x_prev = x
        return x

    def reset(self):
        """Restart the process at x0 (or at zeros when x0 is None)."""
        self.x_prev = self.x0 if self.x0 is not None else np.zeros_like(self.mu)

    def __repr__(self):
        # Fixed: previously reported the wrong class name
        # ('OrnsteinUhlenbeckActionNoise').
        return 'OrnsteinUhlenbeckAction(mu={}, sigma={})'.format(self.mu, self.sigma)
39 changes: 25 additions & 14 deletions envs/rc_gym/vss/env_3v3/vss_gym_3v3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import math
import random
from rc_gym.Utils.Utils import OrnsteinUhlenbeckAction
from typing import Dict

import gym
Expand Down Expand Up @@ -61,8 +62,8 @@ def __init__(self):
high_obs_bound = [1.2, 1.2, 1.25, 1.25]
high_obs_bound += [1.2, 1.2, 1, 1, 1.25, 1.25, 1.2]*3
high_obs_bound += [1.2, 1.2, 1.25, 1.25, 1.2]*3
low_obs_bound = np.array(low_obs_bound)
high_obs_bound = np.array(high_obs_bound)
low_obs_bound = np.array(low_obs_bound, dtype=np.float32)
high_obs_bound = np.array(high_obs_bound, dtype=np.float32)

self.action_space = gym.spaces.Box(low=-1, high=1,
shape=(2, ), dtype=np.float32)
Expand All @@ -76,11 +77,19 @@ def __init__(self):
self.reward_shaping_total = None
self.v_wheel_deadzone = 0.05

self.ou_actions = []
for i in range(self.n_robots_blue + self.n_robots_yellow):
self.ou_actions.append(
OrnsteinUhlenbeckAction(self.action_space, dt=self.time_step)
)

print('Environment initialized')

def reset(self):
    """Clear episode state and restart every OU noise process, then
    delegate to the base environment's reset."""
    self.reward_shaping_total = None
    self.actions = None
    for noise_process in self.ou_actions:
        noise_process.reset()

    return super().reset()

Expand Down Expand Up @@ -129,14 +138,14 @@ def _get_commands(self, actions):
v_wheel2=v_wheel2))

# Send random commands to the other robots
for i in range(1, 3):
actions = self.action_space.sample()
for i in range(1, self.n_robots_blue):
actions = self.ou_actions[i].sample()
self.actions[i] = actions
v_wheel1, v_wheel2 = self._actions_to_v_wheels(actions)
commands.append(Robot(yellow=False, id=i, v_wheel1=v_wheel1,
v_wheel2=v_wheel2))
for i in range(3):
actions = self.action_space.sample()
for i in range(self.n_robots_yellow):
actions = self.ou_actions[self.n_robots_blue+i].sample()
v_wheel1, v_wheel2 = self._actions_to_v_wheels(actions)
commands.append(Robot(yellow=True, id=i, v_wheel1=v_wheel1,
v_wheel2=v_wheel2))
Expand Down Expand Up @@ -286,47 +295,49 @@ def theta(): return random.uniform(-180, 180)

def same_position_ref(x, y, x_ref, y_ref, radius):
    # True when (x, y) lies inside the axis-aligned square of
    # half-side `radius` centered at (x_ref, y_ref), borders included.
    within_x = x_ref - radius <= x <= x_ref + radius
    within_y = y_ref - radius <= y <= y_ref + radius
    return within_x and within_y

radius_ball = 0.2
radius_robot = 0.2
same_pos = True


while same_pos:
for i in range(len(agents)):
same_pos = False
while same_position_ref(agents[i].x,agents[i].y,pos_frame.ball.x, pos_frame.ball.y, radius_ball):
while same_position_ref(agents[i].x, agents[i].y, pos_frame.ball.x, pos_frame.ball.y, radius_ball):
agents[i] = Robot(x=x(), y=y(), theta=theta())
same_pos = True
for j in range(i + 1, len(agents)):
while same_position_ref(agents[i].x, agents[i].y, agents[j].x, agents[j].y, radius_robot):
agents[i] = Robot(x=x(), y=y(), theta=theta())
same_pos = True

pos_frame.robots_blue[0] = agents[0]
pos_frame.robots_blue[1] = agents[1]
pos_frame.robots_blue[2] = agents[2]
pos_frame.robots_yellow[0] = agents[3]
pos_frame.robots_yellow[1] = agents[4]
pos_frame.robots_yellow[2] = agents[5]
pos_frame.robots_yellow[2] = agents[5]

return pos_frame

def _actions_to_v_wheels(self, actions):
    """Scale normalized actions to wheel speeds, clip them to the
    simulator's supported range and zero-out speeds in the deadzone.

    NOTE(review): the scraped diff showed both the new clip (to
    +-linear_speed_range) and the deleted old clip (to +-2.6); only the
    new clip is kept here, matching the merged PR.

    :param actions: sequence with two values in [-1, 1]
        (left and right wheel commands).
    :return: tuple (left_wheel_speed, right_wheel_speed).
    """
    left_wheel_speed = actions[0] * self.rsim.linear_speed_range
    right_wheel_speed = actions[1] * self.rsim.linear_speed_range

    # Keep both speeds inside the simulator's supported range.
    left_wheel_speed, right_wheel_speed = np.clip(
        (left_wheel_speed, right_wheel_speed),
        -self.rsim.linear_speed_range,
        self.rsim.linear_speed_range
    )

    # Deadzone: commands too small to move the robot become a full stop.
    if -self.v_wheel_deadzone < left_wheel_speed < self.v_wheel_deadzone:
        left_wheel_speed = 0

    if -self.v_wheel_deadzone < right_wheel_speed < self.v_wheel_deadzone:
        right_wheel_speed = 0

    return left_wheel_speed, right_wheel_speed
5 changes: 4 additions & 1 deletion envs/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
# #
# '''

from rc_gym.Utils.Utils import OrnsteinUhlenbeckAction
import gym
import rc_gym
import numpy as np
import time
import rc_gym.Utils

# Using penalty env
env = gym.make('VSS3v3-v0')
Expand Down Expand Up @@ -77,8 +79,9 @@ def print_with_description(state):
# print_with_description(next_state)
epi_rew = 0.
while not done:
action = np.array([0.1, 0.])
action = np.array([0., 0.])
# action = env.action_space.sample()
# time.sleep(0.1)
next_state, reward, done, _ = env.step(action)
epi_rew += reward
# print_with_description(next_state)
Expand Down