Skip to content

Commit

Permalink
Add reset_noise_scale argument for custom mujoco envs
Browse files: browse the repository at this point in the history
  • Loading branch information
hartikainen committed Jan 5, 2019
1 parent 5e4ea91 commit 2c30571
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 17 deletions.
12 changes: 10 additions & 2 deletions softlearning/environments/gym/mujoco/ant_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def __init__(self,
terminate_when_unhealthy=True,
healthy_z_range=(0.2, 1.0),
contact_force_range=(-1.0, 1.0),
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True):
self._ctrl_cost_weight = ctrl_cost_weight
self._contact_cost_weight = contact_cost_weight
Expand All @@ -26,6 +27,8 @@ def __init__(self,

self._contact_force_range = contact_force_range

self._reset_noise_scale = reset_noise_scale

self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)

Expand All @@ -38,6 +41,7 @@ def __init__(self,
terminate_when_unhealthy=self._terminate_when_unhealthy,
healthy_z_range=self._healthy_z_range,
contact_force_range=self._contact_force_range,
reset_noise_scale=self._reset_noise_scale,
exclude_current_positions_from_observation=(
self._exclude_current_positions_from_observation))

Expand Down Expand Up @@ -119,9 +123,13 @@ def _get_obs(self):
return observations

def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale

qpos = self.init_qpos + self.np_random.uniform(
size=self.model.nq, low=-0.1, high=0.1)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn(
self.model.nv)
self.set_state(qpos, qvel)

observation = self._get_obs()
Expand Down
13 changes: 10 additions & 3 deletions softlearning/environments/gym/mujoco/half_cheetah_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@ class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
forward_reward_weight=1.0,
ctrl_cost_weight=0.1,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True):
self._forward_reward_weight = forward_reward_weight

self._ctrl_cost_weight = ctrl_cost_weight

self._reset_noise_scale = reset_noise_scale

self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)

Expand All @@ -25,6 +28,7 @@ def __init__(self,
self,
forward_reward_weight=self._forward_reward_weight,
ctrl_cost_weight=self._ctrl_cost_weight,
reset_noise_scale=self._reset_noise_scale,
exclude_current_positions_from_observation=(
self._exclude_current_positions_from_observation),
)
Expand Down Expand Up @@ -68,10 +72,13 @@ def _get_obs(self):
return observation

def reset_model(self):
c = 0.1
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale

qpos = self.init_qpos + self.np_random.uniform(
low=-c, high=c, size=self.model.nq)
qvel = self.init_qvel + c * self.np_random.randn(self.model.nv)
low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn(
self.model.nv)

self.set_state(qpos, qvel)

Expand Down
12 changes: 9 additions & 3 deletions softlearning/environments/gym/mujoco/hopper_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def __init__(self,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float('inf')),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True):
self._forward_reward_weight = forward_reward_weight

Expand All @@ -32,6 +33,8 @@ def __init__(self,
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range

self._reset_noise_scale = reset_noise_scale

self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)

Expand All @@ -44,6 +47,7 @@ def __init__(self,
terminate_when_unhealthy=self._terminate_when_unhealthy,
healthy_z_range=self._healthy_z_range,
healthy_angle_range=self._healthy_angle_range,
reset_noise_scale=self._reset_noise_scale,
exclude_current_positions_from_observation=(
self._exclude_current_positions_from_observation),
)
Expand Down Expand Up @@ -121,11 +125,13 @@ def step(self, action):
return observation, reward, done, info

def reset_model(self):
c = 5e-3
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale

qpos = self.init_qpos + self.np_random.uniform(
low=-c, high=c, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel + self.np_random.uniform(
low=-c, high=c, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv)

self.set_state(qpos, qvel)

Expand Down
13 changes: 10 additions & 3 deletions softlearning/environments/gym/mujoco/humanoid_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self,
healthy_reward=5.0,
terminate_when_unhealthy=True,
healthy_z_range=(1.0, 2.0),
reset_noise_scale=1e-2,
exclude_current_positions_from_observation=True):
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
Expand All @@ -34,6 +35,9 @@ def __init__(self,
self._healthy_reward = healthy_reward
self._terminate_when_unhealthy = terminate_when_unhealthy
self._healthy_z_range = healthy_z_range

self._reset_noise_scale = reset_noise_scale

self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)

Expand All @@ -47,6 +51,7 @@ def __init__(self,
contact_cost_weight=self._contact_cost_weight,
contact_cost_range=self._contact_cost_range,
healthy_z_range=self._healthy_z_range,
reset_noise_scale=self._reset_noise_scale,
exclude_current_positions_from_observation=(
self._exclude_current_positions_from_observation))

Expand Down Expand Up @@ -137,11 +142,13 @@ def step(self, action):
return observation, reward, done, info

def reset_model(self):
c = 0.01
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale

qpos = self.init_qpos + self.np_random.uniform(
low=-c, high=c, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel + self.np_random.uniform(
low=-c, high=c, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv)
self.set_state(qpos, qvel)

observation = self._get_obs()
Expand Down
13 changes: 10 additions & 3 deletions softlearning/environments/gym/mujoco/swimmer_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
forward_reward_weight=1.0,
ctrl_cost_weight=1e-4,
reset_noise_scale=0.1,
exclude_current_positions_from_observation=True):
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight

self._reset_noise_scale = reset_noise_scale

self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)

Expand All @@ -18,6 +22,7 @@ def __init__(self,
self,
forward_reward_weight=self._forward_reward_weight,
ctrl_cost_weight=self._ctrl_cost_weight,
reset_noise_scale=self._reset_noise_scale,
exclude_current_positions_from_observation=(
self._exclude_current_positions_from_observation))

Expand Down Expand Up @@ -63,11 +68,13 @@ def _get_obs(self):
return observation

def reset_model(self):
c = 0.1
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale

qpos = self.init_qpos + self.np_random.uniform(
low=-c, high=c, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel + self.np_random.uniform(
low=-c, high=c, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv)

self.set_state(qpos, qvel)

Expand Down
12 changes: 9 additions & 3 deletions softlearning/environments/gym/mujoco/walker2d_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def __init__(self,
terminate_when_unhealthy=True,
healthy_z_range=(0.8, 2.0),
healthy_angle_range=(-1.0, 1.0),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True):
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
Expand All @@ -29,6 +30,8 @@ def __init__(self,
self._healthy_z_range = healthy_z_range
self._healthy_angle_range = healthy_angle_range

self._reset_noise_scale = reset_noise_scale

self._exclude_current_positions_from_observation = (
exclude_current_positions_from_observation)

Expand All @@ -41,6 +44,7 @@ def __init__(self,
terminate_when_unhealthy=self._terminate_when_unhealthy,
healthy_z_range=self._healthy_z_range,
healthy_angle_range=self._healthy_angle_range,
reset_noise_scale=self._reset_noise_scale,
exclude_current_positions_from_observation=(
self._exclude_current_positions_from_observation),
)
Expand Down Expand Up @@ -113,11 +117,13 @@ def step(self, action):
return observation, reward, done, info

def reset_model(self):
c = 5e-3
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale

qpos = self.init_qpos + self.np_random.uniform(
low=-c, high=c, size=self.model.nq)
low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel + self.np_random.uniform(
low=-c, high=c, size=self.model.nv)
low=noise_low, high=noise_high, size=self.model.nv)

self.set_state(qpos, qvel)

Expand Down

0 comments on commit 2c30571

Please sign in to comment.