Migrate to mujoco-py 1.5 #834

Merged · Jan 24, 2018 · showing changes from 4 of 10 commits
26 changes: 13 additions & 13 deletions gym/envs/__init__.py
@@ -204,89 +204,89 @@
 # 2D

 register(
-    id='Reacher-v1',
+    id='Reacher-v2',
     entry_point='gym.envs.mujoco:ReacherEnv',
     max_episode_steps=50,
     reward_threshold=-3.75,
 )

 register(
-    id='Pusher-v0',
+    id='Pusher-v1',
[Inline review comment · Contributor]
I think these should all be the same version (even if that means some environments skip versions).

This makes it obvious that "all the v2+ mujoco environments use version 1.50" or something similar.
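A hypothetical sketch of this suggestion (the `MUJOCO_V2_ENVS` table and uniform `-v2` suffix are illustrative, not part of this PR): every mujoco-py 1.50 environment gets the same version suffix, so `Pusher` would skip from v0 straight to v2.

```python
from gym.envs.registration import register

# Illustrative only: register every mujoco-py 1.50 environment under the
# same '-v2' suffix, even where that skips a version (Pusher never had a v1).
MUJOCO_V2_ENVS = {
    'Pusher-v2': 'gym.envs.mujoco:PusherEnv',
    'Thrower-v2': 'gym.envs.mujoco:ThrowerEnv',
    'Striker-v2': 'gym.envs.mujoco:StrikerEnv',
}

for env_id, entry_point in MUJOCO_V2_ENVS.items():
    register(
        id=env_id,
        entry_point=entry_point,
        max_episode_steps=100,
        reward_threshold=0.0,
    )
```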

     entry_point='gym.envs.mujoco:PusherEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

 register(
-    id='Thrower-v0',
+    id='Thrower-v1',
     entry_point='gym.envs.mujoco:ThrowerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

 register(
-    id='Striker-v0',
+    id='Striker-v1',
     entry_point='gym.envs.mujoco:StrikerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

 register(
-    id='InvertedPendulum-v1',
+    id='InvertedPendulum-v2',
     entry_point='gym.envs.mujoco:InvertedPendulumEnv',
     max_episode_steps=1000,
     reward_threshold=950.0,
 )

 register(
-    id='InvertedDoublePendulum-v1',
+    id='InvertedDoublePendulum-v2',
     entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
     max_episode_steps=1000,
     reward_threshold=9100.0,
 )

 register(
-    id='HalfCheetah-v1',
+    id='HalfCheetah-v2',
     entry_point='gym.envs.mujoco:HalfCheetahEnv',
     max_episode_steps=1000,
     reward_threshold=4800.0,
 )

 register(
-    id='Hopper-v1',
+    id='Hopper-v2',
     entry_point='gym.envs.mujoco:HopperEnv',
     max_episode_steps=1000,
     reward_threshold=3800.0,
 )

 register(
-    id='Swimmer-v1',
+    id='Swimmer-v2',
     entry_point='gym.envs.mujoco:SwimmerEnv',
     max_episode_steps=1000,
     reward_threshold=360.0,
 )

 register(
-    id='Walker2d-v1',
+    id='Walker2d-v2',
     max_episode_steps=1000,
     entry_point='gym.envs.mujoco:Walker2dEnv',
 )

 register(
-    id='Ant-v1',
+    id='Ant-v2',
     entry_point='gym.envs.mujoco:AntEnv',
     max_episode_steps=1000,
     reward_threshold=6000.0,
 )

 register(
-    id='Humanoid-v1',
+    id='Humanoid-v2',
     entry_point='gym.envs.mujoco:HumanoidEnv',
     max_episode_steps=1000,
 )

 register(
-    id='HumanoidStandup-v1',
+    id='HumanoidStandup-v2',
     entry_point='gym.envs.mujoco:HumanoidStandupEnv',
     max_episode_steps=1000,
 )
8 changes: 4 additions & 4 deletions gym/envs/mujoco/ant.py
@@ -14,7 +14,7 @@ def _step(self, a):
         forward_reward = (xposafter - xposbefore)/self.dt
         ctrl_cost = .5 * np.square(a).sum()
         contact_cost = 0.5 * 1e-3 * np.sum(
-            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
+            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
         survive_reward = 1.0
         reward = forward_reward - ctrl_cost - contact_cost + survive_reward
         state = self.state_vector()
@@ -30,9 +30,9 @@ def _step(self, a):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
-            np.clip(self.model.data.cfrc_ext, -1, 1).flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
+            np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
         ])

     def reset_model(self):
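The pattern in this file recurs throughout the PR: mujoco-py 1.50 splits the old `MjModel` into a static model plus an `MjSim` that owns all mutable state, so per-step quantities move from `model.data` to `sim.data`. A minimal sketch, assuming mujoco-py 1.50 is installed (the XML path is illustrative):

```python
import mujoco_py

# mujoco-py 1.50: the model holds static quantities; the sim holds state.
model = mujoco_py.load_model_from_path('/path/to/ant.xml')  # illustrative path
sim = mujoco_py.MjSim(model)
sim.step()

# Was model.data.cfrc_ext in mujoco-py < 1.50:
contact_forces = sim.data.cfrc_ext
```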
8 changes: 4 additions & 4 deletions gym/envs/mujoco/half_cheetah.py
@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _step(self, action):
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
         self.do_simulation(action, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
         ob = self._get_obs()
         reward_ctrl = - 0.1 * np.square(action).sum()
         reward_run = (xposafter - xposbefore)/self.dt
@@ -20,8 +20,8 @@ def _step(self, action):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[1:],
+            self.sim.data.qvel.flat,
         ])

     def reset_model(self):
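The index change from `qpos[0, 0]` to `qpos[0]` is because mujoco-py 1.50 exposes `qpos`/`qvel` as flat 1-D arrays rather than `(nq, 1)` column vectors; the same shape change drives the hopper unpacking and the scalar-reward fix in inverted_double_pendulum below. A small numpy sketch of the difference:

```python
import numpy as np

# mujoco-py < 1.50: qpos was a column vector of shape (nq, 1).
qpos_old = np.zeros((9, 1))
x_old = qpos_old[0, 0]                 # two indices to get a scalar

# mujoco-py 1.50: qpos is flat, shape (nq,).
qpos_new = np.zeros(9)
x_new = qpos_new[0]                    # one index suffices
posafter, height, ang = qpos_new[0:3]  # direct tuple unpacking now works
```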
8 changes: 4 additions & 4 deletions gym/envs/mujoco/hopper.py
@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
         alive_bonus = 1.0
         reward = (posafter - posbefore) / self.dt
         reward += alive_bonus
@@ -23,8 +23,8 @@ def _step(self, a):

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            np.clip(self.model.data.qvel.flat, -10, 10)
+            self.sim.data.qpos.flat[1:],
+            np.clip(self.sim.data.qvel.flat, -10, 10)
         ])

     def reset_model(self):
16 changes: 8 additions & 8 deletions gym/envs/mujoco/humanoid.py
@@ -2,9 +2,9 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
+def mass_center(model, sim):
+    mass = np.expand_dims(model.body_mass, 1)
+    xpos = sim.data.xipos
     return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

 class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@@ -13,7 +13,7 @@ def __init__(self):
         utils.EzPickle.__init__(self)

     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,
@@ -22,17 +22,17 @@ def _get_obs(self):
                                data.cfrc_ext.flat])

     def _step(self, a):
-        pos_before = mass_center(self.model)
+        pos_before = mass_center(self.model, self.sim)
         self.do_simulation(a, self.frame_skip)
-        pos_after = mass_center(self.model)
+        pos_after = mass_center(self.model, self.sim)
         alive_bonus = 5.0
-        data = self.model.data
+        data = self.sim.data
         lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
         quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
         quad_impact_cost = min(quad_impact_cost, 10)
         reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
-        qpos = self.model.data.qpos
+        qpos = self.sim.data.qpos
         done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
         return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
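The `mass_center` change compensates for the same shape shift: `model.body_mass` is now 1-D with shape `(nbody,)` while `sim.data.xipos` is `(nbody, 3)`, so the mass vector needs an explicit column axis before broadcasting. A sketch with illustrative sizes:

```python
import numpy as np

nbody = 14                            # illustrative body count
body_mass = np.random.rand(nbody)     # model.body_mass, now shape (nbody,)
xipos = np.random.rand(nbody, 3)      # sim.data.xipos, one row per body

# (nbody,) * (nbody, 3) raises a broadcasting error, so restore the
# column shape first, then take the mass-weighted mean body position:
mass = np.expand_dims(body_mass, 1)   # shape (nbody, 1)
com = np.sum(mass * xipos, 0) / np.sum(mass)
x_com = com[0]                        # the env's reward only uses x
```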
11 changes: 3 additions & 8 deletions gym/envs/mujoco/humanoidstandup.py
@@ -2,18 +2,13 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils

-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
-    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
-
 class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
         mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
         utils.EzPickle.__init__(self)

     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,
@@ -23,8 +18,8 @@ def _get_obs(self):

     def _step(self, a):
         self.do_simulation(a, self.frame_skip)
-        pos_after = self.model.data.qpos[2][0]
-        data = self.model.data
+        pos_after = self.sim.data.qpos[2]
+        data = self.sim.data
         uph_cost = (pos_after - 0) / self.model.opt.timestep

         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
16 changes: 8 additions & 8 deletions gym/envs/mujoco/inverted_double_pendulum.py
@@ -11,22 +11,22 @@ def __init__(self):
     def _step(self, action):
         self.do_simulation(action, self.frame_skip)
         ob = self._get_obs()
-        x, _, y = self.model.data.site_xpos[0]
+        x, _, y = self.sim.data.site_xpos[0]
         dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
-        v1, v2 = self.model.data.qvel[1:3]
+        v1, v2 = self.sim.data.qvel[1:3]
         vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
         alive_bonus = 10
-        r = (alive_bonus - dist_penalty - vel_penalty)[0]
+        r = alive_bonus - dist_penalty - vel_penalty
         done = bool(y <= 1)
         return ob, r, done, {}

     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos[:1],  # cart x pos
-            np.sin(self.model.data.qpos[1:]),  # link angles
-            np.cos(self.model.data.qpos[1:]),
-            np.clip(self.model.data.qvel, -10, 10),
-            np.clip(self.model.data.qfrc_constraint, -10, 10)
+            self.sim.data.qpos[:1],  # cart x pos
+            np.sin(self.sim.data.qpos[1:]),  # link angles
+            np.cos(self.sim.data.qpos[1:]),
+            np.clip(self.sim.data.qvel, -10, 10),
+            np.clip(self.sim.data.qfrc_constraint, -10, 10)
         ]).ravel()

     def reset_model(self):
2 changes: 1 addition & 1 deletion gym/envs/mujoco/inverted_pendulum.py
@@ -22,7 +22,7 @@ def reset_model(self):
         return self._get_obs()

     def _get_obs(self):
-        return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel()
+        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()

     def viewer_setup(self):
         v = self.viewer
49 changes: 19 additions & 30 deletions gym/envs/mujoco/mujoco_env.py
@@ -9,7 +9,6 @@

 try:
     import mujoco_py
-    from mujoco_py.mjlib import mjlib
 except ImportError as e:
     raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
@@ -25,17 +24,18 @@ def __init__(self, model_path, frame_skip):
         if not path.exists(fullpath):
             raise IOError("File %s does not exist" % fullpath)
         self.frame_skip = frame_skip
-        self.model = mujoco_py.MjModel(fullpath)
-        self.data = self.model.data
+        self.model = mujoco_py.load_model_from_path(fullpath)
+        self.sim = mujoco_py.MjSim(self.model)
+        self.data = self.sim.data
         self.viewer = None

         self.metadata = {
             'render.modes': ['human', 'rgb_array'],
             'video.frames_per_second': int(np.round(1.0 / self.dt))
         }

-        self.init_qpos = self.model.data.qpos.ravel().copy()
-        self.init_qvel = self.model.data.qvel.ravel().copy()
+        self.init_qpos = self.sim.data.qpos.ravel().copy()
+        self.init_qvel = self.sim.data.qvel.ravel().copy()
         observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
         assert not done
         self.obs_dim = observation.size
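Construction in the new API, as a standalone sketch (the XML path is illustrative): `load_model_from_path` replaces the `MjModel` constructor, and an `MjSim` wraps the model to own all simulation state.

```python
import numpy as np
import mujoco_py

# Old (mujoco-py < 1.50):
#   model = mujoco_py.MjModel(fullpath)
#   data = model.data
# New (mujoco-py 1.50):
model = mujoco_py.load_model_from_path('/path/to/model.xml')  # illustrative
sim = mujoco_py.MjSim(model)

init_qpos = sim.data.qpos.ravel().copy()  # snapshot the initial state
init_qvel = sim.data.qvel.ravel().copy()
sim.data.ctrl[:] = np.zeros(model.nu)     # actuator controls live on sim.data
sim.step()
```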
@@ -76,33 +76,33 @@ def viewer_setup(self):
     # -----------------------------

     def _reset(self):
-        mjlib.mj_resetData(self.model.ptr, self.data.ptr)
+        self.sim.reset()
         ob = self.reset_model()
         if self.viewer is not None:
-            self.viewer.autoscale()
             self.viewer_setup()
         return ob

     def set_state(self, qpos, qvel):
         assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
-        self.model.data.qpos = qpos
-        self.model.data.qvel = qvel
-        self.model._compute_subtree() # pylint: disable=W0212
-        self.model.forward()
+        old_state = self.sim.get_state()
+        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
+                                         old_state.act, old_state.udd_state)
+        self.sim.set_state(new_state)
+        self.sim.forward()
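Direct assignment to `qpos`/`qvel` is gone in 1.50; the state is swapped wholesale via `get_state`/`set_state`. A minimal standalone sketch of the idiom used above, assuming `sim` is an `MjSim`:

```python
import mujoco_py
import numpy as np

def set_sim_state(sim, qpos, qvel):
    """Replace qpos/qvel on an MjSim (sketch of the idiom above)."""
    assert qpos.shape == (sim.model.nq,) and qvel.shape == (sim.model.nv,)
    old = sim.get_state()
    # MjSimState is an immutable snapshot: carry over time, actuator
    # activations, and user-defined dynamics state unchanged.
    new = mujoco_py.MjSimState(old.time, qpos, qvel, old.act, old.udd_state)
    sim.set_state(new)
    sim.forward()  # recompute derived quantities without stepping time
```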

     @property
     def dt(self):
         return self.model.opt.timestep * self.frame_skip

     def do_simulation(self, ctrl, n_frames):
-        self.model.data.ctrl = ctrl
+        self.sim.data.ctrl[:] = ctrl
         for _ in range(n_frames):
-            self.model.step()
+            self.sim.step()

     def _render(self, mode='human', close=False):
         if close:
             if self.viewer is not None:
-                self._get_viewer().finish()
+                self._get_viewer()
                 self.viewer = None
             return
@@ -111,30 +111,19 @@ def _render(self, mode='human', close=False):
             data, width, height = self._get_viewer().get_image()
             return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :]
         elif mode == 'human':
-            self._get_viewer().loop_once()
+            self._get_viewer().render()

     def _get_viewer(self):
         if self.viewer is None:
-            self.viewer = mujoco_py.MjViewer()
-            self.viewer.start()
-            self.viewer.set_model(self.model)
+            self.viewer = mujoco_py.MjViewer(self.sim)
             self.viewer_setup()
         return self.viewer
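The viewer lifecycle also shrinks: instead of `start()`/`set_model()`/`loop_once()`/`finish()`, a 1.50 `MjViewer` is bound to a sim at construction and redrawn with `render()`. A sketch (it opens a window, so it assumes a display; the XML path is illustrative):

```python
import mujoco_py

model = mujoco_py.load_model_from_path('/path/to/model.xml')  # illustrative
sim = mujoco_py.MjSim(model)

viewer = mujoco_py.MjViewer(sim)  # bound to the sim; no start()/set_model()
for _ in range(100):
    sim.step()
    viewer.render()               # replaces loop_once(); no finish() needed
```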

     def get_body_com(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.com_subtree[idx]
-
-    def get_body_comvel(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.body_comvels[idx]
-
-    def get_body_xmat(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.xmat[idx].reshape((3, 3))
+        return self.data.get_body_xpos(body_name)
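mujoco-py 1.50 generates named accessors on `MjData`, replacing the manual `body_names.index(six.b(...))` lookups. Note that `get_body_xpos` returns the body frame position, not the subtree center of mass the old `com_subtree` helper returned, so this appears to be a small semantic change accepted by the PR. A sketch ('torso' is an illustrative body name):

```python
import mujoco_py

model = mujoco_py.load_model_from_path('/path/to/ant.xml')  # illustrative
sim = mujoco_py.MjSim(model)
sim.forward()  # populate derived quantities before reading them

xpos = sim.data.get_body_xpos('torso')  # body frame position, shape (3,)
xmat = sim.data.get_body_xmat('torso')  # body orientation, shape (3, 3)
```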

     def state_vector(self):
         return np.concatenate([
-            self.model.data.qpos.flat,
-            self.model.data.qvel.flat
+            self.sim.data.qpos.flat,
+            self.sim.data.qvel.flat
         ])