-
Notifications
You must be signed in to change notification settings - Fork 8.6k
/
walker2d.py
40 lines (34 loc) · 1.39 KB
/
walker2d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo environment built on the ``walker2d.xml`` model.

    The per-step reward is the change of the first ``qpos`` entry divided by
    ``self.dt``, plus a constant bonus of 1.0, minus a quadratic penalty on
    the action. An episode is flagged as done as soon as the second or third
    ``qpos`` entry (named height and angle by this code) leaves its allowed
    open interval.
    """

    def __init__(self):
        # The trailing 4 is presumably the frame skip handed to the base
        # class -- confirm against MujocoEnv.__init__.
        mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
        utils.EzPickle.__init__(self)

    def _step(self, a):
        """Advance the simulation with action ``a``.

        Returns a tuple ``(observation, reward, done, info)`` where ``info``
        is always an empty dict.
        """
        x_start = self.model.data.qpos[0, 0]
        self.do_simulation(a, self.frame_skip)
        x_end, height_now, angle_now = self.model.data.qpos[0:3, 0]

        # Progress per unit time, a fixed bonus for still running, and a
        # small cost proportional to the squared action magnitude.
        progress_rate = (x_end - x_start) / self.dt
        survival_bonus = 1.0
        action_cost = 1e-3 * np.square(a).sum()
        reward = progress_rate + survival_bonus - action_cost

        # Terminate once either quantity is outside its open interval.
        done = (not (0.8 < height_now < 2.0)) or (not (-1.0 < angle_now < 1.0))

        return self._get_obs(), reward, done, {}

    def _get_obs(self):
        """Build the flat observation vector.

        The first ``qpos`` entry is left out, and velocities are clipped to
        the range [-10, 10] before both parts are concatenated and flattened.
        """
        positions = self.model.data.qpos[1:]
        velocities = np.clip(self.model.data.qvel, -10, 10)
        return np.concatenate([positions, velocities]).ravel()

    def reset_model(self):
        """Reset to the initial state plus small uniform noise.

        Independent noise drawn from U(-0.005, 0.005) is added to every
        entry of ``init_qpos`` and ``init_qvel``; the resulting observation
        is returned.
        """
        rng = self.np_random
        # Position noise is drawn before velocity noise; keep this order so
        # the random stream is consumed identically.
        start_qpos = self.init_qpos + rng.uniform(
            low=-.005, high=.005, size=self.model.nq
        )
        start_qvel = self.init_qvel + rng.uniform(
            low=-.005, high=.005, size=self.model.nv
        )
        self.set_state(start_qpos, start_qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Configure the viewer camera (tracked body, distance, look-at, elevation)."""
        camera = self.viewer.cam
        camera.trackbodyid = 2
        camera.distance = self.model.stat.extent * 0.5
        # Shifts the third look-at component by 0.8 relative to its current value.
        camera.lookat[2] += .8
        camera.elevation = -20