Skip to content

Commit

Permalink
Pendulum updates (#2423)
Browse files Browse the repository at this point in the history
* Pendulum env updates

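Simplify the math a bit (no difference in behavior): since sin(th + pi) = -sin(th), the term -3 * g / (2 * l) * np.sin(th + np.pi) is rewritten as 3 * g / (2 * l) * np.sin(th)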

* Reorder the clipping of angular velocity so that it is clipped before the angle is updated

* Bump version of Pendulum

* Reformat with black

* Update mentions of Pendulum-v0 to Pendulum-v1.
  • Loading branch information
RedTachyon committed Sep 25, 2021
1 parent 65eebce commit d199778
Show file tree
Hide file tree
Showing 9 changed files with 11 additions and 14 deletions.
2 changes: 1 addition & 1 deletion gym/envs/__init__.py
Expand Up @@ -42,7 +42,7 @@
)

register(
id="Pendulum-v0",
id="Pendulum-v1",
entry_point="gym.envs.classic_control:PendulumEnv",
max_episode_steps=200,
)
Expand Down
7 changes: 2 additions & 5 deletions gym/envs/classic_control/pendulum.py
Expand Up @@ -41,12 +41,9 @@ def step(self, u):
self.last_u = u # for rendering
costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)

newthdot = (
thdot
+ (-3 * g / (2 * l) * np.sin(th + np.pi) + 3.0 / (m * l ** 2) * u) * dt
)
newth = th + newthdot * dt
newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l ** 2) * u) * dt
newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
newth = th + newthdot * dt

self.state = np.array([newth, newthdot])
return self._get_obs(), -costs, False, {}
Expand Down
2 changes: 1 addition & 1 deletion gym/wrappers/frame_stack.py
Expand Up @@ -64,7 +64,7 @@ class FrameStack(ObservationWrapper):
r"""Observation wrapper that stacks the observations in a rolling manner.
For example, if the number of stacks is 4, then the returned observation contains
the most recent 4 observations. For environment 'Pendulum-v0', the original observation
the most recent 4 observations. For environment 'Pendulum-v1', the original observation
is an array with shape [3], so if we stack 4 observations, the processed observation
has shape [4, 3].
Expand Down
2 changes: 1 addition & 1 deletion gym/wrappers/test_frame_stack.py
Expand Up @@ -12,7 +12,7 @@
lz4 = None


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0", "Pong-v0"])
@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1", "Pong-v0"])
@pytest.mark.parametrize("num_stack", [2, 3, 4])
@pytest.mark.parametrize(
"lz4_compress",
Expand Down
2 changes: 1 addition & 1 deletion gym/wrappers/test_record_episode_statistics.py
Expand Up @@ -4,7 +4,7 @@
from gym.wrappers import RecordEpisodeStatistics


@pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v0"])
@pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v1"])
@pytest.mark.parametrize("deque_size", [2, 5])
def test_record_episode_statistics(env_id, deque_size):
env = gym.make(env_id)
Expand Down
4 changes: 2 additions & 2 deletions gym/wrappers/test_rescale_action.py
Expand Up @@ -12,8 +12,8 @@ def test_rescale_action():
env = RescaleAction(env, -1, 1)
del env

env = gym.make("Pendulum-v0")
wrapped_env = RescaleAction(gym.make("Pendulum-v0"), -1, 1)
env = gym.make("Pendulum-v1")
wrapped_env = RescaleAction(gym.make("Pendulum-v1"), -1, 1)

seed = 0
env.seed(seed)
Expand Down
2 changes: 1 addition & 1 deletion gym/wrappers/test_time_aware_observation.py
Expand Up @@ -4,7 +4,7 @@
from gym.wrappers import TimeAwareObservation


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"])
@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
def test_time_aware_observation(env_id):
env = gym.make(env_id)
wrapped_env = TimeAwareObservation(env)
Expand Down
2 changes: 1 addition & 1 deletion gym/wrappers/test_transform_observation.py
Expand Up @@ -6,7 +6,7 @@
from gym.wrappers import TransformObservation


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"])
@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
def test_transform_observation(env_id):
affine_transform = lambda x: 3 * x + 2
env = gym.make(env_id)
Expand Down
2 changes: 1 addition & 1 deletion gym/wrappers/test_transform_reward.py
Expand Up @@ -6,7 +6,7 @@
from gym.wrappers import TransformReward


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"])
@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
def test_transform_reward(env_id):
# use case #1: scale
scales = [0.1, 200]
Expand Down

0 comments on commit d199778

Please sign in to comment.