diff --git a/README.md b/README.md index 59eca0c4..f887d465 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ - [x] [Toy text RL envs](https://envpool.readthedocs.io/en/latest/api/toy_text.html): Catch, FrozenLake, Taxi, NChain, CliffWalking, Blackjack - [x] [ViZDoom single player](https://envpool.readthedocs.io/en/latest/api/vizdoom.html) - [ ] [DeepMind Control Suite](https://envpool.readthedocs.io/en/latest/api/dm_control.html) -- [ ] Box2D +- [ ] [Box2D](https://envpool.readthedocs.io/en/latest/api/box2d.html) - [ ] Procgen - [ ] Minigrid diff --git a/docs/api/box2d.rst b/docs/api/box2d.rst new file mode 100644 index 00000000..58593f64 --- /dev/null +++ b/docs/api/box2d.rst @@ -0,0 +1,105 @@ +Box2D +===== + +We use ``box2d==2.4.1`` and ``gym==0.23.1`` as the codebase. See +https://github.com/erincatto/box2d/tree/v2.4.1 and +https://github.com/openai/gym/tree/v0.23.1/gym/envs/box2d + + +CarRacing-v1 +------------ + +The easiest control task to learn from pixels - a top-down racing environment. +The generated track is random every episode. + +Action Space +~~~~~~~~~~~~ + +There are 3 actions: steering (-1 for full left, 1 for full right), gas +(0 ~ 1), and braking (0 ~ 1). + +Observation Space +~~~~~~~~~~~~~~~~~ + +The state consists of 96x96 pixels with 3 channels. + +Rewards +~~~~~~~ + +The reward is -0.1 every frame and +1000/N for every track tile visited, where +N is the total number of tiles in the track. For example, if you have +finished in 732 frames, your reward is 1000 - 0.1\*732 = 926.8 points. + +Starting State +~~~~~~~~~~~~~~ + +The car starts at rest in the center of the road. + +Episode Termination +~~~~~~~~~~~~~~~~~~~ + +The episode finishes when all of the tiles are visited. The car can also go +outside of the playfield - that is, far off the track, in which case it will +receive -100 reward and die. 
+ +LunarLander-v2, LunarLanderContinuous-v2 +---------------------------------------- + +This environment is a classic rocket trajectory optimization problem. +According to Pontryagin's maximum principle, it is optimal to fire the +engine at full throttle or turn it off. This is the reason why this +environment has discrete actions: engine on or off. + +There are two environment versions: discrete or continuous. The landing pad is +always at coordinates (0,0). The coordinates are the first two numbers in the +state vector. Landing outside of the landing pad is possible. Fuel is +infinite, so an agent can learn to fly and then land on its first attempt. + +Action Space +~~~~~~~~~~~~ + +There are four discrete actions available: do nothing, fire left orientation +engine, fire main engine, fire right orientation engine. + +Observation Space +~~~~~~~~~~~~~~~~~ + +The state has 8 components: the coordinates of the lander in ``x`` and ``y``, +its linear velocities in ``x`` and ``y``, its angle, its angular velocity, and +two booleans that represent whether each leg is in contact with the ground. + +Rewards +~~~~~~~ + +Reward for moving from the top of the screen to the landing pad and coming to +rest is about 100-140 points. If the lander moves away from the landing pad, +it loses reward. If the lander crashes, it receives an additional -100 points. +If it comes to rest, it receives an additional +100 points. Each leg with +ground contact is +10 points. Firing the main engine is -0.3 points each +frame. Firing the side engine is -0.03 points each frame. The environment is +considered solved at 200 points. + +Starting State +~~~~~~~~~~~~~~ + +The lander starts at the top center of the viewport with a random initial +force applied to its center of mass. + +Episode Termination +~~~~~~~~~~~~~~~~~~~ + +The episode finishes if: + +1. the lander crashes (the lander body gets in contact with the moon); +2. the lander gets outside of the viewport (the absolute value of the ``x`` + coordinate in the observation is at least 1); +3. 
the lander is not awake. From the `Box2D docs + <https://box2d.org/documentation/md__d_1__git_hub_box2d_docs_dynamics.html#autotoc_md61>`_, + a body which is not awake is a body which doesn't move and doesn't collide + with any other body: + + When Box2D determines that a body (or group of bodies) has come to rest, + the body enters a sleep state which has very little CPU overhead. If a + body is awake and collides with a sleeping body, then the sleeping body + wakes up. Bodies will also wake up if a joint or contact attached to + them is destroyed. diff --git a/docs/index.rst b/docs/index.rst index aff60b04..8fc8fe89 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -73,9 +73,10 @@ stable version through `envpool.readthedocs.io/en/stable/ :caption: Environments api/atari + api/box2d api/classic - api/mujoco api/dm_control + api/mujoco api/toy_text api/vizdoom diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 70043b4e..f4bd01e0 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -57,3 +57,7 @@ Minigrid Garena Tianshou namedtuple +playfield +Pontryagin +booleans +viewport diff --git a/envpool/BUILD b/envpool/BUILD index 64770059..1116a80d 100644 --- a/envpool/BUILD +++ b/envpool/BUILD @@ -17,6 +17,7 @@ py_library( srcs = ["entry.py"], deps = [ "//envpool/atari:atari_registration", + "//envpool/box2d:box2d_registration", "//envpool/classic_control:classic_control_registration", "//envpool/mujoco:mujoco_registration", "//envpool/toy_text:toy_text_registration", @@ -31,6 +32,7 @@ py_library( ":entry", ":registration", "//envpool/atari", + "//envpool/box2d", "//envpool/classic_control", "//envpool/mujoco", "//envpool/python", diff --git a/envpool/box2d/BUILD b/envpool/box2d/BUILD new file mode 100644 index 00000000..def3e749 --- /dev/null +++ b/envpool/box2d/BUILD @@ -0,0 +1,67 @@ +load("@pip_requirements//:requirements.bzl", "requirement") +load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "box2d_env", + srcs = 
["lunar_lander.cc"], + hdrs = [ + "lunar_lander.h", + "lunar_lander_continuous.h", + "lunar_lander_discrete.h", + ], + deps = [ + "//envpool/core:async_envpool", + "@box2d", + ], +) + +pybind_extension( + name = "box2d_envpool", + srcs = ["box2d_envpool.cc"], + deps = [ + ":box2d_env", + "//envpool/core:py_envpool", + ], +) + +py_library( + name = "box2d", + srcs = ["__init__.py"], + data = [":box2d_envpool.so"], + deps = ["//envpool/python:api"], +) + +py_test( + name = "box2d_deterministic_test", + size = "enormous", + srcs = ["box2d_deterministic_test.py"], + deps = [ + ":box2d", + requirement("absl-py"), + requirement("numpy"), + ], +) + +py_test( + name = "box2d_correctness_test", + size = "enormous", + srcs = ["box2d_correctness_test.py"], + deps = [ + ":box2d", + requirement("absl-py"), + requirement("gym"), + requirement("box2d"), + requirement("pygame"), + requirement("numpy"), + ], +) + +py_library( + name = "box2d_registration", + srcs = ["registration.py"], + deps = [ + "//envpool:registration", + ], +) diff --git a/envpool/box2d/__init__.py b/envpool/box2d/__init__.py new file mode 100644 index 00000000..f6cc9b29 --- /dev/null +++ b/envpool/box2d/__init__.py @@ -0,0 +1,44 @@ +# Copyright 2022 Garena Online Private Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Box2D env in EnvPool.""" + +from envpool.python.api import py_env + +from .box2d_envpool import ( + _LunarLanderContinuousEnvPool, + _LunarLanderContinuousEnvSpec, + _LunarLanderDiscreteEnvPool, + _LunarLanderDiscreteEnvSpec, +) + +( + LunarLanderContinuousEnvSpec, + LunarLanderContinuousDMEnvPool, + LunarLanderContinuousGymEnvPool, +) = py_env(_LunarLanderContinuousEnvSpec, _LunarLanderContinuousEnvPool) + +( + LunarLanderDiscreteEnvSpec, + LunarLanderDiscreteDMEnvPool, + LunarLanderDiscreteGymEnvPool, +) = py_env(_LunarLanderDiscreteEnvSpec, _LunarLanderDiscreteEnvPool) + +__all__ = [ + "LunarLanderContinuousEnvSpec", + "LunarLanderContinuousDMEnvPool", + "LunarLanderContinuousGymEnvPool", + "LunarLanderDiscreteEnvSpec", + "LunarLanderDiscreteDMEnvPool", + "LunarLanderDiscreteGymEnvPool", +] diff --git a/envpool/box2d/box2d_correctness_test.py b/envpool/box2d/box2d_correctness_test.py new file mode 100644 index 00000000..b4e5801e --- /dev/null +++ b/envpool/box2d/box2d_correctness_test.py @@ -0,0 +1,127 @@ +# Copyright 2022 Garena Online Private Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Unit tests for box2d environments correctness check.""" + +from typing import Any, no_type_check + +import gym +import numpy as np +from absl import logging +from absl.testing import absltest + +from envpool.box2d import ( + LunarLanderContinuousEnvSpec, + LunarLanderContinuousGymEnvPool, + LunarLanderDiscreteEnvSpec, + LunarLanderDiscreteGymEnvPool, +) + + +class _Box2dEnvPoolCorrectnessTest(absltest.TestCase): + + @no_type_check + def run_space_check(self, env0: gym.Env, env1: Any) -> None: + """Check observation_space and action space.""" + obs0, obs1 = env0.observation_space, env1.observation_space + np.testing.assert_allclose(obs0.shape, obs1.shape) + act0, act1 = env0.action_space, env1.action_space + if isinstance(act0, gym.spaces.Box): + np.testing.assert_allclose(act0.low, act1.low) + np.testing.assert_allclose(act0.high, act1.high) + elif isinstance(act0, gym.spaces.Discrete): + np.testing.assert_allclose(act0.n, act1.n) + + def test_lunar_lander_space(self) -> None: + env0 = gym.make("LunarLander-v2") + env1 = LunarLanderDiscreteGymEnvPool( + LunarLanderDiscreteEnvSpec(LunarLanderDiscreteEnvSpec.gen_config()) + ) + self.run_space_check(env0, env1) + + env0 = gym.make("LunarLanderContinuous-v2") + env1 = LunarLanderContinuousGymEnvPool( + LunarLanderContinuousEnvSpec(LunarLanderContinuousEnvSpec.gen_config()) + ) + self.run_space_check(env0, env1) + + def heuristic_lunar_lander_policy( + self, s: np.ndarray, continuous: bool + ) -> np.ndarray: + angle_targ = np.clip(s[0] * 0.5 + s[2] * 1.0, -0.4, 0.4) + hover_targ = 0.55 * np.abs(s[0]) + angle_todo = (angle_targ - s[4]) * 0.5 - s[5] * 1.0 + hover_todo = (hover_targ - s[1]) * 0.5 - s[3] * 0.5 + + if s[6] or s[7]: + angle_todo = 0 + hover_todo = -(s[3]) * 0.5 + + if continuous: + a = np.array([hover_todo * 20 - 1, -angle_todo * 20]) + a = np.clip(a, -1, 1) + else: + a = 0 + if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: + a = 2 + elif angle_todo < -0.05: + a = 3 + elif angle_todo > 0.05: + a 
= 1 + return a + + def solve_lunar_lander(self, num_envs: int, continuous: bool) -> None: + if continuous: + env = LunarLanderContinuousGymEnvPool( + LunarLanderContinuousEnvSpec( + LunarLanderContinuousEnvSpec.gen_config(num_envs=num_envs) + ) + ) + else: + env = LunarLanderDiscreteGymEnvPool( + LunarLanderDiscreteEnvSpec( + LunarLanderDiscreteEnvSpec.gen_config(num_envs=num_envs) + ) + ) + # each env run two episodes + for _ in range(2): + env_id = np.arange(num_envs) + done = np.array([False] * num_envs) + obs = env.reset(env_id) + rewards = np.zeros(num_envs) + while not np.all(done): + action = np.array( + [self.heuristic_lunar_lander_policy(s, continuous) for s in obs] + ) + obs, rew, done, info = env.step(action, env_id) + env_id = info["env_id"] + rewards[env_id] += rew + obs = obs[~done] + env_id = env_id[~done] + mean_reward = np.mean(rewards) + logging.info( + f"{continuous}, {np.mean(rewards):.6f} ± {np.std(rewards):.6f}" + ) + # the following number is from gym's 1000 episode mean reward + if continuous: # 283.872619 ± 18.881830 + self.assertTrue(abs(mean_reward - 284) < 10, (continuous, mean_reward)) + else: # 236.898334 ± 105.832610 + self.assertTrue(abs(mean_reward - 237) < 20, (continuous, mean_reward)) + + def test_lunar_lander_correctness(self, num_envs: int = 30) -> None: + self.solve_lunar_lander(num_envs, True) + self.solve_lunar_lander(num_envs, False) + + +if __name__ == "__main__": + absltest.main() diff --git a/envpool/box2d/box2d_deterministic_test.py b/envpool/box2d/box2d_deterministic_test.py new file mode 100644 index 00000000..b2e225e7 --- /dev/null +++ b/envpool/box2d/box2d_deterministic_test.py @@ -0,0 +1,66 @@ +# Copyright 2022 Garena Online Private Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for box2d environments deterministic check.""" + +from typing import Any + +import numpy as np +from absl.testing import absltest + +from envpool.box2d import ( + LunarLanderContinuousEnvSpec, + LunarLanderContinuousGymEnvPool, + LunarLanderDiscreteEnvSpec, + LunarLanderDiscreteGymEnvPool, +) + + +class _Box2dEnvPoolDeterministicTest(absltest.TestCase): + + def run_deterministic_check( + self, + spec_cls: Any, + envpool_cls: Any, + num_envs: int = 4, + **kwargs: Any, + ) -> None: + env0 = envpool_cls( + spec_cls(spec_cls.gen_config(num_envs=num_envs, seed=0, **kwargs)) + ) + env1 = envpool_cls( + spec_cls(spec_cls.gen_config(num_envs=num_envs, seed=0, **kwargs)) + ) + env2 = envpool_cls( + spec_cls(spec_cls.gen_config(num_envs=num_envs, seed=1, **kwargs)) + ) + act_space = env0.action_space + for _ in range(5000): + action = np.array([act_space.sample() for _ in range(num_envs)]) + obs0 = env0.step(action)[0] + obs1 = env1.step(action)[0] + obs2 = env2.step(action)[0] + np.testing.assert_allclose(obs0, obs1) + self.assertFalse(np.allclose(obs0, obs2)) + + def test_lunar_lander(self) -> None: + self.run_deterministic_check( + LunarLanderContinuousEnvSpec, LunarLanderContinuousGymEnvPool + ) + self.run_deterministic_check( + LunarLanderDiscreteEnvSpec, LunarLanderDiscreteGymEnvPool + ) + + +if __name__ == "__main__": + absltest.main() diff --git a/envpool/box2d/box2d_envpool.cc b/envpool/box2d/box2d_envpool.cc new file mode 100644 index 00000000..1b6c6c0c --- /dev/null +++ b/envpool/box2d/box2d_envpool.cc @@ -0,0 +1,30 @@ +// Copyright 2022 
Garena Online Private Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "envpool/box2d/lunar_lander_continuous.h" +#include "envpool/box2d/lunar_lander_discrete.h" +#include "envpool/core/py_envpool.h" + +using LunarLanderContinuousEnvSpec = + PyEnvSpec; +using LunarLanderContinuousEnvPool = + PyEnvPool; + +using LunarLanderDiscreteEnvSpec = PyEnvSpec; +using LunarLanderDiscreteEnvPool = PyEnvPool; + +PYBIND11_MODULE(box2d_envpool, m) { + REGISTER(m, LunarLanderContinuousEnvSpec, LunarLanderContinuousEnvPool) + REGISTER(m, LunarLanderDiscreteEnvSpec, LunarLanderDiscreteEnvPool) +} diff --git a/envpool/box2d/lunar_lander.cc b/envpool/box2d/lunar_lander.cc new file mode 100644 index 00000000..121c8026 --- /dev/null +++ b/envpool/box2d/lunar_lander.cc @@ -0,0 +1,326 @@ +// Copyright 2022 Garena Online Private Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "envpool/box2d/lunar_lander.h" + +#include + +namespace box2d { + +// this function is to pass clang-tidy conversion check +static b2Vec2 Vec2(double x, double y) { + return b2Vec2(static_cast(x), static_cast(y)); +} + +ContactDetector::ContactDetector(LunarLanderEnv* env) : env_(env) {} + +void ContactDetector::BeginContact(b2Contact* contact) { + b2Body* body_a = contact->GetFixtureA()->GetBody(); + b2Body* body_b = contact->GetFixtureB()->GetBody(); + if (env_->lander_ == body_a || env_->lander_ == body_b) { + env_->done_ = true; + } + if (env_->legs_[0] == body_a || env_->legs_[0] == body_b) { + env_->ground_contact_[0] = 1; + } + if (env_->legs_[1] == body_a || env_->legs_[1] == body_b) { + env_->ground_contact_[1] = 1; + } +} + +void ContactDetector::EndContact(b2Contact* contact) { + b2Body* body_a = contact->GetFixtureA()->GetBody(); + b2Body* body_b = contact->GetFixtureB()->GetBody(); + if (env_->legs_[0] == body_a || env_->legs_[0] == body_b) { + env_->ground_contact_[0] = 0; + } + if (env_->legs_[1] == body_a || env_->legs_[1] == body_b) { + env_->ground_contact_[1] = 0; + } +} + +LunarLanderEnv::LunarLanderEnv(bool continuous, int max_episode_steps) + : max_episode_steps_(max_episode_steps), + elapsed_step_(max_episode_steps + 1), + continuous_(continuous), + done_(true), + world_(new b2World(b2Vec2(0.0, -10.0))), + moon_(nullptr), + lander_(nullptr), + dist_(0, 1) { + for (const auto* p : kLanderPoly) { + lander_poly_.emplace_back(Vec2(p[0] / kScale, p[1] / kScale)); + } +} + +void LunarLanderEnv::ResetBox2d(std::mt19937* gen) { + // clean all body in world + if (moon_ != nullptr) { + world_->SetContactListener(nullptr); + for (auto& p : particles_) { + world_->DestroyBody(p); + } + particles_.clear(); + world_->DestroyBody(moon_); + world_->DestroyBody(lander_); + world_->DestroyBody(legs_[0]); + world_->DestroyBody(legs_[1]); + } + listener_ = std::make_unique(this); + world_->SetContactListener(listener_.get()); + double w = kViewportW 
/ kScale; + double h = kViewportH / kScale; + + // moon + std::array height; + std::array chunk_x; + std::array smooth_y; + double helipad_y = h / 4; + for (int i = 0; i <= kChunks; ++i) { + if (kChunks / 2 - 2 <= i && i <= kChunks / 2 + 2) { + height[i] = helipad_y; + } else { + height[i] = dist_(*gen) * h / 2; + } + } + for (int i = 0; i < kChunks; ++i) { + chunk_x[i] = w / (kChunks - 1) * i; + smooth_y[i] = + (height[i == 0 ? kChunks : i - 1] + height[i] + height[i + 1]) / 3; + } + { + b2BodyDef bd; + bd.type = b2_staticBody; + + b2EdgeShape shape; + shape.SetTwoSided(b2Vec2(0, 0), Vec2(w, 0)); + + moon_ = world_->CreateBody(&bd); + moon_->CreateFixture(&shape, 0); + } + for (int i = 0; i < kChunks - 1; ++i) { + b2EdgeShape shape; + shape.SetTwoSided(b2Vec2(chunk_x[i], smooth_y[i]), + b2Vec2(chunk_x[i + 1], smooth_y[i + 1])); + + b2FixtureDef fd; + fd.shape = &shape; + fd.friction = 0.1; + fd.density = 0; + + moon_->CreateFixture(&fd); + } + + // lander + double initial_x = w / 2; + double initial_y = h; + { + b2BodyDef bd; + bd.type = b2_dynamicBody; + bd.position = Vec2(initial_x, initial_y); + bd.angle = 0.0; + + b2PolygonShape polygon; + polygon.Set(lander_poly_.data(), lander_poly_.size()); + + b2FixtureDef fd; + fd.shape = &polygon; + fd.density = 5.0; + fd.friction = 0.1; + fd.filter.categoryBits = 0x0010; + fd.filter.maskBits = 0x001; + fd.restitution = 0.0; + + lander_ = world_->CreateBody(&bd); + lander_->CreateFixture(&fd); + b2Vec2 force = Vec2(dist_(*gen) * 2 * kInitialRandom - kInitialRandom, + dist_(*gen) * 2 * kInitialRandom - kInitialRandom); + lander_->ApplyForceToCenter(force, true); + } + + // legs + for (int index = 0; index < 2; ++index) { + float sign = index == 0 ? 
-1 : 1; + + b2BodyDef bd; + bd.type = b2_dynamicBody; + bd.position = Vec2(initial_x - sign * kLegAway / kScale, initial_y); + bd.angle = sign * 0.05f; + + b2PolygonShape shape; + shape.SetAsBox(static_cast(kLegW / kScale), + static_cast(kLegH / kScale)); + + b2FixtureDef fd; + fd.shape = &shape; + fd.density = 1.0; + fd.filter.categoryBits = 0x0020; + fd.filter.maskBits = 0x001; + fd.restitution = 0.0; + + legs_[index] = world_->CreateBody(&bd); + legs_[index]->CreateFixture(&fd); + ground_contact_[index] = 0; + + b2RevoluteJointDef rjd; + rjd.bodyA = lander_; + rjd.bodyB = legs_[index]; + rjd.localAnchorA.SetZero(); + rjd.localAnchorB = Vec2(sign * kLegAway / kScale, kLegDown / kScale); + rjd.enableMotor = true; + rjd.enableLimit = true; + rjd.maxMotorTorque = static_cast(kLegSpringTorque); + rjd.motorSpeed = sign * 0.3f; + rjd.lowerAngle = index == 0 ? 0.4 : -0.9; + rjd.upperAngle = index == 0 ? 0.9 : -0.4; + world_->CreateJoint(&rjd); + } +} + +b2Body* LunarLanderEnv::CreateParticle(float mass, b2Vec2 pos) { + b2BodyDef bd; + bd.type = b2_dynamicBody; + bd.position = pos; + bd.angle = 0.0; + + b2CircleShape shape; + shape.m_radius = static_cast(2 / kScale); + shape.m_p.SetZero(); + + b2FixtureDef fd; + fd.shape = &shape; + fd.density = mass; + fd.friction = 0.1; + fd.filter.categoryBits = 0x0100; + fd.filter.maskBits = 0x001; + fd.restitution = 0.3; + + auto* p = world_->CreateBody(&bd); + p->CreateFixture(&fd); + particles_.emplace_back(p); + return p; +} + +void LunarLanderEnv::StepBox2d(std::mt19937* gen, int action, float action0, + float action1) { + action0 = std::min(std::max(action0, -1.0f), 1.0f); + action1 = std::min(std::max(action1, -1.0f), 1.0f); + std::array tip; + std::array side; + std::array dispersion; + tip[0] = std::sin(lander_->GetAngle()); + tip[1] = std::cos(lander_->GetAngle()); + side[0] = -tip[1]; + side[1] = tip[0]; + dispersion[0] = (dist_(*gen) * 2 - 1) / kScale; + dispersion[1] = (dist_(*gen) * 2 - 1) / kScale; + + // main engine + 
double m_power = 0.0; + if ((continuous_ && action0 > 0) || (!continuous_ && action == 2)) { + if (continuous_) { + m_power = (std::min(std::max(action0, 0.0f), 1.0f) + 1) * 0.5; + } else { + m_power = 1.0; + } + double tmp = 4 / kScale + 2 * dispersion[0]; + double ox = tip[0] * tmp + side[0] * dispersion[1]; + double oy = -tip[1] * tmp - side[1] * dispersion[1]; + auto impulse_pos = Vec2(ox, oy); + impulse_pos += lander_->GetPosition(); + auto* p = CreateParticle(3.5, impulse_pos); + auto impulse = + Vec2(ox * kMainEnginePower * m_power, oy * kMainEnginePower * m_power); + p->ApplyLinearImpulse(impulse, impulse_pos, true); + lander_->ApplyLinearImpulse(-impulse, impulse_pos, true); + } + + // orientation engines + double s_power = 0.0; + if ((continuous_ && std::abs(action1) > 0.5) || + (!continuous_ && (action == 1 || action == 3))) { + double direction; + if (continuous_) { + float eps = 1e-8; + direction = action1 > eps ? 1 : action1 < -eps ? -1 : 0; + s_power = std::min(std::max(std::abs(action1), 0.5f), 1.0f); + } else { + direction = action - 2; + s_power = 1.0; + } + double tmp = 3 * dispersion[1] + direction * kSideEngineAway / kScale; + double ox = tip[0] * dispersion[0] + side[0] * tmp; + double oy = -tip[1] * dispersion[0] - side[1] * tmp; + auto impulse_pos = Vec2(ox - tip[0] * 17 / kScale, + oy + tip[1] * kSideEngineHeight / kScale); + impulse_pos += lander_->GetPosition(); + auto* p = CreateParticle(0.7, impulse_pos); + auto impulse = + Vec2(ox * kSideEnginePower * s_power, oy * kSideEnginePower * s_power); + p->ApplyLinearImpulse(impulse, impulse_pos, true); + lander_->ApplyLinearImpulse(-impulse, impulse_pos, true); + } + + world_->Step(static_cast(1.0 / kFPS), 6 * 30, 2 * 30); + + // state and reward + auto pos = lander_->GetPosition(); + auto vel = lander_->GetLinearVelocity(); + double w = kViewportW / kScale; + double h = kViewportH / kScale; + obs_[0] = (pos.x - w / 2) / (w / 2); + obs_[1] = (pos.y - h / 4 - kLegDown / kScale) / (h / 2); + 
obs_[2] = vel.x * w / 2 / kFPS; + obs_[3] = vel.y * h / 2 / kFPS; + obs_[4] = lander_->GetAngle(); + obs_[5] = lander_->GetAngularVelocity() * 20 / kFPS; + obs_[6] = ground_contact_[0]; + obs_[7] = ground_contact_[1]; + reward_ = 0; + float shaping = -100 * (std::sqrt(obs_[0] * obs_[0] + obs_[1] * obs_[1]) + + std::sqrt(obs_[2] * obs_[2] + obs_[3] * obs_[3]) + + std::abs(obs_[4])) + + 10 * (obs_[6] + obs_[7]); + if (elapsed_step_ > 0) { + reward_ = shaping - prev_shaping_; + } + prev_shaping_ = shaping; + reward_ -= static_cast(m_power * 0.3 + s_power * 0.03); + if (done_ || std::abs(obs_[0]) >= 1) { + done_ = true; + reward_ = -100; + } + if (!lander_->IsAwake()) { + done_ = true; + reward_ = 100; + } + if (elapsed_step_ >= max_episode_steps_) { + done_ = true; + } +} + +void LunarLanderEnv::LunarLanderReset(std::mt19937* gen) { + elapsed_step_ = -1; // because of the step(0) + done_ = false; + ResetBox2d(gen); + LunarLanderStep(gen, 0, 0, 0); +} + +void LunarLanderEnv::LunarLanderStep(std::mt19937* gen, int action, + float action0, float action1) { + ++elapsed_step_; + StepBox2d(gen, action, action0, action1); +} + +} // namespace box2d diff --git a/envpool/box2d/lunar_lander.h b/envpool/box2d/lunar_lander.h new file mode 100644 index 00000000..caf520b3 --- /dev/null +++ b/envpool/box2d/lunar_lander.h @@ -0,0 +1,94 @@ +/* + * Copyright 2022 Garena Online Private Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// https://github.com/openai/gym/blob/0.23.1/gym/envs/box2d/lunar_lander.py + +#ifndef ENVPOOL_BOX2D_LUNAR_LANDER_H_ +#define ENVPOOL_BOX2D_LUNAR_LANDER_H_ + +#include + +#include +#include +#include +#include + +namespace box2d { + +class ContactDetector; + +class LunarLanderEnv { + const double kFPS = 50; + const double kScale = 30.0; + const double kMainEnginePower = 13.0; + const double kSideEnginePower = 0.6; + const double kInitialRandom = 1000.0; + const double kLanderPoly[6][2] = { // NOLINT + {-14, 17}, {-17, 0}, {-17, -10}, {17, -10}, {17, 0}, {14, 17}}; + const double kLegAway = 20; + const double kLegDown = 18; + const double kLegW = 2; + const double kLegH = 8; + const double kLegSpringTorque = 40; + const double kSideEngineHeight = 14.0; + const double kSideEngineAway = 12.0; + const double kViewportW = 600; + const double kViewportH = 400; + static const int kChunks = 11; + + friend class ContactDetector; + + protected: + int max_episode_steps_, elapsed_step_; + float reward_, prev_shaping_; + bool continuous_, done_; + std::array obs_; + + // box2d related + std::unique_ptr world_; + b2Body *moon_, *lander_; + std::vector particles_; + std::vector lander_poly_; + std::array legs_; + std::array ground_contact_; + std::unique_ptr listener_; + std::uniform_real_distribution<> dist_; + + public: + LunarLanderEnv(bool continuous, int max_episode_steps); + void LunarLanderReset(std::mt19937* gen); + // discrete action space: action + // continuous action space: action0 and action1 + void LunarLanderStep(std::mt19937* gen, int action, float action0, + float action1); + + private: + void ResetBox2d(std::mt19937* gen); + void StepBox2d(std::mt19937* gen, int action, float action0, float action1); + b2Body* CreateParticle(float mass, b2Vec2 pos); +}; + +class ContactDetector : public b2ContactListener { + LunarLanderEnv* env_; + + public: + explicit ContactDetector(LunarLanderEnv* env); + void BeginContact(b2Contact* contact) override; + void 
EndContact(b2Contact* contact) override; +}; + +} // namespace box2d + +#endif // ENVPOOL_BOX2D_LUNAR_LANDER_H_ diff --git a/envpool/box2d/lunar_lander_continuous.h b/envpool/box2d/lunar_lander_continuous.h new file mode 100644 index 00000000..245ea229 --- /dev/null +++ b/envpool/box2d/lunar_lander_continuous.h @@ -0,0 +1,77 @@ +/* + * Copyright 2022 Garena Online Private Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ENVPOOL_BOX2D_LUNAR_LANDER_CONTINUOUS_H_ +#define ENVPOOL_BOX2D_LUNAR_LANDER_CONTINUOUS_H_ + +#include "envpool/box2d/lunar_lander.h" +#include "envpool/core/async_envpool.h" +#include "envpool/core/env.h" + +namespace box2d { + +class LunarLanderContinuousEnvFns { + public: + static decltype(auto) DefaultConfig() { + return MakeDict("max_episode_steps"_.Bind(1000), + "reward_threshold"_.Bind(200.0)); + } + template + static decltype(auto) StateSpec(const Config& conf) { + return MakeDict("obs"_.Bind(Spec({8}))); + } + template + static decltype(auto) ActionSpec(const Config& conf) { + return MakeDict("action"_.Bind(Spec({2}, {-1.0, 1.0}))); + } +}; + +using LunarLanderContinuousEnvSpec = EnvSpec; + +class LunarLanderContinuousEnv : public Env, + public LunarLanderEnv { + public: + LunarLanderContinuousEnv(const Spec& spec, int env_id) + : Env(spec, env_id), + LunarLanderEnv(true, spec.config["max_episode_steps"_]) {} + + bool IsDone() override { return done_; } + + void Reset() override { + LunarLanderReset(&gen_); + 
WriteState(); + } + + void Step(const Action& action) override { + float action0 = action["action"_][0]; + float action1 = action["action"_][1]; + LunarLanderStep(&gen_, 0, action0, action1); + WriteState(); + } + + private: + void WriteState() { + State state = Allocate(); + state["reward"_] = reward_; + state["obs"_].Assign(obs_.begin(), obs_.size()); + } +}; + +using LunarLanderContinuousEnvPool = AsyncEnvPool; + +} // namespace box2d + +#endif // ENVPOOL_BOX2D_LUNAR_LANDER_CONTINUOUS_H_ diff --git a/envpool/box2d/lunar_lander_discrete.h b/envpool/box2d/lunar_lander_discrete.h new file mode 100644 index 00000000..7ce4a188 --- /dev/null +++ b/envpool/box2d/lunar_lander_discrete.h @@ -0,0 +1,76 @@ +/* + * Copyright 2022 Garena Online Private Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ENVPOOL_BOX2D_LUNAR_LANDER_DISCRETE_H_ +#define ENVPOOL_BOX2D_LUNAR_LANDER_DISCRETE_H_ + +#include "envpool/box2d/lunar_lander.h" +#include "envpool/core/async_envpool.h" +#include "envpool/core/env.h" + +namespace box2d { + +class LunarLanderDiscreteEnvFns { + public: + static decltype(auto) DefaultConfig() { + return MakeDict("max_episode_steps"_.Bind(1000), + "reward_threshold"_.Bind(200.0)); + } + template + static decltype(auto) StateSpec(const Config& conf) { + return MakeDict("obs"_.Bind(Spec({8}))); + } + template + static decltype(auto) ActionSpec(const Config& conf) { + return MakeDict("action"_.Bind(Spec({-1}, {0, 3}))); + } +}; + +using LunarLanderDiscreteEnvSpec = EnvSpec; + +class LunarLanderDiscreteEnv : public Env, + public LunarLanderEnv { + public: + LunarLanderDiscreteEnv(const Spec& spec, int env_id) + : Env(spec, env_id), + LunarLanderEnv(false, spec.config["max_episode_steps"_]) {} + + bool IsDone() override { return done_; } + + void Reset() override { + LunarLanderReset(&gen_); + WriteState(); + } + + void Step(const Action& action) override { + int act = action["action"_]; + LunarLanderStep(&gen_, act, 0, 0); + WriteState(); + } + + private: + void WriteState() { + State state = Allocate(); + state["reward"_] = reward_; + state["obs"_].Assign(obs_.begin(), obs_.size()); + } +}; + +using LunarLanderDiscreteEnvPool = AsyncEnvPool; + +} // namespace box2d + +#endif // ENVPOOL_BOX2D_LUNAR_LANDER_DISCRETE_H_ diff --git a/envpool/box2d/registration.py b/envpool/box2d/registration.py new file mode 100644 index 00000000..4ad43e99 --- /dev/null +++ b/envpool/box2d/registration.py @@ -0,0 +1,32 @@ +# Copyright 2022 Garena Online Private Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Box2D env registration.""" + +from envpool.registration import register + +register( + task_id="LunarLander-v2", + import_path="envpool.box2d", + spec_cls="LunarLanderDiscreteEnvSpec", + dm_cls="LunarLanderDiscreteDMEnvPool", + gym_cls="LunarLanderDiscreteGymEnvPool", +) + +register( + task_id="LunarLanderContinuous-v2", + import_path="envpool.box2d", + spec_cls="LunarLanderContinuousEnvSpec", + dm_cls="LunarLanderContinuousDMEnvPool", + gym_cls="LunarLanderContinuousGymEnvPool", +) diff --git a/envpool/entry.py b/envpool/entry.py index 1aa7e361..98d2f7ab 100644 --- a/envpool/entry.py +++ b/envpool/entry.py @@ -14,6 +14,7 @@ """Entry point for all envs' registration.""" import envpool.atari.registration # noqa: F401 +import envpool.box2d.registration # noqa: F401 import envpool.classic_control.registration # noqa: F401 import envpool.mujoco.registration # noqa: F401 import envpool.toy_text.registration # noqa: F401 diff --git a/envpool/make_test.py b/envpool/make_test.py index adcbb3ff..3596b766 100644 --- a/envpool/make_test.py +++ b/envpool/make_test.py @@ -90,6 +90,12 @@ def test_make_toytext(self) -> None: ] ) + def test_make_box2d(self) -> None: + self.check_step([ + "LunarLander-v2", + "LunarLanderContinuous-v2", + ]) + def test_make_mujoco_gym(self) -> None: self.check_step( [ diff --git a/envpool/workspace0.bzl b/envpool/workspace0.bzl index 332e6126..988fd29a 100644 --- a/envpool/workspace0.bzl +++ b/envpool/workspace0.bzl @@ -339,6 +339,18 @@ def workspace(): build_file = "//third_party/mujoco_dmc_xml:mujoco_dmc_xml.BUILD", ) + maybe( + 
http_archive, + name = "box2d", + sha256 = "d6b4650ff897ee1ead27cf77a5933ea197cbeef6705638dd181adc2e816b23c2", + strip_prefix = "box2d-2.4.1", + urls = [ + "https://github.com/erincatto/box2d/archive/refs/tags/v2.4.1.tar.gz", + "https://ml.cs.tsinghua.edu.cn/~jiayi/envpool/erincatto/box2d/v2.4.1.tar.gz", + ], + build_file = "//third_party/box2d:box2d.BUILD", + ) + # Atari/VizDoom pretrained weight for testing pipeline maybe( diff --git a/setup.cfg b/setup.cfg index 99db9218..110c59f0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,6 +43,7 @@ envpool = atari/*.so mujoco/*.so.* mujoco/assets*/*.xml mujoco/assets*/*/*.xml + box2d/*.so [yapf] based_on_style = yapf diff --git a/third_party/box2d/BUILD b/third_party/box2d/BUILD new file mode 100644 index 00000000..e69de29b diff --git a/third_party/box2d/box2d.BUILD b/third_party/box2d/box2d.BUILD new file mode 100644 index 00000000..8a180130 --- /dev/null +++ b/third_party/box2d/box2d.BUILD @@ -0,0 +1,69 @@ +cc_library( + name = "box2d", + srcs = glob(["include/box2d/*.h"]) + [ + "src/collision/b2_broad_phase.cpp", + "src/collision/b2_chain_shape.cpp", + "src/collision/b2_circle_shape.cpp", + "src/collision/b2_collide_circle.cpp", + "src/collision/b2_collide_edge.cpp", + "src/collision/b2_collide_polygon.cpp", + "src/collision/b2_collision.cpp", + "src/collision/b2_distance.cpp", + "src/collision/b2_dynamic_tree.cpp", + "src/collision/b2_edge_shape.cpp", + "src/collision/b2_polygon_shape.cpp", + "src/collision/b2_time_of_impact.cpp", + "src/common/b2_block_allocator.cpp", + "src/common/b2_draw.cpp", + "src/common/b2_math.cpp", + "src/common/b2_settings.cpp", + "src/common/b2_stack_allocator.cpp", + "src/common/b2_timer.cpp", + "src/dynamics/b2_body.cpp", + "src/dynamics/b2_chain_circle_contact.cpp", + "src/dynamics/b2_chain_circle_contact.h", + "src/dynamics/b2_chain_polygon_contact.cpp", + "src/dynamics/b2_chain_polygon_contact.h", + "src/dynamics/b2_circle_contact.cpp", + "src/dynamics/b2_circle_contact.h", + 
"src/dynamics/b2_contact.cpp", + "src/dynamics/b2_contact_manager.cpp", + "src/dynamics/b2_contact_solver.cpp", + "src/dynamics/b2_contact_solver.h", + "src/dynamics/b2_distance_joint.cpp", + "src/dynamics/b2_edge_circle_contact.cpp", + "src/dynamics/b2_edge_circle_contact.h", + "src/dynamics/b2_edge_polygon_contact.cpp", + "src/dynamics/b2_edge_polygon_contact.h", + "src/dynamics/b2_fixture.cpp", + "src/dynamics/b2_friction_joint.cpp", + "src/dynamics/b2_gear_joint.cpp", + "src/dynamics/b2_island.cpp", + "src/dynamics/b2_island.h", + "src/dynamics/b2_joint.cpp", + "src/dynamics/b2_motor_joint.cpp", + "src/dynamics/b2_mouse_joint.cpp", + "src/dynamics/b2_polygon_circle_contact.cpp", + "src/dynamics/b2_polygon_circle_contact.h", + "src/dynamics/b2_polygon_contact.cpp", + "src/dynamics/b2_polygon_contact.h", + "src/dynamics/b2_prismatic_joint.cpp", + "src/dynamics/b2_pulley_joint.cpp", + "src/dynamics/b2_revolute_joint.cpp", + "src/dynamics/b2_weld_joint.cpp", + "src/dynamics/b2_wheel_joint.cpp", + "src/dynamics/b2_world.cpp", + "src/dynamics/b2_world_callbacks.cpp", + "src/rope/b2_rope.cpp", + ], + hdrs = glob(["include/box2d/*.h"]), + includes = [ + "include", + "src", + ], + linkopts = [ + "-ldl", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) diff --git a/third_party/pip_requirements/requirements.txt b/third_party/pip_requirements/requirements.txt index 423881a3..7b7ebaad 100644 --- a/third_party/pip_requirements/requirements.txt +++ b/third_party/pip_requirements/requirements.txt @@ -6,6 +6,7 @@ dm-control filelock gym pygame +box2d mujoco mjc-mwe>=0.0.3 mujoco_py>=2.1.2.14