diff --git a/README.md b/README.md
index 127b15745c..dcb8d46c3b 100644
--- a/README.md
+++ b/README.md
@@ -283,6 +283,7 @@ P.S: The `.py` file in `Runnable Demo` can be found in `dizoo`
 | 31 |[gym-pybullet-drones](https://github.com/utiasDSL/gym-pybullet-drones) | ![continuous](https://img.shields.io/badge/-continous-green) | ![original](./dizoo/gym-pybullet-drones/gym-pybullet-drones.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/gym_pybullet_drones/envs)<br>env guide |
 | 32 |[beergame](https://github.com/OptMLGroup/DeepBeerInventory-RL) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | ![original](./dizoo/beergame/beergame.png) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/beergame/envs)<br>env guide |
 | 33 |[classic_control/acrobot](https://github.com/openai/gym/tree/master/gym/envs/classic_control) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | ![original](./dizoo/classic_control/acrobot/acrobot.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/classic_control/acrobot/envs)<br>env guide |
+| 34 |[box2d/car_racing](https://github.com/openai/gym/blob/master/gym/envs/box2d/car_racing.py) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen)![continuous](https://img.shields.io/badge/-continous-green) | ![original](./dizoo/box2d/carracing/car_racing.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/box2d/carracing/envs)<br>env guide |
 
 ![discrete](https://img.shields.io/badge/-discrete-brightgreen) means discrete action space
diff --git a/dizoo/box2d/carracing/__init__.py b/dizoo/box2d/carracing/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/dizoo/box2d/carracing/car_racing.gif b/dizoo/box2d/carracing/car_racing.gif
new file mode 100644
index 0000000000..5d3bdd38e9
Binary files /dev/null and b/dizoo/box2d/carracing/car_racing.gif differ
diff --git a/dizoo/box2d/carracing/config/__init__.py b/dizoo/box2d/carracing/config/__init__.py
new file mode 100644
index 0000000000..1571e58a64
--- /dev/null
+++ b/dizoo/box2d/carracing/config/__init__.py
@@ -0,0 +1 @@
+from .carracing_dqn_config import carracing_dqn_config, carracing_dqn_create_config
diff --git a/dizoo/box2d/carracing/config/carracing_dqn_config.py b/dizoo/box2d/carracing/config/carracing_dqn_config.py
new file mode 100644
index 0000000000..31dd42fca8
--- /dev/null
+++ b/dizoo/box2d/carracing/config/carracing_dqn_config.py
@@ -0,0 +1,63 @@
+from easydict import EasyDict
+
+nstep = 3
+carracing_dqn_config = dict(
+    exp_name='carracing_dqn_seed0',
+    env=dict(
+        collector_env_num=8,
+        evaluator_env_num=8,
+        env_id='CarRacing-v2',
+        continuous=False,
+        n_evaluator_episode=8,
+        stop_value=900,
+        # replay_path='./carracing_dqn_seed0/video',
+    ),
+    policy=dict(
+        cuda=True,
+        # load_path='carracing_dqn_seed0/ckpt/ckpt_best.pth.tar',
+        model=dict(
+            obs_shape=[3, 96, 96],
+            action_shape=5,
+            encoder_hidden_size_list=[64, 64, 128],
+            dueling=True,
+        ),
+        discount_factor=0.99,
+        nstep=nstep,
+        learn=dict(
+            update_per_collect=10,
+            batch_size=64,
+            learning_rate=0.0001,
+            target_update_freq=100,
+        ),
+        collect=dict(
+            n_sample=64,
+        ),
+        other=dict(
+            eps=dict(
+                type='exp',
+                start=0.95,
+                end=0.1,
+                decay=50000,
+            ),
+            replay_buffer=dict(replay_buffer_size=100000, )
+        ),
+    ),
+)
+carracing_dqn_config = EasyDict(carracing_dqn_config)
+main_config = carracing_dqn_config
+
+carracing_dqn_create_config = dict(
+    env=dict(
+        type='carracing',
+        import_names=['dizoo.box2d.carracing.envs.carracing_env'],
+    ),
+    env_manager=dict(type='subprocess'),
+    policy=dict(type='dqn'),
+)
+carracing_dqn_create_config = EasyDict(carracing_dqn_create_config)
+create_config = carracing_dqn_create_config
+
+if __name__ == "__main__":
+    # or you can enter `ding -m serial -c carracing_dqn_config.py -s 0`
+    from ding.entry import serial_pipeline
+    serial_pipeline([main_config, create_config], seed=0)
\ No newline at end of file
diff --git a/dizoo/box2d/carracing/envs/__init__.py b/dizoo/box2d/carracing/envs/__init__.py
new file mode 100644
index 0000000000..a36760ccf7
--- /dev/null
+++ b/dizoo/box2d/carracing/envs/__init__.py
@@ -0,0 +1 @@
+from .carracing_env import CarRacingEnv
diff --git a/dizoo/box2d/carracing/envs/carracing_env.py b/dizoo/box2d/carracing/envs/carracing_env.py
new file mode 100644
index 0000000000..39b82a2502
--- /dev/null
+++ b/dizoo/box2d/carracing/envs/carracing_env.py
@@ -0,0 +1,161 @@
+from typing import Optional
+import copy
+import os
+
+
+import gym
+import numpy as np
+from easydict import EasyDict
+
+from ding.envs import BaseEnv, BaseEnvTimestep
+from ding.envs import ObsPlusPrevActRewWrapper
+from ding.envs.common import affine_transform, save_frames_as_gif
+from ding.torch_utils import to_ndarray
+from ding.utils import ENV_REGISTRY
+
+
+@ENV_REGISTRY.register('carracing')
+class CarRacingEnv(BaseEnv):
+
+    config = dict(
+        replay_path=None,
+        save_replay_gif=False,
+        replay_path_gif=None,
+        action_clip=False,
+    )
+
+    @classmethod
+    def default_config(cls: type) -> EasyDict:
+        cfg = EasyDict(copy.deepcopy(cls.config))
+        cfg.cfg_type = cls.__name__ + 'Dict'
+        return cfg
+
+    def __init__(self, cfg: dict) -> None:
+        self._cfg = cfg
+        self._init_flag = False
+        # env_id: CarRacing-v2
+        self._env_id = cfg.env_id
+        self._replay_path = None
+        self._replay_path_gif = cfg.replay_path_gif
+        self._save_replay_gif = cfg.save_replay_gif
+        self._save_replay_count = 0
+        if cfg.continuous:
+            self._act_scale = cfg.act_scale  # act_scale only works in continuous env
+            self._action_clip = cfg.action_clip
+        else:
+            self._act_scale = False
+
+    def reset(self) -> np.ndarray:
+        if not self._init_flag:
+            self._env = gym.make(self._cfg.env_id, continuous=self._cfg.continuous)
+            if self._replay_path is not None:
+                self._env = gym.wrappers.RecordVideo(
+                    self._env,
+                    video_folder=self._replay_path,
+                    episode_trigger=lambda episode_id: True,
+                    name_prefix='rl-video-{}'.format(id(self))
+                )
+            self._observation_space = gym.spaces.Box(
+                low=np.min(self._env.observation_space.low.astype(np.float32) / 255),
+                high=np.max(self._env.observation_space.high.astype(np.float32) / 255),
+                shape=(
+                    self._env.observation_space.shape[2], self._env.observation_space.shape[0],
+                    self._env.observation_space.shape[1]
+                ),
+                dtype=np.float32
+            )
+            self._action_space = self._env.action_space
+            self._reward_space = gym.spaces.Box(
+                low=self._env.reward_range[0], high=self._env.reward_range[1], shape=(1, ), dtype=np.float32
+            )
+            self._init_flag = True
+        if hasattr(self, '_seed') and hasattr(self, '_dynamic_seed') and self._dynamic_seed:
+            np_seed = 100 * np.random.randint(1, 1000)
+            self._env.seed(self._seed + np_seed)
+        elif hasattr(self, '_seed'):
+            self._env.seed(self._seed)
+        self._eval_episode_return = 0
+        obs = self._env.reset()
+        obs = obs.astype(np.float32) / 255
+        obs = obs.transpose(2, 0, 1)
+        obs = to_ndarray(obs)
+        if self._save_replay_gif:
+            self._frames = []
+        return obs
+
+    def close(self) -> None:
+        if self._init_flag:
+            self._env.close()
+        self._init_flag = False
+
+    def render(self) -> None:
+        self._env.render()
+
+    def seed(self, seed: int, dynamic_seed: bool = True) -> None:
+        self._seed = seed
+        self._dynamic_seed = dynamic_seed
+        np.random.seed(self._seed)
+
+    def step(self, action: np.ndarray) -> BaseEnvTimestep:
+        assert isinstance(action, np.ndarray), type(action)
+        if action.shape == (1, ):
+            action = action.item()  # 0-dim array
+        if self._act_scale:
+            action = affine_transform(action, action_clip=self._action_clip, min_val=-1, max_val=1)
+        if self._save_replay_gif:
+            self._frames.append(self._env.render(mode='rgb_array'))
+        obs, rew, done, info = self._env.step(action)
+        obs = obs.astype(np.float32) / 255
+        obs = obs.transpose(2, 0, 1)
+        self._eval_episode_return += rew
+        if done:
+            info['eval_episode_return'] = self._eval_episode_return
+            if self._save_replay_gif:
+                if not os.path.exists(self._replay_path_gif):
+                    os.makedirs(self._replay_path_gif)
+                path = os.path.join(
+                    self._replay_path_gif, '{}_episode_{}.gif'.format(self._env_id, self._save_replay_count)
+                )
+                save_frames_as_gif(self._frames, path)
+                self._save_replay_count += 1
+
+        obs = to_ndarray(obs)
+        rew = to_ndarray([rew]).astype(np.float32)  # wrap the scalar reward into an array with shape (1, )
+        return BaseEnvTimestep(obs, rew, done, info)
+
+    def enable_save_replay(self, replay_path: Optional[str] = None) -> None:
+        if replay_path is None:
+            replay_path = './video'
+        self._replay_path = replay_path
+        self._save_replay_gif = True
+        self._save_replay_count = 0
+        # Note: this wrapping can lead to meaningless results if the env is wrapped by RecordVideo again in reset()
+        self._env = gym.wrappers.RecordVideo(
+            self._env,
+            video_folder=self._replay_path,
+            episode_trigger=lambda episode_id: True,
+            name_prefix='rl-video-{}'.format(id(self))
+        )
+
+    def random_action(self) -> np.ndarray:
+        random_action = self.action_space.sample()
+        if isinstance(random_action, np.ndarray):
+            pass
+        elif isinstance(random_action, int):
+            random_action = to_ndarray([random_action], dtype=np.int64)
+        return random_action
+
+    @property
+    def observation_space(self) -> gym.spaces.Space:
+        return self._observation_space
+
+    @property
+    def action_space(self) -> gym.spaces.Space:
+        return self._action_space
+
+    @property
+    def reward_space(self) -> gym.spaces.Space:
+        return self._reward_space
+
+    def __repr__(self) -> str:
+        return "DI-engine CarRacing Env"
diff --git a/dizoo/box2d/carracing/envs/test_carracing_env.py b/dizoo/box2d/carracing/envs/test_carracing_env.py
new file mode 100644
index 0000000000..7eb4a75039
--- /dev/null
+++ b/dizoo/box2d/carracing/envs/test_carracing_env.py
@@ -0,0 +1,38 @@
+import pytest
+import numpy as np
+from easydict import EasyDict
+from dizoo.box2d.carracing.envs import CarRacingEnv
+
+
+@pytest.mark.envtest
+@pytest.mark.parametrize(
+    'cfg', [
+        EasyDict({
+            'env_id': 'CarRacing-v2',
+            'continuous': False,
+            'act_scale': False,
+            'save_replay_gif': False,  # read unconditionally in CarRacingEnv.__init__
+            'replay_path_gif': None
+        })
+    ]
+)
+class TestCarRacing:
+
+    def test_naive(self, cfg):
+        env = CarRacingEnv(cfg)
+        env.seed(314)
+        assert env._seed == 314
+        obs = env.reset()
+        assert obs.shape == (3, 96, 96)
+        for i in range(10):
+            random_action = env.random_action()
+            timestep = env.step(random_action)
+            print(timestep)
+            assert isinstance(timestep.obs, np.ndarray)
+            assert isinstance(timestep.done, bool)
+            assert timestep.obs.shape == (3, 96, 96)
+            assert timestep.reward.shape == (1, )
+            assert timestep.reward >= env.reward_space.low
+            assert timestep.reward <= env.reward_space.high
+        print(env.observation_space, env.action_space, env.reward_space)
+        env.close()
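For reviewers who want to smoke-test the new env outside of pytest, a short random rollout works. This is a minimal sketch, not part of the PR: it assumes `gym[box2d]` is installed with a gym version compatible with the calls this env uses (`env.seed()`, 4-tuple `step()`), that DI-engine and this branch's `dizoo` package are importable, and the cfg values below are illustrative, chosen to cover every key `CarRacingEnv.__init__` reads:

```python
import numpy as np
from easydict import EasyDict

from dizoo.box2d.carracing.envs import CarRacingEnv

# Hand-written cfg covering the keys CarRacingEnv.__init__ accesses
# (illustrative values, mirroring the test cfg above).
cfg = EasyDict({
    'env_id': 'CarRacing-v2',
    'continuous': False,       # discrete variant, as in carracing_dqn_config
    'act_scale': False,
    'save_replay_gif': False,
    'replay_path_gif': None,
})

env = CarRacingEnv(cfg)
env.seed(0, dynamic_seed=False)
obs = env.reset()
assert obs.shape == (3, 96, 96)  # channel-first, scaled to [0, 1]

episode_return = 0.0
for _ in range(100):  # cap the rollout; a full CarRacing episode runs ~1000 steps
    timestep = env.step(env.random_action())
    episode_return += float(timestep.reward)
    if timestep.done:
        break
print('random rollout return:', episode_return)
env.close()
```

This also shows why the DQN config sets `continuous=False` and `action_shape=5`: with `continuous=False`, gym's CarRacing-v2 exposes a `Discrete(5)` action space (do nothing, steer left, steer right, gas, brake).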