Skip to content

Commit

Permalink
feature(zc): add carracing in box2d (#575)
Browse files Browse the repository at this point in the history
* carracing-v0

* add config

* format

* init

* add env_table

* modify test_carracing_env

* modify config and env

* add gif
  • Loading branch information
Super1ce committed Feb 9, 2023
1 parent f2e5f81 commit b36dd80
Show file tree
Hide file tree
Showing 8 changed files with 263 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ P.S: The `.py` file in `Runnable Demo` can be found in `dizoo`
| 31 |[gym-pybullet-drones](https://github.com/utiasDSL/gym-pybullet-drones) | ![continuous](https://img.shields.io/badge/-continous-green) | ![original](./dizoo/gym-pybullet-drones/gym-pybullet-drones.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/gym_pybullet_drones/envs)<br>环境指南 |
| 32 |[beergame](https://github.com/OptMLGroup/DeepBeerInventory-RL) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | ![original](./dizoo/beergame/beergame.png) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/beergame/envs)<br>环境指南 |
| 33 |[classic_control/acrobot](https://github.com/openai/gym/tree/master/gym/envs/classic_control) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | ![original](./dizoo/classic_control/acrobot/acrobot.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/classic_control/acrobot/envs)<br>环境指南 |
| 34 |[box2d/car_racing](https://github.com/openai/gym/blob/master/gym/envs/box2d/car_racing.py) | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) <br> ![continuous](https://img.shields.io/badge/-continous-green) | ![original](./dizoo/box2d/carracing/car_racing.gif) | [dizoo link](https://github.com/opendilab/DI-engine/tree/main/dizoo/box2d/carracing/envs)<br>环境指南 |

![discrete](https://img.shields.io/badge/-discrete-brightgreen) means discrete action space

Expand Down
Empty file.
Binary file added dizoo/box2d/carracing/car_racing.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions dizoo/box2d/carracing/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .carracing_dqn_config import carracing_dqn_config, carracing_dqn_create_config
63 changes: 63 additions & 0 deletions dizoo/box2d/carracing/config/carracing_dqn_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from easydict import EasyDict

# Shared n-step setting; forwarded to the policy section below.
nstep = 3

# Main experiment configuration for DQN on the discrete-action CarRacing env.
carracing_dqn_config = EasyDict(
    {
        'exp_name': 'carracing_dqn_seed0',
        'env': {
            'collector_env_num': 8,
            'evaluator_env_num': 8,
            'env_id': 'CarRacing-v2',
            'continuous': False,
            'n_evaluator_episode': 8,
            'stop_value': 900,
            # 'replay_path': './carracing_dqn_seed0/video',
        },
        'policy': {
            'cuda': True,
            # 'load_path': 'carracing_dqn_seed0/ckpt/ckpt_best.pth.tar',
            'model': {
                'obs_shape': [3, 96, 96],
                'action_shape': 5,
                'encoder_hidden_size_list': [64, 64, 128],
                'dueling': True,
            },
            'discount_factor': 0.99,
            'nstep': nstep,
            'learn': {
                'update_per_collect': 10,
                'batch_size': 64,
                'learning_rate': 0.0001,
                'target_update_freq': 100,
            },
            'collect': {
                'n_sample': 64,
            },
            'other': {
                # Exploration epsilon schedule.
                'eps': {
                    'type': 'exp',
                    'start': 0.95,
                    'end': 0.1,
                    'decay': 50000,
                },
                'replay_buffer': {
                    'replay_buffer_size': 100000,
                },
            },
        },
    }
)
# Conventional alias picked up by the entry point at the bottom of the file.
main_config = carracing_dqn_config

# Creation configuration: names the env, env manager and policy types to
# instantiate, and the module that must be imported for the env type.
carracing_dqn_create_config = EasyDict(
    {
        'env': {
            'type': 'carracing',
            'import_names': ['dizoo.box2d.carracing.envs.carracing_env'],
        },
        'env_manager': {
            'type': 'subprocess',
        },
        'policy': {
            'type': 'dqn',
        },
    }
)
# Conventional alias picked up by the entry point at the bottom of the file.
create_config = carracing_dqn_create_config

if __name__ == "__main__":
    # Alternative CLI entry: `ding -m serial -c carracing_dqn_config.py -s 0`
    from ding.entry import serial_pipeline

    experiment_seed = 0
    serial_pipeline([main_config, create_config], seed=experiment_seed)
1 change: 1 addition & 0 deletions dizoo/box2d/carracing/envs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .carracing_env import CarRacingEnv
161 changes: 161 additions & 0 deletions dizoo/box2d/carracing/envs/carracing_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
from typing import Optional
import copy
import os


import gym
import numpy as np
from easydict import EasyDict

from ding.envs import BaseEnv, BaseEnvTimestep
from ding.envs import ObsPlusPrevActRewWrapper
from ding.envs.common import affine_transform, save_frames_as_gif
from ding.torch_utils import to_ndarray
from ding.utils import ENV_REGISTRY


@ENV_REGISTRY.register('carracing')
class CarRacingEnv(BaseEnv):
    """DI-engine environment wrapper around the gym CarRacing environment.

    Observations returned by ``reset`` and ``step`` are converted to
    ``float32``, divided by 255 and transposed from ``(H, W, C)`` to
    ``(C, H, W)``. Rewards are returned as ``float32`` arrays of shape ``(1,)``.

    The underlying gym environment is created lazily on the first ``reset``.
    """

    # Default values for the optional config keys read by ``__init__``.
    config = dict(
        replay_path=None,
        save_replay_gif=False,
        replay_path_gif=None,
        action_clip=False,
    )

    @classmethod
    def default_config(cls: type) -> EasyDict:
        """Return a deep copy of ``config`` as an ``EasyDict`` with ``cfg_type`` set."""
        cfg = EasyDict(copy.deepcopy(cls.config))
        cfg.cfg_type = cls.__name__ + 'Dict'
        return cfg

    def __init__(self, cfg: dict) -> None:
        """Store the configuration; the gym env itself is built in ``reset``.

        Args:
            cfg: Attribute-accessible dict (e.g. ``EasyDict``). ``env_id`` and
                ``continuous`` are required; ``act_scale`` is required when
                ``continuous`` is true. ``replay_path_gif``,
                ``save_replay_gif`` and ``action_clip`` are optional and fall
                back to the class-level ``config`` defaults.
        """
        self._cfg = cfg
        self._init_flag = False
        # env_id: CarRacing-v2
        self._env_id = cfg.env_id
        self._replay_path = None
        # Optional keys: fall back to class defaults so that a minimal cfg
        # (without the replay-related entries) does not raise AttributeError.
        self._replay_path_gif = cfg.get('replay_path_gif', self.config['replay_path_gif'])
        self._save_replay_gif = cfg.get('save_replay_gif', self.config['save_replay_gif'])
        self._save_replay_count = 0
        # Always present so that enabling gif saving after ``reset`` cannot
        # hit a missing attribute in ``step``.
        self._frames = []
        if cfg.continuous:
            self._act_scale = cfg.act_scale  # act_scale only works in continuous env
            self._action_clip = cfg.get('action_clip', self.config['action_clip'])
        else:
            self._act_scale = False
            self._action_clip = False

    def _wrap_record_video(self, env):
        """Wrap ``env`` in ``gym.wrappers.RecordVideo`` writing to ``self._replay_path``."""
        return gym.wrappers.RecordVideo(
            env,
            video_folder=self._replay_path,
            episode_trigger=lambda episode_id: True,
            name_prefix='rl-video-{}'.format(id(self))
        )

    def reset(self) -> np.ndarray:
        """Create the env on first use, seed it, and start a new episode.

        Returns:
            The first observation as a ``float32`` array scaled by ``1 / 255``
            and transposed with ``(2, 0, 1)``.
        """
        if not self._init_flag:
            self._env = gym.make(self._cfg.env_id, continuous=self._cfg.continuous)
            if self._replay_path is not None:
                self._env = self._wrap_record_video(self._env)
            raw_space = self._env.observation_space
            # Advertise the space of the *processed* observation: scaled
            # bounds and axes reordered the same way as the observation.
            self._observation_space = gym.spaces.Box(
                low=np.min(raw_space.low.astype(np.float32) / 255),
                high=np.max(raw_space.high.astype(np.float32) / 255),
                shape=(raw_space.shape[2], raw_space.shape[0], raw_space.shape[1]),
                dtype=np.float32
            )
            self._action_space = self._env.action_space
            self._reward_space = gym.spaces.Box(
                low=self._env.reward_range[0], high=self._env.reward_range[1], shape=(1, ), dtype=np.float32
            )
            self._init_flag = True
        if hasattr(self, '_seed') and hasattr(self, '_dynamic_seed') and self._dynamic_seed:
            # Dynamic seeding: add a random offset so each episode differs.
            np_seed = 100 * np.random.randint(1, 1000)
            self._env.seed(self._seed + np_seed)
        elif hasattr(self, '_seed'):
            self._env.seed(self._seed)
        self._eval_episode_return = 0
        obs = self._env.reset()
        obs = obs.astype(np.float32) / 255
        obs = obs.transpose(2, 0, 1)
        obs = to_ndarray(obs)
        if self._save_replay_gif:
            # Start a fresh frame buffer for this episode.
            self._frames = []
        return obs

    def close(self) -> None:
        """Close the underlying env if it has been created."""
        if self._init_flag:
            self._env.close()
        self._init_flag = False

    def render(self) -> None:
        """Delegate to the underlying env's ``render``."""
        self._env.render()

    def seed(self, seed: int, dynamic_seed: bool = True) -> None:
        """Record the seed settings used by ``reset`` and seed NumPy's global RNG.

        Args:
            seed: Base seed.
            dynamic_seed: If true, ``reset`` adds a random offset to ``seed``
                on every episode.
        """
        self._seed = seed
        self._dynamic_seed = dynamic_seed
        np.random.seed(self._seed)

    def step(self, action: np.ndarray) -> BaseEnvTimestep:
        """Apply ``action`` and return the processed transition.

        Args:
            action: Action as a NumPy array. An array of shape ``(1,)`` is
                unwrapped to a scalar before being passed to the env.

        Returns:
            ``BaseEnvTimestep(obs, rew, done, info)``. When ``done`` is true,
            ``info['eval_episode_return']`` holds the accumulated episode
            reward, and a gif of the episode is written if gif saving is on.
        """
        assert isinstance(action, np.ndarray), type(action)
        if action.shape == (1, ):
            action = action.item()  # 0-dim array
        if self._act_scale:
            action = affine_transform(action, action_clip=self._action_clip, min_val=-1, max_val=1)
        if self._save_replay_gif:
            self._frames.append(self._env.render(mode='rgb_array'))
        obs, rew, done, info = self._env.step(action)
        obs = obs.astype(np.float32) / 255
        obs = obs.transpose(2, 0, 1)
        self._eval_episode_return += rew
        if done:
            info['eval_episode_return'] = self._eval_episode_return
            if self._save_replay_gif:
                os.makedirs(self._replay_path_gif, exist_ok=True)
                path = os.path.join(
                    self._replay_path_gif, '{}_episode_{}.gif'.format(self._env_id, self._save_replay_count)
                )
                save_frames_as_gif(self._frames, path)
                self._save_replay_count += 1

        obs = to_ndarray(obs)
        rew = to_ndarray([rew]).astype(np.float32)  # wrapped to be transferred to a array with shape (1,)
        return BaseEnvTimestep(obs, rew, done, info)

    def enable_save_replay(self, replay_path: Optional[str] = None) -> None:
        """Turn on video recording and gif saving.

        Args:
            replay_path: Output directory; defaults to ``'./video'``. It is
                also used for gifs when no ``replay_path_gif`` was configured.
        """
        if replay_path is None:
            replay_path = './video'
        self._replay_path = replay_path
        self._save_replay_gif = True
        self._save_replay_count = 0
        if self._replay_path_gif is None:
            # Without a gif directory the episode-end save in ``step`` would
            # fail on a ``None`` path; reuse the replay directory instead.
            self._replay_path_gif = replay_path
        if self._init_flag:
            # NOTE: the original author warns that wrapping an already-created
            # env here "can lead to the meaningless result".
            self._env = self._wrap_record_video(self._env)
        # Otherwise the env does not exist yet; ``reset`` applies the wrapper
        # because ``self._replay_path`` is now set.

    def random_action(self) -> np.ndarray:
        """Sample from the action space and return it in a form ``step`` accepts.

        Integer samples (Python ``int`` or NumPy integer scalars) are wrapped
        into an ``int64`` array of shape ``(1,)``; array samples are returned
        unchanged.
        """
        random_action = self.action_space.sample()
        if isinstance(random_action, (int, np.integer)):
            random_action = to_ndarray([int(random_action)], dtype=np.int64)
        return random_action

    @property
    def observation_space(self) -> gym.spaces.Space:
        """Space of the processed observation; available after the first ``reset``."""
        return self._observation_space

    @property
    def action_space(self) -> gym.spaces.Space:
        """Action space of the underlying env; available after the first ``reset``."""
        return self._action_space

    @property
    def reward_space(self) -> gym.spaces.Space:
        """Shape-``(1,)`` reward space; available after the first ``reset``."""
        return self._reward_space

    def __repr__(self) -> str:
        return "DI-engine CarRacing Env"
36 changes: 36 additions & 0 deletions dizoo/box2d/carracing/envs/test_carracing_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pytest
import numpy as np
from easydict import EasyDict
from carracing_env import CarRacingEnv


@pytest.mark.envtest
@pytest.mark.parametrize(
    'cfg', [
        EasyDict({
            'env_id': 'CarRacing-v2',
            'continuous': False,
            'act_scale': False,
            # Keys read by CarRacingEnv.__init__; without them construction
            # fails with AttributeError on the EasyDict.
            'replay_path': None,
            'save_replay_gif': False,
            'replay_path_gif': None,
            'action_clip': False,
        })
    ]
)
class TestCarRacing:
    """Smoke test for CarRacingEnv with the discrete action space."""

    def test_naive(self, cfg):
        """Seed, reset and take 10 random steps, checking shapes and types."""
        env = CarRacingEnv(cfg)
        try:
            env.seed(314)
            assert env._seed == 314
            obs = env.reset()
            assert obs.shape == (3, 96, 96)
            for _ in range(10):
                random_action = env.random_action()
                timestep = env.step(random_action)
                print(timestep)
                assert isinstance(timestep.obs, np.ndarray)
                assert isinstance(timestep.done, bool)
                assert timestep.obs.shape == (3, 96, 96)
                assert timestep.reward.shape == (1, )
                assert timestep.reward >= env.reward_space.low
                assert timestep.reward <= env.reward_space.high
            print(env.observation_space, env.action_space, env.reward_space)
        finally:
            # Release the env even when an assertion above fails.
            env.close()

0 comments on commit b36dd80

Please sign in to comment.