# ding/envs/env/tests/test_ding_env_wrapper.py

import gym
import numpy as np
import pytest
from easydict import EasyDict

from ding.torch_utils import to_ndarray
from ding.envs.env import DingEnvWrapper


class FakeEnvForTest(gym.Env):
    """
    Minimal fake gym environment used to exercise hybrid (discrete + continuous) action handling.

    Observations are random samples from a 10-dimensional float32 ``Box`` in [-1, 1]. The action space is a
    ``Tuple`` of a 3-way ``Discrete`` space and a 2-dimensional float32 ``Box``. Every step yields a constant
    reward of 0.5 and the episode is reported as done once ``_MAX_EPISODE_STEPS`` steps have been taken.
    """

    # Number of steps after which ``step`` reports the episode as done.
    _MAX_EPISODE_STEPS = 100

    def __init__(self):
        self.observation_space = gym.spaces.Box(low=-1., high=1., shape=(10, ), dtype=np.float32)
        self.action_space = gym.spaces.Tuple(
            (
                gym.spaces.Discrete(3),
                # Build the bounds directly as float32 so they already match the declared dtype
                # instead of being created as float64 and down-cast by ``Box``.
                gym.spaces.Box(
                    low=np.array([0., -1.], dtype=np.float32),
                    high=np.array([1., 1.], dtype=np.float32),
                    shape=(2, ),
                    dtype=np.float32
                )
            )
        )
        # Initialise the counter here so that ``step`` does not fail with an ``AttributeError``
        # when it is called before the first ``reset``.
        self._step_count = 0

    def _sample_obs(self):
        """Return a random observation drawn from ``observation_space`` as a float32 ndarray."""
        obs = self.observation_space.sample()
        return to_ndarray(obs).astype(np.float32)

    def step(self, action):
        """
        Advance the fake episode by one step.

        Arguments:
            - action: An element of ``action_space``; anything else trips the assertion.
        Returns:
            - A ``(obs, reward, done, info)`` tuple with a random observation, a reward of 0.5, \
              ``done`` set once the step limit is reached, and an empty info dict.
        """
        assert self.action_space.contains(action)
        self._step_count += 1
        # ``>=`` keeps ``done`` latched if a caller keeps stepping past the limit.
        done = self._step_count >= self._MAX_EPISODE_STEPS
        return (self._sample_obs(), 0.5, done, {})

    def reset(self):
        """Restart the step counter and return a fresh random observation."""
        self._step_count = 0
        return self._sample_obs()

    def render(self, mode="human", close=False):
        """No-op: the fake environment has nothing to draw."""
        pass

    def close(self):
        """No-op: the fake environment holds no resources."""
        pass


# Register the fake environment under a gym id at import time so that it can be created by name;
# ``test_hybrid`` passes this id as ``env_id``. The entry point names ``FakeEnvForTest`` inside the
# module ``ding.envs.env.tests.test_ding_env_wrapper``.
gym.envs.registration.register(
    id="FakeHybridForTest-v0",
    entry_point="ding.envs.env.tests.test_ding_env_wrapper:FakeEnvForTest",
)


class TestDingEnvWrapper:
    """
    Tests for ``DingEnvWrapper`` built either from an existing gym env or from a config.

    Each episode-based test resets the env, steps it with ``random_action`` until ``timestep.done`` and checks
    that ``'eval_episode_return'`` is then present in ``timestep.info``. Every test closes its env in a
    ``finally`` clause so that resources are released even when an assertion fails.
    """

    @pytest.mark.unittest
    @pytest.mark.parametrize('env_id', ['CartPole-v0', 'Pendulum-v1'])
    def test_cartpole_pendulum(self, env_id):
        """Wrap an already-built gym env and check cfg creation, ``reset`` and ``random_action``."""
        env = gym.make(env_id)
        ding_env = DingEnvWrapper(env=env)
        try:
            print(ding_env.observation_space, ding_env.action_space, ding_env.reward_space)
            cfg = EasyDict(dict(
                collector_env_num=16,
                evaluator_env_num=3,
                is_train=True,
            ))
            collector_env_cfg = ding_env.create_collector_env_cfg(cfg)
            assert isinstance(collector_env_cfg, list)
            evaluator_env_cfg = ding_env.create_evaluator_env_cfg(cfg)
            assert isinstance(evaluator_env_cfg, list)
            obs = ding_env.reset()
            assert isinstance(obs, np.ndarray)
            action = ding_env.random_action()
            print('random_action: {}, action_space: {}'.format(action.shape, ding_env.action_space))
        finally:
            ding_env.close()

    @pytest.mark.envtest
    def test_mujoco(self):
        """Run one random episode of ``Ant-v3`` with the ``mujoco_default`` wrapper."""
        env_cfg = EasyDict(
            env_id='Ant-v3',
            env_wrapper='mujoco_default',
        )
        ding_env_mujoco = DingEnvWrapper(cfg=env_cfg)
        try:
            obs = ding_env_mujoco.reset()
            assert isinstance(obs, np.ndarray)
            while True:
                action = ding_env_mujoco.random_action()
                timestep = ding_env_mujoco.step(action)
                assert timestep.reward.shape == (1, ), timestep.reward.shape
                if timestep.done:
                    assert 'eval_episode_return' in timestep.info, timestep.info
                    break
            print(ding_env_mujoco.observation_space, ding_env_mujoco.action_space, ding_env_mujoco.reward_space)
            action = ding_env_mujoco.random_action()
            assert action.shape == ding_env_mujoco.action_space.shape
        finally:
            ding_env_mujoco.close()

    @pytest.mark.envtest
    @pytest.mark.parametrize('atari_env_id', ['Pong-v4', 'MontezumaRevenge-v4'])
    def test_atari(self, atari_env_id):
        """Run one random episode of an Atari env with the ``atari_default`` wrapper and replay saving enabled."""
        env_cfg = EasyDict(
            env_id=atari_env_id,
            env_wrapper='atari_default',
        )
        ding_env_atari = DingEnvWrapper(cfg=env_cfg)
        try:
            ding_env_atari.enable_save_replay('atari_path/')
            obs = ding_env_atari.reset()
            assert isinstance(obs, np.ndarray)
            assert obs.shape == ding_env_atari.observation_space.shape  # (4, 84, 84)
            while True:
                action = ding_env_atari.random_action()
                timestep = ding_env_atari.step(action)
                assert timestep.reward.shape == ding_env_atari.reward_space.shape, timestep.reward.shape  # (1, )
                if timestep.done:
                    assert 'eval_episode_return' in timestep.info, timestep.info
                    break
            print(ding_env_atari.observation_space, ding_env_atari.action_space, ding_env_atari.reward_space)
            action = ding_env_atari.random_action()
            assert action.shape == (1, )
        finally:
            ding_env_atari.close()

    @pytest.mark.unittest
    @pytest.mark.parametrize('lun_bip_env_id', ['LunarLander-v2', 'LunarLanderContinuous-v2', 'BipedalWalker-v3'])
    def test_lunarlander_bipedalwalker(self, lun_bip_env_id):
        """Run one random episode of a LunarLander / BipedalWalker env with the ``default`` wrapper."""
        env_cfg = EasyDict(
            env_id=lun_bip_env_id,
            env_wrapper='default',
        )
        ding_env_lun_bip = DingEnvWrapper(cfg=env_cfg)
        try:
            obs = ding_env_lun_bip.reset()
            assert isinstance(obs, np.ndarray)
            assert obs.shape == ding_env_lun_bip.observation_space.shape
            while True:
                action = ding_env_lun_bip.random_action()
                timestep = ding_env_lun_bip.step(action)
                assert timestep.reward.shape == ding_env_lun_bip.reward_space.shape, timestep.reward.shape  # (1, )
                if timestep.done:
                    assert 'eval_episode_return' in timestep.info, timestep.info
                    break
            print(ding_env_lun_bip.observation_space, ding_env_lun_bip.action_space, ding_env_lun_bip.reward_space)
            action = ding_env_lun_bip.random_action()
            print('random_action: {}, action_space: {}'.format(action.shape, ding_env_lun_bip.action_space))
        finally:
            ding_env_lun_bip.close()

    @pytest.mark.unittest
    def test_hybrid(self):
        """Run one random episode of the registered fake hybrid env with the ``gym_hybrid_default`` wrapper."""
        env_cfg = EasyDict(env_id='FakeHybridForTest-v0', env_wrapper='gym_hybrid_default')
        ding_env_hybrid = DingEnvWrapper(cfg=env_cfg)
        try:
            obs = ding_env_hybrid.reset()
            assert isinstance(obs, np.ndarray)
            assert obs.shape == ding_env_hybrid.observation_space.shape
            while True:
                action = ding_env_hybrid.random_action()
                for k, v in action.items():
                    if isinstance(v, int):
                        continue
                    # Give every array-valued entry the shape declared by the matching sub-space.
                    # ``reshape`` is used rather than assigning to ``v.shape``, which NumPy discourages;
                    # only existing keys are rebound, so mutating the dict during iteration is safe.
                    action[k] = v.reshape(ding_env_hybrid.action_space[k].shape)
                timestep = ding_env_hybrid.step(action)
                assert timestep.reward.shape == ding_env_hybrid.reward_space.shape, timestep.reward.shape  # (1, )
                if timestep.done:
                    assert 'eval_episode_return' in timestep.info, timestep.info
                    break
            print(ding_env_hybrid.observation_space, ding_env_hybrid.action_space, ding_env_hybrid.reward_space)
            action = ding_env_hybrid.random_action()
            print('random_action', action)
            assert isinstance(action, dict)
        finally:
            ding_env_hybrid.close()

    @pytest.mark.envtest
    def test_AllinObsWrapper(self):
        """Check that the ``reward_in_obs`` wrapper returns dict observations holding ``obs`` and ``reward``."""
        env_cfg = EasyDict(env_id='PongNoFrameskip-v4', env_wrapper='reward_in_obs')
        ding_env_aio = DingEnvWrapper(cfg=env_cfg)
        try:
            data = ding_env_aio.reset()
            assert isinstance(data, dict)
            assert 'obs' in data.keys() and 'reward' in data.keys()
            # Compare shapes, not a shape tuple against the space object itself.
            # NOTE(review): it is not visible here whether the wrapper exposes the raw observation space or a
            # ``Dict`` space keyed like ``data``; both cases are handled -- confirm against the wrapper.
            obs_space = ding_env_aio.observation_space
            if isinstance(obs_space, gym.spaces.Dict):
                obs_space = obs_space['obs']
            assert data['obs'].shape == obs_space.shape
            while True:
                action = ding_env_aio.random_action()
                timestep = ding_env_aio.step(action)
                assert isinstance(timestep.obs, dict)
                if timestep.done:
                    assert 'eval_episode_return' in timestep.info, timestep.info
                    break
            print(ding_env_aio.observation_space, ding_env_aio.action_space, ding_env_aio.reward_space)
        finally:
            ding_env_aio.close()