Use action_transformer environment

<https://medium.com/@kaige.yang0110/ray-rllib-how-to-train-dreamerv3-on-vizdoom-and-atari-122c8bd1170b>

In [None]:
import cv2
import gymnasium as gym
import numpy as np
import vizdoom as vzd
import skimage.transform
from tqdm import tqdm

In [None]:
# Default ViZDoom environment id (not referenced by the other visible cells).
DEFAULT_ENV = "VizdoomBasic-v0"
# Ids of all currently registered Gymnasium environments containing "Vizdoom".
AVAILABLE_ENVS = [
    spec.id for spec in gym.envs.registry.values() if "Vizdoom" in spec.id
]
# Target (height, width) of the resized observation image
IMAGE_SHAPE = (64, 64)

# Training parameters
TRAINING_TIMESTEPS = 1_000_000
N_STEPS = 128
N_ENVS = 8
FRAME_SKIP = 4

In [None]:
class ObservationWrapper(gym.ObservationWrapper):
    """
    Reduce ViZDoom dictionary observations to a resized float32 image.

    ViZDoom environments return dictionaries as observations, containing
    the main image as well as other info.
    The image is also too large for normal training.

    This wrapper replaces the dictionary observation space with a simple
    Box space (i.e., only the "screen" image), and also resizes the image
    to a smaller size.

    NOTE: Ideally, you should set the image size to smaller in the scenario files
        for faster running of ViZDoom. This can really impact performance,
        and this code is pretty slow because of this!

    Args:
        env: Environment whose observation space has a "screen" entry with
            the channel count as its last dimension.
        shape: Target image size as (height, width).
    """

    def __init__(self, env, shape=IMAGE_SHAPE):
        super().__init__(env)
        self.image_shape = shape
        # cv2.resize takes its target size as (width, height), i.e. reversed.
        self.image_shape_reverse = shape[::-1]
        # Side effect: overwrites `frame_skip` on the wrapped env.
        # NOTE(review): presumably the ViZDoom env repeats each action for this
        # many tics -- confirm against the ViZDoom Gymnasium wrapper.
        self.env.frame_skip = FRAME_SKIP

        # Create new observation space with the new shape
        num_channels = env.observation_space["screen"].shape[-1]
        new_shape = (self.image_shape[0], self.image_shape[1], num_channels)
        self.observation_space = gym.spaces.Box(0, 255, shape=new_shape, dtype=np.float32)

    def observation(self, observation):
        """Return the resized "screen" image as float32, shaped like the Box.

        Args:
            observation: Dictionary observation containing a "screen" image.

        Returns:
            Array with shape ``self.observation_space.shape`` and dtype float32.
        """
        resized = cv2.resize(observation["screen"], self.image_shape_reverse)
        # cv2.resize drops the channel axis of single-channel images
        # ((H, W, 1) -> (H, W)); restore it so the result always matches the
        # declared observation space. For multi-channel images this is a no-op.
        resized = resized.reshape(self.observation_space.shape)
        return resized.astype('float32')

In [None]:
def wrap_env(env):
    """Apply ``ObservationWrapper`` to ``env``, then scale its rewards by 0.01."""
    image_env = ObservationWrapper(env)
    return gym.wrappers.TransformReward(image_env, lambda reward: reward * 0.01)

def reward_wrap_env(env):
    """Return ``env`` with rewards scaled by 0.01; observations are untouched."""
    return gym.wrappers.TransformReward(env, lambda reward: reward * 0.01)

In [None]:
# import vizdoom.gymnasium_wrapper  # noqa
import vizdoom
from vizdoom.gymnasium_wrapper.gymnasium_env_defns import VizdoomScenarioEnv
from ray.tune.registry import register_env

In [None]:
# Keyword arguments forwarded to VizdoomScenarioEnv by env_creator.
VIZDOOM_ENV_KWARGS = {"scenario_file": "basic.cfg"}
# Kept for backward compatibility. The name `config` is rebound to the
# algorithm config in a later cell, so env_creator must not read it.
config = VIZDOOM_ENV_KWARGS


def env_creator(env_config):
    """Build the wrapped ViZDoom environment registered as 'vizdoom_env'.

    Args:
        env_config: Per-environment config passed in by the caller of the
            creator; it is ignored, the scenario always comes from
            VIZDOOM_ENV_KWARGS.

    Returns:
        A VizdoomScenarioEnv wrapped by ``wrap_env``.
    """
    # Read the dedicated constant rather than the global `config`, which no
    # longer holds these kwargs once the algorithm config has been created.
    return wrap_env(VizdoomScenarioEnv(**VIZDOOM_ENV_KWARGS))


register_env('vizdoom_env', env_creator)

In [None]:
import ray
from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config

# Initialise Ray with default settings; a later cell reads
# ray.cluster_resources() to size the learner setup.
ray.init()


In [None]:
# Total number of CPUs Ray reports for the cluster.
num_cpus = int(ray.cluster_resources()['CPU'])

# One CPU is reserved for the main process (see .resources below); every
# remaining CPU gets its own single-CPU learner. On a 1-CPU cluster this is 0.
num_learner_workers = num_cpus - 1
num_cpus_per_learner_workers = 1

config = (
    DreamerV3Config()
    .environment(env='vizdoom_env')
    # Learner settings all live on .learners(); the block already relied on
    # this API for num_cpus_per_learner, so the learner count is set here too
    # instead of through the deprecated .resources(num_learner_workers=...).
    .learners(
        num_learners=num_learner_workers,
        num_cpus_per_learner=num_cpus_per_learner_workers,
    )
    .resources(num_cpus_for_main_process=1)
    # .env_runners() replaces the deprecated .rollouts() and accepts the
    # same arguments.
    .env_runners(num_envs_per_env_runner=1, remote_worker_envs=False)
    .training(
        model_size="S",
        training_ratio=512,
        # Scale the batch with the learner count, but never below 16: with a
        # single CPU num_learner_workers is 0 and the batch size would be 0.
        batch_size_B=16 * max(1, num_learner_workers),
    )
)

In [None]:
# Number of training iterations; used as the length of the manual training
# loop and as the "training_iteration" stop criterion of the Tune run.
iteration_num = 1000

In [None]:
# Build the algorithm from the config and train it for iteration_num iterations.
algo = config.build()
print('------ algo=', algo)
try:
    for iteration in tqdm(range(iteration_num)):
        result = algo.train()
        print('result.keys', result.keys())
finally:
    # Always release the algorithm's workers, even if training fails or is
    # interrupted; otherwise the CPUs they hold stay reserved and the Tune
    # run in a later cell may be unable to schedule its trial.
    algo.stop()

In [None]:
from ray import train, tune

In [None]:
# Save a checkpoint every `ck_save_freq` iterations and once more at the end.
ck_save_freq = 100
checkpoint_config = train.CheckpointConfig(
    checkpoint_frequency=ck_save_freq,
    checkpoint_at_end=True,
)
# Stop the trial once it has completed `iteration_num` training iterations.
run_config = train.RunConfig(
    stop={"training_iteration": iteration_num},
    checkpoint_config=checkpoint_config,
)
tuner = tune.Tuner("DreamerV3", param_space=config, run_config=run_config)

result = tuner.fit()

In [None]:
# Shut down the Ray runtime that was started with ray.init().
ray.shutdown()