## Ray startup

In [1]:
import ray

# Shut down any Ray runtime left over from an earlier run of this cell, then
# bring up a fresh local instance with a fixed CPU/GPU budget.
ray.shutdown()
ray.init(
    num_cpus=10,
    num_gpus=1.0,
    ignore_reinit_error=True,
)

2023-05-19 09:03:18,780	INFO worker.py:1519 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.10.0
Ray version:,2.1.0
Dashboard:,http://127.0.0.1:8265


## Registering envs for training

In [None]:
import gym
from ray.rllib.env.wrappers.dm_control_wrapper import DMCEnv
from ray.tune.registry import register_env
from dm_control.suite import ALL_TASKS, BENCHMARKING, EXTRA

In [2]:
# Rendering / stepping options shared by every DMC environment built below.
from_pixels = False
height = 64
width = 64
frame_skip = 2
channels_first = True
# Original author's note on this value: "doesn't work".
# NOTE(review): presumably the episode-length cap is not being honoured — TODO confirm.
max_horizon = 1000


def _make_dmc_env_creator(domain_name, task_name):
    """Return an env-creator callable bound to one specific DMC domain/task.

    A bare ``lambda`` defined inside the registration loop looks up the loop
    variables when it is *called*, not when it is defined, so every creator
    kept in ``DEFAULT_ENVS_REGISTRY`` would build whatever pair the loop
    variables held last. Passing the pair through this factory gives each
    creator its own binding.

    Args:
        domain_name: First element of a ``BENCHMARKING`` pair.
        task_name: Second element of a ``BENCHMARKING`` pair.

    Returns:
        A callable taking a single ``config`` argument and returning a new
        ``DMCEnv`` for ``(domain_name, task_name)``.
    """

    def _create_env(config):
        # `config` is part of the creator signature but is not consulted here;
        # the options come from the module-level settings above.
        return DMCEnv(
            domain_name,
            task_name,
            from_pixels=from_pixels,
            height=height,
            width=width,
            frame_skip=frame_skip,
            channels_first=channels_first,
        )

    return _create_env


# Maps "<domain>-<task>" to its creator so environments can also be built locally.
DEFAULT_ENVS_REGISTRY = {}
for env, task in BENCHMARKING:
    env_task_name = f"{env}-{task}"
    print(env_task_name)
    env_creator = _make_dmc_env_creator(env, task)
    register_env(env_task_name, env_creator)
    DEFAULT_ENVS_REGISTRY[env_task_name] = env_creator

print(f">>> Number of registered envs: {len(BENCHMARKING)}")

acrobot-swingup
acrobot-swingup_sparse
ball_in_cup-catch
cartpole-balance
cartpole-balance_sparse
cartpole-swingup
cartpole-swingup_sparse
cheetah-run
finger-spin
finger-turn_easy
finger-turn_hard
fish-upright
fish-swim
hopper-stand
hopper-hop
humanoid-stand
humanoid-walk
humanoid-run
manipulator-bring_ball
pendulum-swingup
point_mass-easy
reacher-easy
reacher-hard
swimmer-swimmer6
swimmer-swimmer15
walker-stand
walker-walk
walker-run
>>> Number of registered envs: 28


## Train agents on DMC Benchmark tasks

- DDPG
- PPO
- SAC

In [None]:
import copy

import ray.rllib.algorithms.ddpg as ddpg
import ray.rllib.algorithms.ppo as ppo
import ray.rllib.algorithms.sac as sac
from ray.rllib.utils.torch_utils import set_torch_seed

In [3]:
# One independent training run is launched per seed for every environment.
seeds = tuple(range(0, 25, 5))
# Multiplied by the horizon this equals ~10^5 env steps.
training_epoches = 100
# Number of sampled agent steps after which a training run is stopped.
AGENT_SAMPLED_STEPS = 500_000

# Base RLlib settings shared by all algorithms; "env" is a placeholder that the
# training cells fill in for each run.
config = dict(
    env=None,
    horizon=max_horizon,
    num_workers=8,
    framework="torch",
    model=dict(
        fcnet_hiddens=[64, 64],
        fcnet_activation="relu",
    ),
    num_gpus=1,
    evaluation_num_workers=2,
    evaluation_config=dict(render_env=False),
    evaluation_interval=10,
    log_level="ERROR",
)

### DDPG

In [None]:
# Tasks left out of this DDPG run. Comment a name out to train on it again.
SKIP_ENVS = (
    "acrobot-swingup",
    "acrobot-swingup_sparse",
    "ball_in_cup-catch",
    "cartpole-balance",
    # "cartpole-balance_sparse",
    # "cartpole-swingup",
    # "cartpole-swingup_sparse",
    # "cheetah-run",
    # "finger-spin",
    # "finger-turn_easy",
    # "finger-turn_hard",
    # "fish-swim",
    # "fish-upright",
    # "hopper-hop",
    # "hopper-stand",
    # "humanoid-run",
    # "humanoid-stand",
    # "humanoid-walk",
    # "manipulator-bring_ball",
    # "point_mass-easy",
    # "reacher-easy",
    # "reacher-hard",
    # "swimmer-swimmer15",
    # "swimmer-swimmer6",
    # "walker-run",
    # "walker-stand",
    # "walker-walk",
    # 'pendulum-swingup',
)

# Every "<domain>-<task>" name in the benchmark, in benchmark order.
_benchmark_env_names = tuple(f"{x}-{y}" for x, y in BENCHMARKING)

# A misspelled SKIP_ENVS entry would silently fail to skip anything, so flag it.
_unknown_skips = sorted(set(SKIP_ENVS) - set(_benchmark_env_names))
if _unknown_skips:
    print(f">>> WARNING: SKIP_ENVS entries not in BENCHMARKING: {_unknown_skips}")

# Tasks that will actually be trained: the benchmark minus the skipped ones.
LEARNING_ENVS = tuple(
    name for name in _benchmark_env_names if name not in SKIP_ENVS
)

print(LEARNING_ENVS)
print(f">>> Number of learning envs: {len(LEARNING_ENVS)}")

# Number of leading (env, seed) runs the training loop should skip.
SEEDS_ALREADY_DONE = 3
if LEARNING_ENVS:
    print(f">>> FIRST {SEEDS_ALREADY_DONE} SEEDS FOR {LEARNING_ENVS[0]} WILL BE SKIPEPD")
else:
    # Indexing LEARNING_ENVS[0] would raise IndexError once every task is skipped.
    print(">>> No learning envs selected; nothing to skip or train.")

In [None]:
algo_name = "DDPG"

# Count skips on a local copy so that re-running this cell skips the same runs
# again instead of finding SEEDS_ALREADY_DONE already consumed by a prior run.
seeds_left_to_skip = SEEDS_ALREADY_DONE

for env_task in LEARNING_ENVS:
    for seed in seeds:
        print(f"> Algo: {algo_name} | Training task: {env_task} | Seed: {seed}")

        # `> 0` (rather than truthiness) keeps a negative value from skipping every run.
        if seeds_left_to_skip > 0:
            print(
                f"> SKIPPING SEED ACCORDING TO THE RUN PARAMETER! (SEEDS_ALREADY_DONE={seeds_left_to_skip})"
            )
            seeds_left_to_skip -= 1
            continue

        set_torch_seed(seed)
        # Per-run settings derived from the shared template. The seed is passed
        # to RLlib as well: set_torch_seed only touches this (driver) process,
        # whereas the "seed" key is RLlib's setting for seeding its workers.
        run_config = {**config, "env": env_task, "seed": seed}

        algo = ddpg.DDPG(config=run_config)
        try:
            # Train until the sampled-agent-steps counter reaches the budget.
            algo._counters["num_agent_steps_sampled"] = 0
            while algo._counters["num_agent_steps_sampled"] < AGENT_SAMPLED_STEPS:
                algo.train()

            algo.evaluate()
            checkpoint_dir = algo.save(
                f"./checkpoints/{algo_name}/{env_task}_{seed}/final_checkpoint"
            )
        finally:
            # Stop the algorithm even if training, evaluation or saving fails,
            # so a failed run does not leave its workers behind.
            algo.stop()

### PPO

In [5]:
# Tasks left out of this PPO run. Comment a name out to train on it again.
SKIP_ENVS = (
    "acrobot-swingup",
    "acrobot-swingup_sparse",
    "ball_in_cup-catch",
    "cartpole-balance",
    "cartpole-balance_sparse",
    "cartpole-swingup",
    "cartpole-swingup_sparse",
    "cheetah-run",
    "finger-spin",
    "finger-turn_easy",
    "finger-turn_hard",
    "fish-upright",
    "fish-swim",
    "hopper-hop",
    "hopper-stand",
    # "humanoid-run",
    # "humanoid-stand",
    "humanoid-walk",
    "manipulator-bring_ball",
    "point_mass-easy",
    "pendulum-swingup",
    "reacher-easy",
    "reacher-hard",
    "swimmer-swimmer15",
    "swimmer-swimmer6",
    "walker-run",
    "walker-stand",
    "walker-walk",
)

# Every "<domain>-<task>" name in the benchmark, in benchmark order.
_benchmark_env_names = tuple(f"{x}-{y}" for x, y in BENCHMARKING)

# A misspelled SKIP_ENVS entry would silently fail to skip anything, so flag it.
_unknown_skips = sorted(set(SKIP_ENVS) - set(_benchmark_env_names))
if _unknown_skips:
    print(f">>> WARNING: SKIP_ENVS entries not in BENCHMARKING: {_unknown_skips}")

# Tasks that will actually be trained: the benchmark minus the skipped ones.
LEARNING_ENVS = tuple(
    name for name in _benchmark_env_names if name not in SKIP_ENVS
)

print(LEARNING_ENVS)
print(f">>> Number of learning envs: {len(LEARNING_ENVS)}")

# Number of leading (env, seed) runs the training loop should skip.
SEEDS_ALREADY_DONE = 0
if LEARNING_ENVS:
    print(f">>> FIRST {SEEDS_ALREADY_DONE} SEEDS FOR {LEARNING_ENVS[0]} WILL BE SKIPEPD")
else:
    # Indexing LEARNING_ENVS[0] would raise IndexError once every task is skipped.
    print(">>> No learning envs selected; nothing to skip or train.")

('humanoid-stand', 'humanoid-run')
>>> Number of learning envs: 2
>>> FIRST 0 SEEDS FOR humanoid-stand WILL BE SKIPEPD


In [6]:
import ray.rllib.algorithms.ppo as ppo

algo_name = "PPO"

# Count skips on a local copy so that re-running this cell skips the same runs
# again instead of finding SEEDS_ALREADY_DONE already consumed by a prior run.
seeds_left_to_skip = SEEDS_ALREADY_DONE

for env_task in LEARNING_ENVS:
    for seed in seeds:
        print(f"> Algo: {algo_name} | Training task: {env_task} | Seed: {seed}")

        # `> 0` (rather than truthiness) keeps a negative value from skipping every run.
        if seeds_left_to_skip > 0:
            print(
                f"> SKIPPING SEED ACCORDING TO THE RUN PARAMETER! (SEEDS_ALREADY_DONE={seeds_left_to_skip})"
            )
            seeds_left_to_skip -= 1
            continue

        set_torch_seed(seed)
        # Per-run settings derived from the shared template. The seed is passed
        # to RLlib as well: set_torch_seed only touches this (driver) process,
        # whereas the "seed" key is RLlib's setting for seeding its workers.
        run_config = {**config, "env": env_task, "seed": seed}

        algo = ppo.PPO(config=run_config)
        try:
            # Train until the sampled-agent-steps counter reaches the budget.
            algo._counters["num_agent_steps_sampled"] = 0
            while algo._counters["num_agent_steps_sampled"] < AGENT_SAMPLED_STEPS:
                algo.train()

            algo.evaluate()
            checkpoint_dir = algo.save(
                f"./checkpoints/{algo_name}/{env_task}_{seed}/final_checkpoint"
            )
        finally:
            # Stop the algorithm even if training, evaluation or saving fails,
            # so a failed run does not leave its workers behind.
            algo.stop()

2023-05-19 09:03:25,319	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2023-05-19 09:03:25,320	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


> Algo: PPO | Training task: humanoid-stand | Seed: 0


[2m[36m(RolloutWorker pid=1399901)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1399901)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1399901)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1399895)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1399895)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1399895)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1399896)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1399896)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1399896)[0m   logger.warn(f"Box 

> Algo: PPO | Training task: humanoid-stand | Seed: 5


[2m[36m(RolloutWorker pid=1401222)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1401222)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1401222)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1401220)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1401220)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1401220)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1401219)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1401219)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1401219)[0m   logger.warn(f"Box 

> Algo: PPO | Training task: humanoid-stand | Seed: 10


[2m[36m(RolloutWorker pid=1402204)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1402204)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1402204)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1402203)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1402203)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1402203)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1402247)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1402247)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1402247)[0m   logger.warn(f"Box 

> Algo: PPO | Training task: humanoid-stand | Seed: 15


[2m[36m(RolloutWorker pid=1403230)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1403230)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1403230)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1403228)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1403228)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1403228)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1403232)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1403232)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1403232)[0m   logger.warn(f"Box 

> Algo: PPO | Training task: humanoid-stand | Seed: 20


[2m[36m(RolloutWorker pid=1409622)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1409622)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1409622)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1409621)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1409621)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1409621)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1409674)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1409674)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1409674)[0m   logger.warn(f"Box 

> Algo: PPO | Training task: humanoid-run | Seed: 0


[2m[36m(RolloutWorker pid=1417899)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1417899)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1417899)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1417902)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1417902)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1417902)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1417901)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1417901)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1417901)[0m   logger.warn(f"Box 

> Algo: PPO | Training task: humanoid-run | Seed: 5


[2m[36m(RolloutWorker pid=1426062)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1426062)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1426062)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1426057)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1426057)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1426057)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1426061)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1426061)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1426061)[0m   logger.warn(f"Box 

> Algo: PPO | Training task: humanoid-run | Seed: 10


[2m[36m(RolloutWorker pid=1434170)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1434170)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1434170)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1434168)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1434168)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1434168)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1434165)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1434165)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1434165)[0m   logger.warn(f"Box 

> Algo: PPO | Training task: humanoid-run | Seed: 15


[2m[36m(RolloutWorker pid=1442706)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1442706)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1442706)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1442734)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1442734)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1442734)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1442733)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1442733)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1442733)[0m   logger.warn(f"Box 

> Algo: PPO | Training task: humanoid-run | Seed: 20


[2m[36m(RolloutWorker pid=1449415)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1449415)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1449415)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1449418)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1449418)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1449418)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1449420)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1449420)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1449420)[0m   logger.warn(f"Box 

### SAC

In [6]:
# Tasks left out of this SAC run. Comment a name out to train on it again.
SKIP_ENVS = (
    "acrobot-swingup",
    "acrobot-swingup_sparse",
    "ball_in_cup-catch",
    "cartpole-balance",
    "cartpole-balance_sparse",
    "cartpole-swingup",
    "cartpole-swingup_sparse",
    "cheetah-run",
    "finger-spin",
    "finger-turn_easy",
    "finger-turn_hard",
    "fish-upright",
    "fish-swim",
    # "hopper-hop",
    "hopper-stand",
    "humanoid-run",
    # "humanoid-stand",
    # "humanoid-walk",
    "manipulator-bring_ball",
    "point_mass-easy",
    "pendulum-swingup",
    "reacher-easy",
    "reacher-hard",
    "swimmer-swimmer15",
    "swimmer-swimmer6",
    "walker-run",
    "walker-stand",
    "walker-walk",
)

# Every "<domain>-<task>" name in the benchmark, in benchmark order.
_benchmark_env_names = tuple(f"{x}-{y}" for x, y in BENCHMARKING)

# A misspelled SKIP_ENVS entry would silently fail to skip anything, so flag it.
_unknown_skips = sorted(set(SKIP_ENVS) - set(_benchmark_env_names))
if _unknown_skips:
    print(f">>> WARNING: SKIP_ENVS entries not in BENCHMARKING: {_unknown_skips}")

# Tasks that will actually be trained: the benchmark minus the skipped ones.
LEARNING_ENVS = tuple(
    name for name in _benchmark_env_names if name not in SKIP_ENVS
)

print(LEARNING_ENVS)
print(f">>> Number of learning envs: {len(LEARNING_ENVS)}")

# Number of leading (env, seed) runs the training loop should skip.
SEEDS_ALREADY_DONE = 4
if LEARNING_ENVS:
    print(f">>> FIRST {SEEDS_ALREADY_DONE} SEEDS FOR {LEARNING_ENVS[0]} WILL BE SKIPEPD")
else:
    # Indexing LEARNING_ENVS[0] would raise IndexError once every task is skipped.
    print(">>> No learning envs selected; nothing to skip or train.")

('hopper-hop', 'humanoid-stand', 'humanoid-walk')
>>> Number of learning envs: 3
>>> FIRST 4 SEEDS FOR hopper-hop WILL BE SKIPEPD


In [None]:
algo_name = "SAC"

# Count skips on a local copy so that re-running this cell skips the same runs
# again instead of finding SEEDS_ALREADY_DONE already consumed by a prior run.
seeds_left_to_skip = SEEDS_ALREADY_DONE

for env_task in LEARNING_ENVS:
    for seed in seeds:
        print(f"> Algo: {algo_name} | Training task: {env_task} | Seed: {seed}")

        # `> 0` (rather than truthiness) keeps a negative value from skipping every run.
        if seeds_left_to_skip > 0:
            print(
                f"> SKIPPING SEED ACCORDING TO THE RUN PARAMETER! (SEEDS_ALREADY_DONE={seeds_left_to_skip})"
            )
            seeds_left_to_skip -= 1
            continue

        set_torch_seed(seed)
        # Per-run settings derived from the shared template. The seed is passed
        # to RLlib as well: set_torch_seed only touches this (driver) process,
        # whereas the "seed" key is RLlib's setting for seeding its workers.
        run_config = {**config, "env": env_task, "seed": seed}

        algo = sac.SAC(config=run_config)
        try:
            # Train until the sampled-agent-steps counter reaches the budget.
            algo._counters["num_agent_steps_sampled"] = 0
            while algo._counters["num_agent_steps_sampled"] < AGENT_SAMPLED_STEPS:
                algo.train()

            algo.evaluate()
            checkpoint_dir = algo.save(
                f"./checkpoints/{algo_name}/{env_task}_{seed}/final_checkpoint"
            )
        finally:
            # Stop the algorithm even if training, evaluation or saving fails,
            # so a failed run does not leave its workers behind.
            algo.stop()

2023-05-15 00:26:35,826	INFO simple_q.py:307 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
2023-05-15 00:26:35,827	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


> Algo: SAC | Training task: hopper-hop | Seed: 0
> SKIPPING SEED ACCORDING TO THE RUN PARAMETER! (SEEDS_ALREADY_DONE=4)
> Algo: SAC | Training task: hopper-hop | Seed: 5
> SKIPPING SEED ACCORDING TO THE RUN PARAMETER! (SEEDS_ALREADY_DONE=3)
> Algo: SAC | Training task: hopper-hop | Seed: 10
> SKIPPING SEED ACCORDING TO THE RUN PARAMETER! (SEEDS_ALREADY_DONE=2)
> Algo: SAC | Training task: hopper-hop | Seed: 15
> SKIPPING SEED ACCORDING TO THE RUN PARAMETER! (SEEDS_ALREADY_DONE=1)
> Algo: SAC | Training task: hopper-hop | Seed: 20


[2m[36m(RolloutWorker pid=1516479)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1516479)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1516479)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1516480)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1516480)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1516480)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1516481)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1516481)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1516481)[0m   logger.warn(f"Box 

> Algo: SAC | Training task: humanoid-stand | Seed: 0


[2m[36m(RolloutWorker pid=1524347)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1524347)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1524347)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1524340)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1524340)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1524340)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1524341)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1524341)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1524341)[0m   logger.warn(f"Box 

> Algo: SAC | Training task: humanoid-stand | Seed: 5


[2m[36m(RolloutWorker pid=1533039)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1533039)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1533039)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1533035)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1533035)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1533035)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1533037)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1533037)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1533037)[0m   logger.warn(f"Box 

> Algo: SAC | Training task: humanoid-stand | Seed: 10


[2m[36m(RolloutWorker pid=1541763)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1541763)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1541763)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1541765)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1541765)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1541765)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1541766)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1541766)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1541766)[0m   logger.warn(f"Box 

> Algo: SAC | Training task: humanoid-stand | Seed: 15


[2m[36m(RolloutWorker pid=1550444)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1550444)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1550444)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1550446)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1550446)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1550446)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1550442)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1550442)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1550442)[0m   logger.warn(f"Box 

> Algo: SAC | Training task: humanoid-stand | Seed: 20


[2m[36m(RolloutWorker pid=1559185)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1559185)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1559185)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1559188)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1559188)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1559188)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1559186)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1559186)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1559186)[0m   logger.warn(f"Box 

> Algo: SAC | Training task: humanoid-walk | Seed: 0


[2m[36m(RolloutWorker pid=1567977)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1567977)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1567977)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1567980)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1567980)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1567980)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1567984)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1567984)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1567984)[0m   logger.warn(f"Box 