From 4adb78b2bf3c968f88f72ae9064189b846833230 Mon Sep 17 00:00:00 2001 From: Sven Mika Date: Tue, 4 Jun 2024 09:39:28 +0200 Subject: [PATCH] [RLlib] Activate DreamerV3 weekly release test (on Pong-v5 with the 100k setup). (#45654) --- release/ray_release/byod/byod_rllib_test.sh | 5 ++- release/release_tests.yaml | 40 +++++++++++++++++++- release/rllib_tests/1gpu_4cpus.yaml | 2 +- rllib/tuned_examples/dreamerv3/atari_100k.py | 8 ++-- 4 files changed, 47 insertions(+), 8 deletions(-) diff --git a/release/ray_release/byod/byod_rllib_test.sh b/release/ray_release/byod/byod_rllib_test.sh index 358227567d31b..66d5d73e8289a 100755 --- a/release/ray_release/byod/byod_rllib_test.sh +++ b/release/ray_release/byod/byod_rllib_test.sh @@ -1,6 +1,6 @@ #!/bin/bash # This script is used to build an extra layer on top of the base anyscale/ray image -# to run the agent stress test. +# to run RLlib release tests. set -exo pipefail @@ -14,3 +14,6 @@ pip3 install werkzeug==2.3.8 # not strictly necessary, but makes debugging easier git clone https://github.com/ray-project/ray.git + +# Only DreamerV3 still uses tf on the new API stack, and it requires tf==2.11.1 to run. 
+pip3 install tensorflow==2.11.1 tensorflow_probability==0.19.0 diff --git a/release/release_tests.yaml b/release/release_tests.yaml index b611026436a44..14ea3c43aea32 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -2739,6 +2739,42 @@ # Learning and benchmarking tests # ---------------------------------------------------------- +# -------------------------- +# DreamerV3 +# -------------------------- +# TODO (sven): Move algo and this test to pytorch +- name: rllib_learning_tests_pong_dreamerv3_tf2 + group: RLlib tests + working_dir: rllib_tests + + stable: true + + frequency: weekly + team: rllib + cluster: + byod: + type: gpu + post_build_script: byod_rllib_test.sh + runtime_env: + - RLLIB_TEST_NO_JAX_IMPORT=1 + - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ray/.mujoco/mujoco210/bin + cluster_compute: 1gpu_4cpus.yaml + + run: + timeout: 43200 # 12h + script: python learning_tests/tuned_examples/dreamerv3/atari_100k.py --framework=tf2 --env=ALE/Pong-v5 --num-gpus=1 --stop-reward=15.0 --as-release-test + + alert: default + + variations: + - __suffix__: aws + - __suffix__: gce + env: gce + frequency: manual + cluster: + cluster_compute: 1gpu_4cpus_gce.yaml + + # -------------------------- # PPO # -------------------------- @@ -2760,8 +2796,8 @@ cluster_compute: 8gpus_96cpus.yaml run: - timeout: 1800 - script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --as-test + timeout: 600 + script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --as-release-test alert: default diff --git a/release/rllib_tests/1gpu_4cpus.yaml b/release/rllib_tests/1gpu_4cpus.yaml index f55eb37cfc633..fa1b042a92608 100644 --- a/release/rllib_tests/1gpu_4cpus.yaml +++ b/release/rllib_tests/1gpu_4cpus.yaml @@ -5,7 +5,7 @@ max_workers: 0 head_node_type: name: head_node - instance_type: p2.xlarge + instance_type: 
g5.2xlarge worker_node_types: [] diff --git a/rllib/tuned_examples/dreamerv3/atari_100k.py b/rllib/tuned_examples/dreamerv3/atari_100k.py index 68bdc97451366..443ce9b13d163 100644 --- a/rllib/tuned_examples/dreamerv3/atari_100k.py +++ b/rllib/tuned_examples/dreamerv3/atari_100k.py @@ -20,7 +20,7 @@ parser = add_rllib_example_script_args( default_iters=1000000, default_reward=20.0, - default_timesteps=1000000, + default_timesteps=100000, ) # Use `parser` to add your own custom command line options to this script # and (if needed) use their values toset up `config` below. @@ -59,8 +59,8 @@ ) .reporting( metrics_num_episodes_for_smoothing=(args.num_gpus or 1), - report_images_and_videos=True, - report_dream_data=True, + report_images_and_videos=False, + report_dream_data=False, report_individual_batch_item_stats=False, ) # See Appendix A. @@ -75,4 +75,4 @@ if __name__ == "__main__": from ray.rllib.utils.test_utils import run_rllib_example_script_experiment - run_rllib_example_script_experiment(config, args, stop={}, keep_config=True) + run_rllib_example_script_experiment(config, args, keep_config=True)