[RLlib] Activate DreamerV3 weekly release test (on Pong-v5 with the 100k setup). (#45654)
ray-project · Jun 4, 2024 · 4adb78b · 4adb78b
1 parent 4f8eb2f
commit 4adb78b
Show file tree

Hide file tree

Showing 4 changed files with 47 additions and 8 deletions.
diff --git a/release/ray_release/byod/byod_rllib_test.sh b/release/ray_release/byod/byod_rllib_test.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 # This script is used to build an extra layer on top of the base anyscale/ray image 
-# to run the agent stress test.
+# to run RLlib release tests.
 
 set -exo pipefail
 
@@ -14,3 +14,6 @@ pip3 install werkzeug==2.3.8
 
 # not strictly necessary, but makes debugging easier
 git clone https://github.com/ray-project/ray.git
+
+# Only DreamerV3 still uses tf on the new API stack, and it requires tf==2.11.1 to run.
+pip3 install tensorflow==2.11.1 tensorflow_probability==0.19.0
diff --git a/release/release_tests.yaml b/release/release_tests.yaml
@@ -2739,6 +2739,42 @@
 # Learning and benchmarking tests
 # ----------------------------------------------------------
 
+# --------------------------
+# DreamerV3
+# --------------------------
+# TODO (sven): Move algo and this test to pytorch
+- name: rllib_learning_tests_pong_dreamerv3_tf2
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  stable: true
+
+  frequency: weekly
+  team: rllib
+  cluster:
+    byod:
+      type: gpu
+      post_build_script: byod_rllib_test.sh
+      runtime_env:
+        - RLLIB_TEST_NO_JAX_IMPORT=1
+        - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ray/.mujoco/mujoco210/bin
+    cluster_compute: 1gpu_4cpus.yaml
+
+  run:
+    timeout: 43200  # 12h
+    script: python learning_tests/tuned_examples/dreamerv3/atari_100k.py --framework=tf2 --env=ALE/Pong-v5 --num-gpus=1 --stop-reward=15.0 --as-release-test
+
+  alert: default
+
+  variations:
+    - __suffix__: aws
+    - __suffix__: gce
+      env: gce
+      frequency: manual
+      cluster:
+        cluster_compute: 1gpu_4cpus_gce.yaml
+
+
 # --------------------------
 # PPO
 # --------------------------
@@ -2760,8 +2796,8 @@
     cluster_compute: 8gpus_96cpus.yaml
 
   run:
-    timeout: 1800
-    script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --as-test
+    timeout: 600
+    script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --as-release-test
 
   alert: default
 

diff --git a/release/rllib_tests/1gpu_4cpus.yaml b/release/rllib_tests/1gpu_4cpus.yaml
@@ -5,7 +5,7 @@ max_workers: 0
 
 head_node_type:
     name: head_node
-    instance_type: p2.xlarge
+    instance_type: g5.2xlarge
 
 worker_node_types: []
 

diff --git a/rllib/tuned_examples/dreamerv3/atari_100k.py b/rllib/tuned_examples/dreamerv3/atari_100k.py
@@ -20,7 +20,7 @@
 parser = add_rllib_example_script_args(
     default_iters=1000000,
     default_reward=20.0,
-    default_timesteps=1000000,
+    default_timesteps=100000,
 )
 # Use `parser` to add your own custom command line options to this script
 # and (if needed) use their values to set up `config` below.
@@ -59,8 +59,8 @@
     )
     .reporting(
         metrics_num_episodes_for_smoothing=(args.num_gpus or 1),
-        report_images_and_videos=True,
-        report_dream_data=True,
+        report_images_and_videos=False,
+        report_dream_data=False,
         report_individual_batch_item_stats=False,
     )
     # See Appendix A.
@@ -75,4 +75,4 @@
 if __name__ == "__main__":
     from ray.rllib.utils.test_utils import run_rllib_example_script_experiment
 
-    run_rllib_example_script_experiment(config, args, stop={}, keep_config=True)
+    run_rllib_example_script_experiment(config, args, keep_config=True)