From 4adb78b2bf3c968f88f72ae9064189b846833230 Mon Sep 17 00:00:00 2001
From: Sven Mika <svenmika1977@gmail.com>
Date: Tue, 4 Jun 2024 09:39:28 +0200
Subject: [PATCH] [RLlib] Activate DreamerV3 weekly release test (on Pong-v5
 with the 100k setup). (#45654)

---
 release/ray_release/byod/byod_rllib_test.sh  |  5 ++-
 release/release_tests.yaml                   | 40 +++++++++++++++++++-
 release/rllib_tests/1gpu_4cpus.yaml          |  2 +-
 rllib/tuned_examples/dreamerv3/atari_100k.py |  8 ++--
 4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/release/ray_release/byod/byod_rllib_test.sh b/release/ray_release/byod/byod_rllib_test.sh
index 358227567d31b..66d5d73e8289a 100755
--- a/release/ray_release/byod/byod_rllib_test.sh
+++ b/release/ray_release/byod/byod_rllib_test.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 # This script is used to build an extra layer on top of the base anyscale/ray image 
-# to run the agent stress test.
+# to run RLlib release tests.
 
 set -exo pipefail
 
@@ -14,3 +14,6 @@ pip3 install werkzeug==2.3.8
 
 # not strictly necessary, but makes debugging easier
 git clone https://github.com/ray-project/ray.git
+
+# Only DreamerV3 still uses tf on the new API stack, and it requires tf==2.11.1 to run.
+pip3 install tensorflow==2.11.1 tensorflow_probability==0.19.0
diff --git a/release/release_tests.yaml b/release/release_tests.yaml
index b611026436a44..14ea3c43aea32 100644
--- a/release/release_tests.yaml
+++ b/release/release_tests.yaml
@@ -2739,6 +2739,42 @@
 # Learning and benchmarking tests
 # ----------------------------------------------------------
 
+# --------------------------
+# DreamerV3
+# --------------------------
+# TODO (sven): Move the algorithm and this test to PyTorch.
+- name: rllib_learning_tests_pong_dreamerv3_tf2
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  stable: true
+
+  frequency: weekly
+  team: rllib
+  cluster:
+    byod:
+      type: gpu
+      post_build_script: byod_rllib_test.sh
+      runtime_env:
+        - RLLIB_TEST_NO_JAX_IMPORT=1
+        - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ray/.mujoco/mujoco210/bin
+    cluster_compute: 1gpu_4cpus.yaml
+
+  run:
+    timeout: 43200  # 12h
+    script: python learning_tests/tuned_examples/dreamerv3/atari_100k.py --framework=tf2 --env=ALE/Pong-v5 --num-gpus=1 --stop-reward=15.0 --as-release-test
+
+  alert: default
+
+  variations:
+    - __suffix__: aws
+    - __suffix__: gce
+      env: gce
+      frequency: manual
+      cluster:
+        cluster_compute: 1gpu_4cpus_gce.yaml
+
+
 # --------------------------
 # PPO
 # --------------------------
@@ -2760,8 +2796,8 @@
     cluster_compute: 8gpus_96cpus.yaml
 
   run:
-    timeout: 1800
-    script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --as-test
+    timeout: 600
+    script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --as-release-test
 
   alert: default
 
diff --git a/release/rllib_tests/1gpu_4cpus.yaml b/release/rllib_tests/1gpu_4cpus.yaml
index f55eb37cfc633..fa1b042a92608 100644
--- a/release/rllib_tests/1gpu_4cpus.yaml
+++ b/release/rllib_tests/1gpu_4cpus.yaml
@@ -5,7 +5,7 @@ max_workers: 0
 
 head_node_type:
     name: head_node
-    instance_type: p2.xlarge
+    instance_type: g5.2xlarge
 
 worker_node_types: []
 
diff --git a/rllib/tuned_examples/dreamerv3/atari_100k.py b/rllib/tuned_examples/dreamerv3/atari_100k.py
index 68bdc97451366..443ce9b13d163 100644
--- a/rllib/tuned_examples/dreamerv3/atari_100k.py
+++ b/rllib/tuned_examples/dreamerv3/atari_100k.py
@@ -20,7 +20,7 @@
 parser = add_rllib_example_script_args(
     default_iters=1000000,
     default_reward=20.0,
-    default_timesteps=1000000,
+    default_timesteps=100000,
 )
 # Use `parser` to add your own custom command line options to this script
 # and (if needed) use their values toset up `config` below.
@@ -59,8 +59,8 @@
     )
     .reporting(
         metrics_num_episodes_for_smoothing=(args.num_gpus or 1),
-        report_images_and_videos=True,
-        report_dream_data=True,
+        report_images_and_videos=False,
+        report_dream_data=False,
         report_individual_batch_item_stats=False,
     )
     # See Appendix A.
@@ -75,4 +75,4 @@
 if __name__ == "__main__":
     from ray.rllib.utils.test_utils import run_rllib_example_script_experiment
 
-    run_rllib_example_script_experiment(config, args, stop={}, keep_config=True)
+    run_rllib_example_script_experiment(config, args, keep_config=True)