[RLlib] Documentation do-over 01: Announce new API stack as alpha; add hints to all RLlib pages; describe how to use it in new page. #44090

Merged
13 changes: 13 additions & 0 deletions doc/source/_includes/rllib/new_api_stack.rst
@@ -0,0 +1,13 @@
.. note::

    With Ray 2.10.0, RLlib's "new API stack" has entered alpha stage.
    Throughout the next minor releases, up to Ray 3, we will move more
    algorithms, example scripts, and documentation over to this new code base,
    thereby gradually replacing the "old API stack" (e.g. ModelV2, Policy,
    RolloutWorker).

    Note, however, that so far only PPO (single- and multi-agent) and SAC
    (single-agent only) support the "new API stack", and both still run with
    the old APIs by default. You will be able to continue using your existing
    custom (old stack) classes and setups for the foreseeable future.

    `Click here </rllib/package_ref/rllib-new-api-stack.html>`__ for more details on how to use the new API stack.
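The old-to-new class correspondence described in the note can be summarized as a small lookup table. This is purely illustrative (the names come from the note above; `OLD_TO_NEW_STACK` is not an RLlib API):

```python
# Old API stack classes and their new API stack replacements, per the note
# above. An illustrative mapping only, not part of RLlib itself.
OLD_TO_NEW_STACK = {
    "ModelV2": "RLModule",         # neural network container
    "Policy": "Learner",           # loss computation and optimization
    "RolloutWorker": "EnvRunner",  # environment sampling
}

for old, new in OLD_TO_NEW_STACK.items():
    print(f"{old} -> {new}")
```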
3 changes: 3 additions & 0 deletions doc/source/_includes/rllib/new_api_stack_component.rst
@@ -0,0 +1,3 @@
.. note::

This doc is related to RLlib's `new API stack </rllib/package_ref/rllib-new-api-stack.html>`__ and therefore experimental.
5 changes: 0 additions & 5 deletions doc/source/_includes/rllib/rlm_learner_migration_banner.rst

This file was deleted.

1 change: 0 additions & 1 deletion doc/source/_includes/rllib/rlmodules_rollout.rst

This file was deleted.

149 changes: 149 additions & 0 deletions doc/source/rllib/doc_code/new_api_stack.py
@@ -0,0 +1,149 @@
# __enabling-new-api-stack-sa-ppo-begin__

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.single_agent_env_runner import SingleAgentEnvRunner


config = (
    PPOConfig()
    .environment("CartPole-v1")
    # Switch the new API stack flag to True (False by default).
    # This enables the use of the RLModule (replaces ModelV2) AND Learner
    # (replaces Policy) classes.
    .experimental(_enable_new_api_stack=True)
    # The flag above only activates the RLModule and Learner APIs. To use all
    # of the new API stack's classes, you also have to specify the EnvRunner
    # (replaces RolloutWorker) to use.
    # Note that this step will be fully automated in the next release.
    # Set `env_runner_cls` to `SingleAgentEnvRunner` for single-agent setups
    # and to `MultiAgentEnvRunner` for multi-agent setups.
    .rollouts(env_runner_cls=SingleAgentEnvRunner)
    # We use a simple 1-CPU setup here for learning. However, because the new
    # stack supports arbitrary scaling on the learner axis, feel free to set
    # `num_learner_workers` to the number of available GPUs for multi-GPU
    # training (and `num_gpus_per_learner_worker=1`).
    .resources(
        num_learner_workers=0,  # <- normally, set this to the number of GPUs
        num_gpus_per_learner_worker=0,  # <- set this to 1 if you have at least 1 GPU
        num_cpus_for_local_worker=1,
    )
    # When using RLlib's default models (RLModules) AND the new EnvRunners, you
    # should set this flag in your model config. Setting it will no longer be
    # required in the near future. It yields a small performance advantage
    # because value function predictions for PPO no longer need to happen on
    # the sampler side (they are now fully located on the learner side, which
    # might have GPUs available).
    .training(model={"uses_new_env_runners": True})
)

# __enabling-new-api-stack-sa-ppo-end__

# Test whether it works.
print(config.build().train())


# __enabling-new-api-stack-ma-ppo-begin__

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.multi_agent_env_runner import MultiAgentEnvRunner
from ray.rllib.examples.env.multi_agent import MultiAgentCartPole


# A typical multi-agent setup (otherwise using the exact same parameters as
# before) looks like this.
config = (
    PPOConfig()
    .environment(MultiAgentCartPole, env_config={"num_agents": 2})
    # Switch the new API stack flag to True (False by default).
    # This enables the use of the RLModule (replaces ModelV2) AND Learner
    # (replaces Policy) classes.
    .experimental(_enable_new_api_stack=True)
    # The flag above only activates the RLModule and Learner APIs. To use all
    # of the new API stack's classes, you also have to specify the EnvRunner
    # (replaces RolloutWorker) to use.
    # Note that this step will be fully automated in the next release.
    # Set `env_runner_cls` to `SingleAgentEnvRunner` for single-agent setups
    # and to `MultiAgentEnvRunner` for multi-agent setups.
    .rollouts(env_runner_cls=MultiAgentEnvRunner)
    # We use a simple 1-CPU setup here for learning. However, because the new
    # stack supports arbitrary scaling on the learner axis, feel free to set
    # `num_learner_workers` to the number of available GPUs for multi-GPU
    # training (and `num_gpus_per_learner_worker=1`).
    .resources(
        num_learner_workers=0,  # <- normally, set this to the number of GPUs
        num_gpus_per_learner_worker=0,  # <- set this to 1 if you have at least 1 GPU
        num_cpus_for_local_worker=1,
    )
    # When using RLlib's default models (RLModules) AND the new EnvRunners, you
    # should set this flag in your model config. Setting it will no longer be
    # required in the near future. It yields a small performance advantage
    # because value function predictions for PPO no longer need to happen on
    # the sampler side (they are now fully located on the learner side, which
    # might have GPUs available).
    .training(model={"uses_new_env_runners": True})
    # Because this is a multi-agent env, we also have to set up the usual
    # multi-agent parameters:
    .multi_agent(
        policies={"p0", "p1"},
        # Map agent 0 to p0 and agent 1 to p1.
        policy_mapping_fn=lambda agent_id, episode, **kwargs: f"p{agent_id}",
    )
)

# __enabling-new-api-stack-ma-ppo-end__

# Test whether it works.
print(config.build().train())


# __enabling-new-api-stack-sa-sac-begin__

from ray.rllib.algorithms.sac import SACConfig
from ray.rllib.env.single_agent_env_runner import SingleAgentEnvRunner


config = (
    SACConfig()
    .environment("Pendulum-v1")
    # Switch the new API stack flag to True (False by default).
    # This enables the use of the RLModule (replaces ModelV2) AND Learner
    # (replaces Policy) classes.
    .experimental(_enable_new_api_stack=True)
    # The flag above only activates the RLModule and Learner APIs. To use all
    # of the new API stack's classes, you also have to specify the EnvRunner
    # (replaces RolloutWorker) to use.
    # Note that this step will be fully automated in the next release.
    .rollouts(env_runner_cls=SingleAgentEnvRunner)
    # We use a simple 1-CPU setup here for learning. However, because the new
    # stack supports arbitrary scaling on the learner axis, feel free to set
    # `num_learner_workers` to the number of available GPUs for multi-GPU
    # training (and `num_gpus_per_learner_worker=1`).
    .resources(
        num_learner_workers=0,  # <- normally, set this to the number of GPUs
        num_gpus_per_learner_worker=0,  # <- set this to 1 if you have at least 1 GPU
        num_cpus_for_local_worker=1,
    )
    # When using RLlib's default models (RLModules) AND the new EnvRunners, you
    # should set this flag in your model config. Setting it will no longer be
    # required in the near future.
    .training(
        model={"uses_new_env_runners": True},
        replay_buffer_config={"type": "EpisodeReplayBuffer"},
    )
)
# __enabling-new-api-stack-sa-sac-end__


# Test whether it works.
print(config.build().train())
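The `policy_mapping_fn` used in the multi-agent example above is just a callable from agent ID to policy ID, so its behavior can be checked standalone. A minimal sketch, assuming integer agent IDs `0` and `1` as used by the two-agent `MultiAgentCartPole` setup (the episode argument is unused by this particular mapping):

```python
def policy_mapping_fn(agent_id, episode, **kwargs):
    """Same mapping as the lambda in the multi-agent PPO config above."""
    return f"p{agent_id}"

# Check the mapping for a two-agent setup.
print(policy_mapping_fn(0, None))  # -> p0
print(policy_mapping_fn(1, None))  # -> p1
```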
3 changes: 3 additions & 0 deletions doc/source/rllib/index.rst
@@ -1,5 +1,7 @@
.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. _rllib-index:

RLlib: Industry-Grade Reinforcement Learning
@@ -14,6 +16,7 @@ RLlib: Industry-Grade Reinforcement Learning
rllib-algorithms
user-guides
rllib-examples
rllib-new-api-stack
package_ref/index


2 changes: 1 addition & 1 deletion doc/source/rllib/key-concepts.rst
@@ -1,7 +1,7 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/rlm_learner_migration_banner.rst
.. include:: /_includes/rllib/new_api_stack.rst

.. TODO: We need algorithms, environments, policies, models here. Likely in that order.
Execution plans are not a "core" concept for users. Sample batches should probably also be left out.
7 changes: 5 additions & 2 deletions doc/source/rllib/package_ref/algorithm.rst
@@ -1,6 +1,9 @@
.. algorithm-reference-docs:

.. include:: /_includes/rllib/rlm_learner_migration_banner.rst
.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. algorithm-reference-docs:
Algorithms
==========
10 changes: 7 additions & 3 deletions doc/source/rllib/package_ref/catalogs.rst
@@ -1,8 +1,12 @@
.. _catalog-reference-docs:

.. include:: /_includes/rllib/rlm_learner_migration_banner.rst
.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. include:: /_includes/rllib/new_api_stack_component.rst

.. include:: /_includes/rllib/rlmodules_rollout.rst

.. _catalog-reference-docs:

Catalog API
===========
6 changes: 6 additions & 0 deletions doc/source/rllib/package_ref/env.rst
@@ -1,3 +1,9 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst


.. _env-reference-docs:

Environments
5 changes: 5 additions & 0 deletions doc/source/rllib/package_ref/env/base_env.rst
@@ -1,3 +1,8 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. _base-env-reference-docs:

BaseEnv API
5 changes: 5 additions & 0 deletions doc/source/rllib/package_ref/env/external_env.rst
@@ -1,3 +1,8 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. _external-env-reference-docs:

ExternalEnv API
5 changes: 5 additions & 0 deletions doc/source/rllib/package_ref/env/multi_agent_env.rst
@@ -1,3 +1,8 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. _multi-agent-env-reference-docs:

MultiAgentEnv API
4 changes: 4 additions & 0 deletions doc/source/rllib/package_ref/env/vector_env.rst
@@ -1,3 +1,7 @@
.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. _vector-env-reference-docs:

VectorEnv API
6 changes: 6 additions & 0 deletions doc/source/rllib/package_ref/evaluation.rst
@@ -1,3 +1,9 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst


.. _evaluation-reference-docs:

Sampling the Environment or offline data
5 changes: 5 additions & 0 deletions doc/source/rllib/package_ref/external-app.rst
@@ -1,3 +1,8 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

External Application API
------------------------

3 changes: 2 additions & 1 deletion doc/source/rllib/package_ref/index.rst
@@ -1,6 +1,7 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/rlm_learner_migration_banner.rst
.. include:: /_includes/rllib/new_api_stack.rst

.. _rllib-reference-docs:

7 changes: 7 additions & 0 deletions doc/source/rllib/package_ref/learner.rst
@@ -1,3 +1,10 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. include:: /_includes/rllib/new_api_stack_component.rst

.. _learner-reference-docs:


7 changes: 5 additions & 2 deletions doc/source/rllib/package_ref/models.rst
@@ -1,6 +1,9 @@
.. _model-reference-docs:
.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. include:: /_includes/rllib/rlm_learner_migration_banner.rst

.. _model-reference-docs:

Model APIs
==========
5 changes: 4 additions & 1 deletion doc/source/rllib/package_ref/policy.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. _policy-reference-docs:

.. include:: /_includes/rllib/rlm_learner_migration_banner.rst

Policy API
==========
3 changes: 3 additions & 0 deletions doc/source/rllib/package_ref/policy/custom_policies.rst
@@ -1,3 +1,6 @@
.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

Building Custom Policy Classes
------------------------------
5 changes: 5 additions & 0 deletions doc/source/rllib/package_ref/replay-buffers.rst
@@ -1,3 +1,8 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. _replay-buffer-api-reference-docs:

Replay Buffer API
5 changes: 5 additions & 0 deletions doc/source/rllib/package_ref/rl_modules.rst
@@ -1,4 +1,9 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. include:: /_includes/rllib/new_api_stack_component.rst

.. _rlmodule-reference-docs:

5 changes: 5 additions & 0 deletions doc/source/rllib/package_ref/utils.rst
@@ -1,3 +1,8 @@

.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/new_api_stack.rst

.. _utils-reference-docs:

RLlib Utilities
2 changes: 2 additions & 0 deletions doc/source/rllib/rllib-advanced-api.rst
@@ -1,4 +1,6 @@

.. include:: /_includes/rllib/new_api_stack.rst

.. _rllib-advanced-api-doc:

Advanced Python APIs
2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-algorithms.rst
@@ -1,6 +1,6 @@
.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/rlm_learner_migration_banner.rst
.. include:: /_includes/rllib/new_api_stack.rst

.. _rllib-algorithms-doc:

4 changes: 3 additions & 1 deletion doc/source/rllib/rllib-catalogs.rst
@@ -1,6 +1,8 @@
.. include:: /_includes/rllib/we_are_hiring.rst

.. include:: /_includes/rllib/rlmodules_rollout.rst
.. include:: /_includes/rllib/new_api_stack.rst

.. include:: /_includes/rllib/new_api_stack_component.rst


Catalog (Alpha)