
Commit d49f15b

[RLlib] Add "official" benchmark script for Atari PPO benchmarks (new API stack). (#45697)
sven1977 committed Jun 5, 2024
1 parent c4a87ee commit d49f15b
Showing 7 changed files with 198 additions and 59 deletions.
4 changes: 2 additions & 2 deletions release/release_tests.yaml
@@ -2796,8 +2796,8 @@
cluster_compute: 8gpus_96cpus.yaml

run:
-  timeout: 600
-  script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --as-release-test
+  timeout: 1200
+  script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env=ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --stop-reward=20.0 --as-release-test

alert: default

16 changes: 8 additions & 8 deletions rllib/BUILD
@@ -2379,7 +2379,7 @@ py_test(
    tags = ["team:rllib", "exclusive", "examples"],
    size = "medium",
    srcs = ["examples/evaluation/evaluation_parallel_to_training.py"],
-   args = ["--enable-new-api-stack", "--as-test", "--stop-reward=450.0", "--num-cpus=6", "--evaluation-duration=auto"]
+   args = ["--enable-new-api-stack", "--as-test", "--evaluation-parallel-to-training", "--stop-reward=450.0", "--num-cpus=6", "--evaluation-duration=auto"]
)

py_test(
@@ -2388,7 +2388,7 @@ py_test(
    tags = ["team:rllib", "exclusive", "examples", "examples_use_all_core"],
    size = "large",
    srcs = ["examples/evaluation/evaluation_parallel_to_training.py"],
-   args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=900.0", "--num-cpus=6", "--evaluation-duration=auto", "--evaluation-duration-unit=episodes"]
+   args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--evaluation-parallel-to-training", "--stop-reward=900.0", "--num-cpus=6", "--evaluation-duration=auto", "--evaluation-duration-unit=episodes"]
)

py_test(
@@ -2397,7 +2397,7 @@ py_test(
    tags = ["team:rllib", "exclusive", "examples"],
    size = "medium",
    srcs = ["examples/evaluation/evaluation_parallel_to_training.py"],
-   args = ["--enable-new-api-stack", "--as-test", "--stop-reward=450.0", "--num-cpus=6", "--evaluation-num-env-runners=3", "--evaluation-duration=511", "--evaluation-duration-unit=timesteps"]
+   args = ["--enable-new-api-stack", "--as-test", "--evaluation-parallel-to-training", "--stop-reward=450.0", "--num-cpus=6", "--evaluation-num-env-runners=3", "--evaluation-duration=511", "--evaluation-duration-unit=timesteps"]
)

py_test(
@@ -2406,7 +2406,7 @@ py_test(
    tags = ["team:rllib", "exclusive", "examples"],
    size = "medium",
    srcs = ["examples/evaluation/evaluation_parallel_to_training.py"],
-   args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=900.0", "--num-cpus=6", "--evaluation-duration=1001", "--evaluation-duration-unit=timesteps"]
+   args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--evaluation-parallel-to-training", "--stop-reward=900.0", "--num-cpus=6", "--evaluation-duration=1001", "--evaluation-duration-unit=timesteps"]
)

py_test(
@@ -2415,7 +2415,7 @@ py_test(
    tags = ["team:rllib", "exclusive", "examples"],
    size = "medium",
    srcs = ["examples/evaluation/evaluation_parallel_to_training.py"],
-   args = ["--enable-new-api-stack", "--as-test", "--stop-reward=450.0", "--num-cpus=6", "--evaluation-duration=13", "--evaluation-duration-unit=episodes"]
+   args = ["--enable-new-api-stack", "--as-test", "--evaluation-parallel-to-training", "--stop-reward=450.0", "--num-cpus=6", "--evaluation-duration=13", "--evaluation-duration-unit=episodes"]
)

py_test(
@@ -2424,7 +2424,7 @@ py_test(
    tags = ["team:rllib", "exclusive", "examples"],
    size = "medium",
    srcs = ["examples/evaluation/evaluation_parallel_to_training.py"],
-   args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=900.0", "--num-cpus=6", "--evaluation-duration=10", "--evaluation-duration-unit=episodes"]
+   args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--evaluation-parallel-to-training", "--stop-reward=900.0", "--num-cpus=6", "--evaluation-duration=10", "--evaluation-duration-unit=episodes"]
)

# @OldAPIStack
@@ -2434,7 +2434,7 @@ py_test(
    tags = ["team:rllib", "exclusive", "examples"],
    size = "medium",
    srcs = ["examples/evaluation/evaluation_parallel_to_training.py"],
-   args = ["--as-test", "--stop-reward=50.0", "--num-cpus=6", "--evaluation-duration=auto"]
+   args = ["--as-test", "--evaluation-parallel-to-training", "--stop-reward=50.0", "--num-cpus=6", "--evaluation-duration=auto"]
)

# @OldAPIStack
@@ -2444,7 +2444,7 @@ py_test(
    tags = ["team:rllib", "exclusive", "examples"],
    size = "medium",
    srcs = ["examples/evaluation/evaluation_parallel_to_training.py"],
-   args = ["--as-test", "--framework=torch", "--stop-reward=30.0", "--num-cpus=6", "--evaluation-num-env-runners=3", "--evaluation-duration=211", "--evaluation-duration-unit=timesteps"]
+   args = ["--as-test", "--evaluation-parallel-to-training", "--framework=torch", "--stop-reward=30.0", "--num-cpus=6", "--evaluation-num-env-runners=3", "--evaluation-duration=211", "--evaluation-duration-unit=timesteps"]
)

# subdirectory: gpus/
123 changes: 123 additions & 0 deletions rllib/benchmarks/ppo/benchmark_atari_ppo.py
@@ -0,0 +1,123 @@
"""Script to execute RLlib's official PPO Atari benchmarks.
How to run this script
----------------------
`python [script-name].py --enable-new-api-stack --stop-timesteps 12000000
--num-gpus=4 --num-env-runners=95`
In order to only run individual or lists of envs, you can provide a list of env-strings
under the `--env` arg, such as `--env ALE/Pong-v5,ALE/Breakout-v5`.
For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`
Results to expect
-----------------
TODO (sven): Link to RLlib's to-be-created benchmark page.
"""
import subprocess

from ray.rllib.utils.test_utils import add_rllib_example_script_args


parser = add_rllib_example_script_args()

# Might need `gymnasium[atari,other]` to be installed.

# See the following links for benchmark results of other libraries:
#   Original paper: https://arxiv.org/abs/1812.05905
#   CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE
#   AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks
# Each value tuple holds [0] = the reward to expect for DQN Rainbow and [1] = the
# number of timesteps to run (always 200M for DQN Rainbow).
# Note that for PPO, we simply run everything for 6M ts.
benchmark_envs = {
"ALE/Alien-v5": (6022.9, 200000000),
"ALE/Amidar-v5": (202.8, 200000000),
"ALE/Assault-v5": (14491.7, 200000000),
"ALE/Asterix-v5": (280114.0, 200000000),
"ALE/Asteroids-v5": (2249.4, 200000000),
"ALE/Atlantis-v5": (814684.0, 200000000),
"ALE/BankHeist-v5": (826.0, 200000000),
"ALE/BattleZone-v5": (52040.0, 200000000),
"ALE/BeamRider-v5": (21768.5, 200000000),
"ALE/Berzerk-v5": (1793.4, 200000000),
"ALE/Bowling-v5": (39.4, 200000000),
"ALE/Boxing-v5": (54.9, 200000000),
"ALE/Breakout-v5": (379.5, 200000000),
"ALE/Centipede-v5": (7160.9, 200000000),
"ALE/ChopperCommand-v5": (10916.0, 200000000),
"ALE/CrazyClimber-v5": (143962.0, 200000000),
"ALE/Defender-v5": (47671.3, 200000000),
"ALE/DemonAttack-v5": (109670.7, 200000000),
"ALE/DoubleDunk-v5": (-0.6, 200000000),
"ALE/Enduro-v5": (2061.1, 200000000),
"ALE/FishingDerby-v5": (22.6, 200000000),
"ALE/Freeway-v5": (29.1, 200000000),
"ALE/Frostbite-v5": (4141.1, 200000000),
"ALE/Gopher-v5": (72595.7, 200000000),
"ALE/Gravitar-v5": (567.5, 200000000),
"ALE/Hero-v5": (50496.8, 200000000),
"ALE/IceHockey-v5": (-11685.8, 200000000),
"ALE/Kangaroo-v5": (10841.0, 200000000),
"ALE/Krull-v5": (6715.5, 200000000),
"ALE/KungFuMaster-v5": (28999.8, 200000000),
"ALE/MontezumaRevenge-v5": (154.0, 200000000),
"ALE/MsPacman-v5": (2570.2, 200000000),
"ALE/NameThisGame-v5": (11686.5, 200000000),
"ALE/Phoenix-v5": (103061.6, 200000000),
"ALE/Pitfall-v5": (-37.6, 200000000),
"ALE/Pong-v5": (19.0, 200000000),
"ALE/PrivateEye-v5": (1704.4, 200000000),
"ALE/Qbert-v5": (18397.6, 200000000),
"ALE/RoadRunner-v5": (54261.0, 200000000),
"ALE/Robotank-v5": (55.2, 200000000),
"ALE/Seaquest-v5": (19176.0, 200000000),
"ALE/Skiing-v5": (-11685.8, 200000000),
"ALE/Solaris-v5": (2860.7, 200000000),
"ALE/SpaceInvaders-v5": (12629.0, 200000000),
"ALE/StarGunner-v5": (123853.0, 200000000),
"ALE/Surround-v5": (7.0, 200000000),
"ALE/Tennis-v5": (-2.2, 200000000),
"ALE/TimePilot-v5": (11190.5, 200000000),
"ALE/Tutankham-v5": (126.9, 200000000),
"ALE/Venture-v5": (45.0, 200000000),
"ALE/VideoPinball-v5": (506817.2, 200000000),
"ALE/WizardOfWor-v5": (14631.5, 200000000),
"ALE/YarsRevenge-v5": (93007.9, 200000000),
"ALE/Zaxxon-v5": (19658.0, 200000000),
}


if __name__ == "__main__":
    args = parser.parse_args()

    # Compile the base command for running the actual `tuned_examples` script.
    base_commands = [
        "python",
        "../../tuned_examples/ppo/atari_ppo.py",
        "--enable-new-api-stack",
        f"--num-env-runners={args.num_env_runners}" if args.num_env_runners else "",
        f"--num-gpus={args.num_gpus}",
        f"--wandb-key={args.wandb_key}" if args.wandb_key else "",
        f"--wandb-project={args.wandb_project}" if args.wandb_project else "",
        f"--wandb-run-name={args.wandb_run_name}" if args.wandb_run_name else "",
        f"--stop-timesteps={args.stop_timesteps}",
        f"--checkpoint-freq={args.checkpoint_freq}",
        "--checkpoint-at-end" if args.checkpoint_at_end else "",
    ]

    # Loop through all envs (given on the command line or found in
    # `benchmark_envs`) and run the `tuned_examples` script for each of them.
    for env_name in args.env.split(",") if args.env else benchmark_envs.keys():
        # Remove empty command strings (options that were not provided).
        commands = []
        for c in base_commands:
            if c != "":
                commands.append(c)
        commands.append(f"--env={env_name}")
        commands.append(f"--wandb-run-name={env_name}")
        print(f"Running {env_name} through command line=`{commands}`")
        subprocess.run(commands)
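
For illustration, given the docstring's example invocation, the loop above spawns one subprocess per env; for Pong the resulting command is roughly the following (a sketch, not verified output; `--checkpoint-freq` and any WandB flags you set are appended analogously, and empty entries are filtered out):

    python ../../tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --num-env-runners=95 --num-gpus=4 --stop-timesteps=12000000 --env=ALE/Pong-v5 --wandb-run-name=ALE/Pong-v5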
1 change: 0 additions & 1 deletion rllib/examples/evaluation/custom_evaluation.py
@@ -85,7 +85,6 @@
parser = add_rllib_example_script_args(
    default_iters=50, default_reward=0.7, default_timesteps=50000
)
- parser.add_argument("--evaluation-parallel-to-training", action="store_true")
parser.add_argument("--no-custom-eval", action="store_true")
parser.add_argument("--corridor-length-training", type=int, default=10)
parser.add_argument("--corridor-length-eval-worker-1", type=int, default=20)
41 changes: 6 additions & 35 deletions rllib/examples/evaluation/evaluation_parallel_to_training.py
@@ -53,7 +53,7 @@
| 71.7485 | 100000 | 476.51 | 476.51 |
+------------------+--------+----------+--------------------+
- When running without parallel evaluation (`--evaluation-not-parallel-to-training` flag),
+ When running without parallel evaluation (no `--evaluation-parallel-to-training` flag),
the experiment takes considerably longer (~80sec vs ~70sec):
+-----------------------------+------------+-----------------+--------+
| Trial name | status | loc | iter |
@@ -89,37 +89,10 @@
from ray.tune.registry import get_trainable_cls, register_env

parser = add_rllib_example_script_args(default_reward=500.0)
- parser.add_argument(
-     "--evaluation-duration",
-     type=lambda v: v if v == "auto" else int(v),
-     default="auto",
-     help="Number of evaluation episodes/timesteps to run each iteration. "
-     "If 'auto', will run as many as possible during train pass.",
- )
- parser.add_argument(
-     "--evaluation-duration-unit",
-     type=str,
-     default="timesteps",
-     choices=["episodes", "timesteps"],
-     help="The unit in which to measure the duration (`episodes` or `timesteps`).",
- )
- parser.add_argument(
-     "--evaluation-not-parallel-to-training",
-     action="store_true",
-     help="Whether to NOT run evaluation parallel to training, but in sequence.",
- )
- parser.add_argument(
-     "--evaluation-num-env-runners",
-     type=int,
-     default=2,
-     help="The number of evaluation EnvRunners to setup. "
-     "0 for a single local evaluation EnvRunner.",
- )
- parser.add_argument(
-     "--evaluation-interval",
-     type=int,
-     default=1,
-     help="Every how many train iterations should we run an evaluation loop?",
+ parser.set_defaults(
+     evaluation_num_env_runners=2,
+     evaluation_interval=1,
+     evaluation_duration_unit="timesteps",
)
parser.add_argument(
    "--evaluation-parallel-to-training-wo-thread",
@@ -219,9 +192,7 @@ def on_train_result(
    .evaluation(
        # Parallel evaluation+training config.
        # Switch on evaluation in parallel with training.
-       evaluation_parallel_to_training=(
-           not args.evaluation_not_parallel_to_training
-       ),
+       evaluation_parallel_to_training=args.evaluation_parallel_to_training,
        # Use two evaluation workers. Must be >0, otherwise,
        # evaluation will run on a local worker and block (no parallelism).
        evaluation_num_env_runners=args.evaluation_num_env_runners,
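
With this change, parallel evaluation becomes opt-in via the shared `--evaluation-parallel-to-training` flag rather than opt-out. A minimal sketch of the resulting setup (assuming PPO on CartPole; illustrative, not copied from the script):

    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        .environment("CartPole-v1")
        .evaluation(
            evaluation_interval=1,  # Evaluate every training iteration.
            evaluation_num_env_runners=2,  # Must be >0 for actual parallelism.
            evaluation_duration="auto",  # Evaluate for as long as training takes.
            evaluation_duration_unit="timesteps",
            evaluation_parallel_to_training=True,
        )
    )
    algo = config.build()
    results = algo.train()  # Evaluation results refer to the previous iteration.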
20 changes: 7 additions & 13 deletions rllib/tuned_examples/ppo/atari_ppo.py
@@ -1,19 +1,18 @@
import gymnasium as gym

+ from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.connectors.env_to_module.frame_stacking import FrameStackingEnvToModule
from ray.rllib.connectors.learner.frame_stacking import FrameStackingLearner
from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack
- from ray.rllib.utils.metrics import (
-     ENV_RUNNER_RESULTS,
-     EPISODE_RETURN_MEAN,
-     NUM_ENV_STEPS_SAMPLED_LIFETIME,
- )
from ray.rllib.utils.test_utils import add_rllib_example_script_args
- from ray import tune


- parser = add_rllib_example_script_args()
+ parser = add_rllib_example_script_args(
+     default_reward=float("inf"),
+     default_timesteps=3000000,
+     default_iters=100000000000,
+ )
# Use `parser` to add your own custom command line options to this script
# and (if needed) use their values to set up `config` below.
args = parser.parse_args()
@@ -81,13 +80,8 @@ def _env_creator(cfg):
    )
)

- stop = {
-     f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 20.0,
-     NUM_ENV_STEPS_SAMPLED_LIFETIME: 1500000,
- }

if __name__ == "__main__":
    from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

-   run_rllib_example_script_experiment(config, args=args, stop=stop)
+   run_rllib_example_script_experiment(config, args=args)
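
Stopping is now driven by the shared `--stop-reward`, `--stop-timesteps`, and `--stop-iters` args (with the new defaults above) instead of a hard-coded dict. Assuming the helper translates the parsed args one-to-one (a sketch; `TRAINING_ITERATION` is from `ray.tune.result`), the effective criteria correspond to roughly:

    stop = {
        f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": args.stop_reward,  # float("inf") here
        NUM_ENV_STEPS_SAMPLED_LIFETIME: args.stop_timesteps,  # 3000000 here
        TRAINING_ITERATION: args.stop_iters,  # 100000000000 here
    }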
52 changes: 52 additions & 0 deletions rllib/utils/test_utils.py
@@ -135,6 +135,48 @@ def add_rllib_example_script_args(
"experiment is then the sum over all individual agents' rewards.",
)

# Evaluation options.
parser.add_argument(
"--evaluation-num-env-runners",
type=int,
default=0,
help="The number of evaluation (remote) EnvRunners to use for the experiment.",
)
parser.add_argument(
"--evaluation-interval",
type=int,
default=0,
help="Every how many iterations to run one round of evaluation. "
"Use 0 (default) to disable evaluation.",
)
parser.add_argument(
"--evaluation-duration",
type=lambda v: v if v == "auto" else int(v),
default=10,
help="The number of evaluation units to run each evaluation round. "
"Use `--evaluation-duration-unit` to count either in 'episodes' "
"or 'timesteps'. If 'auto', will run as many as possible during train pass ("
"`--evaluation-parallel-to-training` must be set then).",
)
parser.add_argument(
"--evaluation-duration-unit",
type=str,
default="episodes",
choices=["episodes", "timesteps"],
help="The evaluation duration unit to count by. One of 'episodes' or "
"'timesteps'. This unit will be run `--evaluation-duration` times in each "
"evaluation round. If `--evaluation-duration=auto`, this setting does not "
"matter.",
)
parser.add_argument(
"--evaluation-parallel-to-training",
action="store_true",
help="Whether to run evaluation parallel to training. This might help speed up "
"your overall iteration time. Be aware that when using this option, your "
"reported evaluation results are referring to one iteration before the current "
"one.",
)

# tune.Tuner options.
parser.add_argument(
"--no-tune",
@@ -1434,6 +1476,16 @@ def run_rllib_example_script_experiment(
        num_cpus_for_main_process=1,
    )

+   # Evaluation setup.
+   if args.evaluation_interval > 0:
+       config.evaluation(
+           evaluation_num_env_runners=args.evaluation_num_env_runners,
+           evaluation_interval=args.evaluation_interval,
+           evaluation_duration=args.evaluation_duration,
+           evaluation_duration_unit=args.evaluation_duration_unit,
+           evaluation_parallel_to_training=args.evaluation_parallel_to_training,
+       )

    # Run the experiment w/o Tune (directly operate on the RLlib Algorithm object).
    if args.no_tune:
        assert not args.as_test and not args.as_release_test
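
With these options in the shared parser, any example script built on `run_rllib_example_script_experiment` can switch evaluation on purely from the command line, for example (a sketch using only the flags added above):

    python evaluation_parallel_to_training.py --enable-new-api-stack --evaluation-interval=1 --evaluation-num-env-runners=2 --evaluation-duration=auto --evaluation-duration-unit=timesteps --evaluation-parallel-to-training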
