From 8f85a70265293f20df1fdef48ae112a5da936097 Mon Sep 17 00:00:00 2001
From: sven1977
Date: Thu, 2 May 2024 12:08:39 +0200
Subject: [PATCH 1/4] wip

Signed-off-by: sven1977
---
 rllib/env/multi_agent_env.py                  |  12 +-
 .../examples/envs/custom_env_render_method.py | 203 ++++++++++++++++++
 2 files changed, 214 insertions(+), 1 deletion(-)
 create mode 100644 rllib/examples/envs/custom_env_render_method.py

diff --git a/rllib/env/multi_agent_env.py b/rllib/env/multi_agent_env.py
index 19554ff24444..78be0ec26db8 100644
--- a/rllib/env/multi_agent_env.py
+++ b/rllib/env/multi_agent_env.py
@@ -2,6 +2,8 @@
 import logging
 from typing import Callable, Dict, List, Tuple, Optional, Union, Set, Type
 
+import numpy as np
+
 from ray.rllib.env.base_env import BaseEnv
 from ray.rllib.env.env_context import EnvContext
 from ray.rllib.utils.annotations import (
@@ -554,7 +556,15 @@ def step(self, action_dict):
 
         @override(MultiAgentEnv)
         def render(self):
-            return self.envs[0].render(self.render_mode)
+            # This render method simply renders all n underlying individual single-agent
+            # envs and concatenates their images (on top of each other if the returned
+            # images have dims where [width] > [height], otherwise next to each other).
+            render_images = [e.render() for e in self.envs]
+            if render_images[0].shape[1] > render_images[0].shape[0]:
+                concat_dim = 0
+            else:
+                concat_dim = 1
+            return np.concatenate(render_images, axis=concat_dim)
 
     return MultiEnv
 
diff --git a/rllib/examples/envs/custom_env_render_method.py b/rllib/examples/envs/custom_env_render_method.py
new file mode 100644
index 000000000000..ee75b1863a73
--- /dev/null
+++ b/rllib/examples/envs/custom_env_render_method.py
@@ -0,0 +1,203 @@
+"""Example of implementing a custom `render()` method for your gymnasium RL environment.
+
+This example:
+    - shows how to write a simple gym.Env class yourself, in this case a corridor env,
+      in which the agent starts at the left side of the corridor and has to reach the
+      goal state all the way at the right.
+    - in particular, the new class overrides the Env's `render()` method to show how
+      you can write your own rendering logic.
+    - furthermore, we use the RLlib callbacks class introduced in this example here:
+      https://github.com/ray-project/ray/blob/master/rllib/examples/envs/env_rendering_and_recording.py  # noqa
+      in order to compile videos of the worst and best performing episodes in each
+      iteration and log these videos to your WandB account, so you can view them.
+
+
+How to run this script
+----------------------
+`python [script file name].py --enable-new-api-stack
+--wandb-key=[your WandB API key] --wandb-project=[some WandB project name]
+--wandb-run-name=[optional: WandB run name within --wandb-project]`
+
+In order to see the actual videos, you need to have a WandB account and provide your
+API key and a project name on the command line (see above).
+
+Use the `--num-agents` argument to set up the env as a multi-agent env. If
+`--num-agents` > 0, RLlib will simply run as many of the defined single-agent
+environments in parallel and with different policies to be trained for each agent.
+
+For debugging, use the following additional command line options
+`--no-tune --num-env-runners=0`
+which should allow you to set breakpoints anywhere in the RLlib code and
+have the execution stop there for inspection and debugging.
+
+
+Results to expect
+-----------------
+After the first training iteration, you should see the videos in your WandB account
+under the provided `--wandb-project` name.
Filter for "videos_best" or "videos_worst". + +Note that the default Tune TensorboardX (TBX) logger might complain about the videos +being logged. This is ok, the TBX logger will simply ignore these. The WandB logger, +however, will recognize the video tensors shaped +(1 [batch], T [video len], 3 [rgb], [height], [width]) and properly create a WandB video +object to be sent to their server. + +Your terminal output should look similar to this (the following is for a +`--num-agents=2` run; expect similar results for the other `--num-agents` +settings): ++---------------------+------------+----------------+--------+------------------+ +| Trial name | status | loc | iter | total time (s) | +|---------------------+------------+----------------+--------+------------------+ +| PPO_env_fb1c0_00000 | TERMINATED | 127.0.0.1:8592 | 3 | 21.1876 | ++---------------------+------------+----------------+--------+------------------+ ++-------+-------------------+-------------+-------------+ +| ts | combined return | return p1 | return p0 | +|-------+-------------------+-------------+-------------| +| 12000 | 12.7655 | 7.3605 | 5.4095 | ++-------+-------------------+-------------+-------------+ +""" + +import gymnasium as gym +import numpy as np +from gymnasium.spaces import Box, Discrete +from PIL import Image, ImageDraw + +from ray.rllib.algorithms.ppo import PPOConfig +from ray.rllib.env.multi_agent_env import make_multi_agent +from ray.rllib.examples.envs.env_rendering_and_recording import EnvRenderCallback +from ray.rllib.utils.test_utils import ( + add_rllib_example_script_args, + run_rllib_example_script_experiment, +) +from ray import tune + +parser = add_rllib_example_script_args( + default_iters=10, + default_reward=9.0, + default_timesteps=10000, +) + + +class CustomRenderedCorridorEnv(gym.Env): + """Example of a custom env, for which we specify rendering behavior.""" + + def __init__(self, config): + self.end_pos = config.get("corridor_length", 10) + self.max_steps = config.get("max_steps", 100) + self.cur_pos = 0 + self.steps = 0 + self.action_space = Discrete(2) + self.observation_space = Box(0.0, 999.0, shape=(1,), dtype=np.float32) + + def reset(self, *, seed=None, options=None): + self.cur_pos = 0.0 + self.steps = 0 + return np.array([self.cur_pos], np.float32), {} + + def step(self, action): + self.steps += 1 + assert action in [0, 1], action + if action == 0 and self.cur_pos > 0: + self.cur_pos -= 1.0 + elif action == 1: + self.cur_pos += 1.0 + truncated = self.steps >= self.max_steps + terminated = self.cur_pos >= self.end_pos + return ( + np.array([self.cur_pos], np.float32), + 10.0 if terminated else -0.1, + terminated, + truncated, + {}, + ) + + def render(self) -> np._typing.NDArray[np.uint8]: + """Implements rendering logic for this env (given the current observation). + + You should return a numpy RGB image like so: + np.array([height, width, 3], dtype=np.uint8). + + Returns: + np.ndarray: A numpy uint8 3D array (image) to render. + """ + # Image dimensions. + # Each position in the corridor is 50 pixels wide. + width = (self.end_pos + 2) * 50 + # Fixed height of the image. + height = 100 + + # Create a new image with white background + image = Image.new("RGB", (width, height), "white") + draw = ImageDraw.Draw(image) + + # Draw the corridor walls + # Grey rectangle for the corridor. + draw.rectangle([50, 30, width - 50, 70], fill="grey") + + # Draw the agent. + # Calculate the x coordinate of the agent. + agent_x = (self.cur_pos + 1) * 50 + # Blue rectangle for the agent. 
+        draw.rectangle([agent_x + 10, 40, agent_x + 40, 60], fill="blue")
+
+        # Draw the goal state.
+        # Calculate the x coordinate of the goal.
+        goal_x = self.end_pos * 50
+        # Green rectangle for the goal state.
+        draw.rectangle([goal_x + 10, 40, goal_x + 40, 60], fill="green")
+
+        # Convert the image to a uint8 numpy array.
+        return np.array(image, dtype=np.uint8)
+
+
+# Create a simple multi-agent version of the above Env by duplicating the single-agent
+# env n (n=num agents) times and having the agents act independently, each one in a
+# different corridor.
+MultiAgentCustomRenderedCorridorEnv = make_multi_agent(
+    lambda config: CustomRenderedCorridorEnv(config)
+)
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    assert (
+        args.enable_new_api_stack
+    ), "Must set --enable-new-api-stack when running this script!"
+
+    # The `config` arg passed into our Env's constructor (see the class' __init__ method
+    # above). Feel free to change these.
+    env_options = {
+        "corridor_length": 10,
+        "max_steps": 100,
+        "num_agents": args.num_agents,  # <- only used by the multi-agent version.
+    }
+
+    env_cls_to_use = (
+        CustomRenderedCorridorEnv
+        if args.num_agents == 0
+        else MultiAgentCustomRenderedCorridorEnv
+    )
+
+    tune.register_env("env", lambda _: env_cls_to_use(env_options))
+
+    # Example config switching on rendering.
+    base_config = (
+        PPOConfig()
+        # Configure our env to be the above-registered one.
+        .environment("env")
+        # Plug in our env-rendering (and logging) callback. This callback class allows
+        # you to fully customize your rendering behavior (which workers should render,
+        # which episodes, which (vector) env indices, etc.). We refer to this example
+        # script here for further details:
+        # https://github.com/ray-project/ray/blob/master/rllib/examples/envs/env_rendering_and_recording.py  # noqa
+        .callbacks(EnvRenderCallback)
+    )
+
+    if args.num_agents > 0:
+        base_config.multi_agent(
+            policies={f"p{i}" for i in range(args.num_agents)},
+            policy_mapping_fn=lambda aid, eps, **kw: f"p{aid}",
+        )
+
+    run_rllib_example_script_experiment(base_config, args)

From 918f4a37ed4c97f3b7372bd07e3035a63931fe47 Mon Sep 17 00:00:00 2001
From: sven1977
Date: Thu, 2 May 2024 12:11:55 +0200
Subject: [PATCH 2/4] wip

Signed-off-by: sven1977
---
 rllib/BUILD | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/rllib/BUILD b/rllib/BUILD
index d1209d8a6227..da81761726a9 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -2278,6 +2278,24 @@ py_test(
     args = ["--enable-new-api-stack", "--as-test"]
 )
 
+py_test(
+    name = "examples/envs/custom_env_render_method",
+    main = "examples/envs/custom_env_render_method.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "small",
+    srcs = ["examples/envs/custom_env_render_method.py"],
+    args = ["--enable-new-api-stack", "--num-agents=0"]
+)
+
+py_test(
+    name = "examples/envs/custom_env_render_method_multi_agent",
+    main = "examples/envs/custom_env_render_method.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "small",
+    srcs = ["examples/envs/custom_env_render_method.py"],
+    args = ["--enable-new-api-stack", "--num-agents=2"]
+)
+
 #@OldAPIStack
 py_test(
     name = "examples/envs/greyscale_env",

From e85886979796d231c011f3e48800f8742b68b2f6 Mon Sep 17 00:00:00 2001
From: sven1977
Date: Thu, 2 May 2024 19:00:03 +0200
Subject: [PATCH 3/4] fixes

Signed-off-by: sven1977
---
 rllib/BUILD                                   | 10 +++++----
 rllib/algorithms/algorithm.py                 | 22 +++++++++++++------
 .../tests/test_callbacks_on_algorithm.py      |  2 +-
 .../tests/test_callbacks_on_env_runner.py     |  1 +
 .../algorithms/tests/test_worker_failures.py  |  2 +-
 rllib/env/multi_agent_env_runner.py           |  1 +
 rllib/env/single_agent_env_runner.py          |  1 +
 .../self_play_with_policy_checkpoint.py       |  2 +-
 .../curriculum/curriculum_learning.py         |  1 +
 .../envs/env_rendering_and_recording.py       |  2 ++
 .../evaluation_parallel_to_training.py        | 12 +++++++++-
 .../multi_agent/utils/self_play_callback.py   |  2 +-
 .../utils/self_play_league_based_callback.py  |  2 +-
 13 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/rllib/BUILD b/rllib/BUILD
index 29e022ade72e..e956d5ea116a 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -2280,17 +2280,19 @@ py_test(
 
 py_test(
     name = "examples/envs/custom_env_render_method",
-    srcs = ["examples/envs/custom_env_render_method.py"],
+    main = "examples/envs/custom_env_render_method.py",
     tags = ["team:rllib", "exclusive", "examples"],
-    size = "small",
+    size = "medium",
+    srcs = ["examples/envs/custom_env_render_method.py"],
     args = ["--enable-new-api-stack", "--num-agents=0"]
 )
 
 py_test(
     name = "examples/envs/custom_env_render_method_multi_agent",
-    srcs = ["examples/envs/custom_env_render_method.py"],
+    main = "examples/envs/custom_env_render_method.py",
     tags = ["team:rllib", "exclusive", "examples"],
-    size = "small",
+    size = "medium",
+    srcs = ["examples/envs/custom_env_render_method.py"],
     args = ["--enable-new-api-stack", "--num-agents=2"]
 )
 
diff --git a/rllib/algorithms/algorithm.py b/rllib/algorithms/algorithm.py
index 72be65d07cfb..c1d30284bd06 100644
--- a/rllib/algorithms/algorithm.py
+++ b/rllib/algorithms/algorithm.py
@@ -796,7 +796,7 @@ def setup(self, config: AlgorithmConfig) -> None:
         self.workers.sync_weights(inference_only=True)
 
         # Run `on_algorithm_init` callback after initialization is done.
-        self.callbacks.on_algorithm_init(algorithm=self)
+        self.callbacks.on_algorithm_init(algorithm=self, metrics_logger=self.metrics)
 
     @OverrideToImplementCustomLogic
     @classmethod
@@ -999,7 +999,7 @@ def evaluate(
                 config=self.evaluation_config,
             )
 
-        self.callbacks.on_evaluate_start(algorithm=self)
+        self.callbacks.on_evaluate_start(algorithm=self, metrics_logger=self.metrics)
 
         env_steps = agent_steps = 0
         batches = []
@@ -1097,7 +1097,11 @@ def evaluate(
                 eval_results["off_policy_estimator"][name] = avg_estimate
 
         # Trigger `on_evaluate_end` callback.
-        self.callbacks.on_evaluate_end(algorithm=self, evaluation_metrics=eval_results)
+        self.callbacks.on_evaluate_end(
+            algorithm=self,
+            metrics_logger=self.metrics,
+            evaluation_metrics=eval_results,
+        )
 
         # Also return the results here for convenience.
         return eval_results
@@ -2447,9 +2451,13 @@ def load_checkpoint(self, checkpoint_dir: str) -> None:
     def log_result(self, result: ResultDict) -> None:
         # Log after the callback is invoked, so that the user has a chance
        # to mutate the result.
-        # TODO: Remove `algorithm` arg at some point to fully deprecate the old
-        # signature.
-        self.callbacks.on_train_result(algorithm=self, result=result)
+        # TODO (sven): It might not make sense to pass in the MetricsLogger at this late
+        # point in time. In here, the result dict has already been "compiled" (reduced)
+        # by the MetricsLogger and there is probably no point in adding more Stats
+        # here.
+        self.callbacks.on_train_result(
+            algorithm=self, metrics_logger=self.metrics, result=result
+        )
         # Then log according to Trainable's logging logic.
         Trainable.log_result(self, result)
 
@@ -3264,7 +3272,7 @@ def _run_one_training_iteration_and_evaluation_in_parallel_wo_thread(
                 config=self.evaluation_config,
             )
 
-        self.callbacks.on_evaluate_start(algorithm=self)
+        self.callbacks.on_evaluate_start(algorithm=self, metrics_logger=self.metrics)
 
         env_steps = agent_steps = 0
 
diff --git a/rllib/algorithms/tests/test_callbacks_on_algorithm.py b/rllib/algorithms/tests/test_callbacks_on_algorithm.py
index c3533ab6ac8b..9a07da3850fc 100644
--- a/rllib/algorithms/tests/test_callbacks_on_algorithm.py
+++ b/rllib/algorithms/tests/test_callbacks_on_algorithm.py
@@ -35,7 +35,7 @@ def on_workers_recreated(
 
 
 class InitAndCheckpointRestoredCallbacks(DefaultCallbacks):
-    def on_algorithm_init(self, *, algorithm, **kwargs):
+    def on_algorithm_init(self, *, algorithm, metrics_logger, **kwargs):
         self._on_init_was_called = True
 
     def on_checkpoint_loaded(self, *, algorithm, **kwargs):
diff --git a/rllib/algorithms/tests/test_callbacks_on_env_runner.py b/rllib/algorithms/tests/test_callbacks_on_env_runner.py
index 062f39a99f01..34329c20bf41 100644
--- a/rllib/algorithms/tests/test_callbacks_on_env_runner.py
+++ b/rllib/algorithms/tests/test_callbacks_on_env_runner.py
@@ -49,6 +49,7 @@ def on_episode_created(
         episode,
         worker=None,
         env_runner=None,
+        metrics_logger=None,
         base_env=None,
         env=None,
         policies=None,
diff --git a/rllib/algorithms/tests/test_worker_failures.py b/rllib/algorithms/tests/test_worker_failures.py
index 1548aa2b4291..adaa80dc675e 100644
--- a/rllib/algorithms/tests/test_worker_failures.py
+++ b/rllib/algorithms/tests/test_worker_failures.py
@@ -225,7 +225,7 @@ class AddModuleCallback(DefaultCallbacks):
     def __init__(self):
         super().__init__()
 
-    def on_algorithm_init(self, *, algorithm, **kwargs):
+    def on_algorithm_init(self, *, algorithm, metrics_logger, **kwargs):
         # Add a custom module to algorithm.
         spec = algorithm.config.get_default_rl_module_spec()
         spec.observation_space = gym.spaces.Box(low=0, high=1, shape=(8,))
diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py
index 7b9042d013f6..e26968a7d36c 100644
--- a/rllib/env/multi_agent_env_runner.py
+++ b/rllib/env/multi_agent_env_runner.py
@@ -749,6 +749,7 @@ def make_env(self):
         # Call the `on_environment_created` callback.
         self._callbacks.on_environment_created(
             env_runner=self,
+            metrics_logger=self.metrics,
             env=self.env,
             env_context=env_ctx,
         )
diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py
index 5250cddadcb5..23407629835e 100644
--- a/rllib/env/single_agent_env_runner.py
+++ b/rllib/env/single_agent_env_runner.py
@@ -702,6 +702,7 @@ def make_env(self) -> None:
         # Call the `on_environment_created` callback.
         self._callbacks.on_environment_created(
             env_runner=self,
+            metrics_logger=self.metrics,
             env=self.env,
             env_context=env_ctx,
         )
diff --git a/rllib/examples/_old_api_stack/connectors/self_play_with_policy_checkpoint.py b/rllib/examples/_old_api_stack/connectors/self_play_with_policy_checkpoint.py
index 38531c626b5f..26d663cc7f2e 100644
--- a/rllib/examples/_old_api_stack/connectors/self_play_with_policy_checkpoint.py
+++ b/rllib/examples/_old_api_stack/connectors/self_play_with_policy_checkpoint.py
@@ -46,7 +46,7 @@ def __init__(self, checkpoint_dir):
         self._checkpoint_dir = checkpoint_dir
         super().__init__()
 
-    def on_algorithm_init(self, *, algorithm, **kwargs):
+    def on_algorithm_init(self, *, algorithm, metrics_logger, **kwargs):
         policy = Policy.from_checkpoint(
             self._checkpoint_dir, policy_ids=[OPPONENT_POLICY_ID]
         )
diff --git a/rllib/examples/curriculum/curriculum_learning.py b/rllib/examples/curriculum/curriculum_learning.py
index b0cb6865e98a..5529138e1024 100644
--- a/rllib/examples/curriculum/curriculum_learning.py
+++ b/rllib/examples/curriculum/curriculum_learning.py
@@ -149,6 +149,7 @@ def on_train_result(
         self,
         *,
         algorithm: Algorithm,
+        metrics_logger=None,
         result: dict,
         **kwargs,
     ) -> None:
diff --git a/rllib/examples/envs/env_rendering_and_recording.py b/rllib/examples/envs/env_rendering_and_recording.py
index b7a3e743c93b..60b7a44606d5 100644
--- a/rllib/examples/envs/env_rendering_and_recording.py
+++ b/rllib/examples/envs/env_rendering_and_recording.py
@@ -103,6 +103,7 @@ def on_episode_step(
         *,
         episode,
         env_runner,
+        metrics_logger,
         env,
         env_index,
         rl_module,
@@ -137,6 +138,7 @@ def on_episode_end(
         *,
         episode,
         env_runner,
+        metrics_logger,
         env,
         env_index,
         rl_module,
diff --git a/rllib/examples/evaluation/evaluation_parallel_to_training.py b/rllib/examples/evaluation/evaluation_parallel_to_training.py
index d1e45bed5624..e7b6af7ed3e4 100644
--- a/rllib/examples/evaluation/evaluation_parallel_to_training.py
+++ b/rllib/examples/evaluation/evaluation_parallel_to_training.py
@@ -66,6 +66,8 @@
 |          81.7371 | 100000 |   494.68 |             494.68 |
 +------------------+--------+----------+--------------------+
 """
+from typing import Optional
+
 from ray.rllib.algorithms.algorithm import Algorithm
 from ray.rllib.algorithms.callbacks import DefaultCallbacks
 from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
@@ -75,6 +77,7 @@
     NUM_EPISODES,
     NUM_ENV_STEPS_SAMPLED,
 )
+from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
 from ray.rllib.utils.test_utils import (
     add_rllib_example_script_args,
     run_rllib_example_script_experiment,
@@ -124,7 +127,14 @@
 
 
 class AssertEvalCallback(DefaultCallbacks):
-    def on_train_result(self, *, algorithm: Algorithm, result: ResultDict, **kwargs):
+    def on_train_result(
+        self,
+        *,
+        algorithm: Algorithm,
+        metrics_logger: Optional[MetricsLogger] = None,
+        result: ResultDict,
+        **kwargs,
+    ):
         # The eval results can be found inside the main `result` dict
         # (old API stack: "evaluation").
         eval_results = result.get(EVALUATION_RESULTS, result.get("evaluation", {}))
diff --git a/rllib/examples/multi_agent/utils/self_play_callback.py b/rllib/examples/multi_agent/utils/self_play_callback.py
index 3554cebcff90..c9dd443ae517 100644
--- a/rllib/examples/multi_agent/utils/self_play_callback.py
+++ b/rllib/examples/multi_agent/utils/self_play_callback.py
@@ -18,7 +18,7 @@ def __init__(self, win_rate_threshold):
         # Report the matchup counters (who played against whom?).
         self._matching_stats = defaultdict(int)
 
-    def on_train_result(self, *, algorithm, result, **kwargs):
+    def on_train_result(self, *, algorithm, metrics_logger=None, result, **kwargs):
         # Get the win rate for the train batch.
         # Note that normally, one should set up a proper evaluation config,
         # such that evaluation always happens on the already updated policy,
diff --git a/rllib/examples/multi_agent/utils/self_play_league_based_callback.py b/rllib/examples/multi_agent/utils/self_play_league_based_callback.py
index 68c2880ac741..a8f1c74daf86 100644
--- a/rllib/examples/multi_agent/utils/self_play_league_based_callback.py
+++ b/rllib/examples/multi_agent/utils/self_play_league_based_callback.py
@@ -32,7 +32,7 @@ def __init__(self, win_rate_threshold):
         # Report the matchup counters (who played against whom?).
         self._matching_stats = defaultdict(int)
 
-    def on_train_result(self, *, algorithm, result, **kwargs):
+    def on_train_result(self, *, algorithm, metrics_logger=None, result, **kwargs):
         local_worker = algorithm.workers.local_worker()
 
         # Avoid `self` being pickled into the remote function below.

From 7bc37128657d30b1949b0600c58a3cab291c13bb Mon Sep 17 00:00:00 2001
From: sven1977
Date: Thu, 2 May 2024 19:53:06 +0200
Subject: [PATCH 4/4] fixes

Signed-off-by: sven1977
---
 rllib/env/multi_agent_env_runner.py  |  8 ++++----
 rllib/env/single_agent_env_runner.py | 14 +++++++-------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py
index e26968a7d36c..e342c2ded892 100644
--- a/rllib/env/multi_agent_env_runner.py
+++ b/rllib/env/multi_agent_env_runner.py
@@ -60,6 +60,10 @@ def __init__(self, config: AlgorithmConfig, **kwargs):
         # Get the worker index on which this instance is running.
         self.worker_index: int = kwargs.get("worker_index")
 
+        # Set up all metrics-related structures and counters.
+        self.metrics: Optional[MetricsLogger] = None
+        self._setup_metrics()
+
         # Create our callbacks object.
         self._callbacks: DefaultCallbacks = self.config.callbacks_class()
 
@@ -86,10 +90,6 @@ def __init__(self, config: AlgorithmConfig, **kwargs):
         # Create the two connector pipelines: env-to-module and module-to-env.
         self._module_to_env = self.config.build_module_to_env_connector(self.env)
 
-        # Set up all metrics-related structures and counters.
-        self.metrics: Optional[MetricsLogger] = None
-        self._setup_metrics()
-
         self._needs_initial_reset: bool = True
         self._episode: Optional[MultiAgentEpisode] = None
         self._shared_data = None
diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py
index 23407629835e..1bac56bf9dab 100644
--- a/rllib/env/single_agent_env_runner.py
+++ b/rllib/env/single_agent_env_runner.py
@@ -52,11 +52,16 @@ def __init__(self, config: AlgorithmConfig, **kwargs):
         """
         super().__init__(config=config)
 
+        self.worker_index = kwargs.get("worker_index")
+
+        # Create a MetricsLogger object for logging custom stats.
+        self.metrics = MetricsLogger()
+        # Initialize lifetime counts.
+        self.metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 0, reduce="sum")
+
         # Create our callbacks object.
         self._callbacks: DefaultCallbacks = self.config.callbacks_class()
 
-        self.worker_index = kwargs.get("worker_index")
-
         # Create the vectorized gymnasium env.
         self.env: Optional[gym.Wrapper] = None
         self.num_envs: int = 0
@@ -98,11 +103,6 @@ def __init__(self, config: AlgorithmConfig, **kwargs):
         # Create the two connector pipelines: env-to-module and module-to-env.
         self._module_to_env = self.config.build_module_to_env_connector(self.env)
 
-        # Create a MetricsLogger object for logging custom stats.
-        self.metrics = MetricsLogger()
-        # Initialize lifetime counts.
-        self.metrics.log_value(NUM_ENV_STEPS_SAMPLED_LIFETIME, 0, reduce="sum")
-
         # This should be the default.
         self._needs_initial_reset: bool = True
         self._episodes: List[Optional[SingleAgentEpisode]] = [