In [1]:
import os
import ray
import supersuit as ss 
from ray import tune    # experiment runner
from pettingzoo.butterfly import cooperative_pong_v5
from ray.tune.registry import register_env
from ray.rllib.env import ParallelPettingZooEnv
from ray.rllib.algorithms.ppo import PPOConfig

ray.init(num_cpus=6)

2024-10-29 15:43:43,188	INFO worker.py:1816 -- Started a local Ray instance.


0,1
Python version:,3.11.6
Ray version:,2.38.0


# Ray, RLLib and Pettingzoo
To perform RL experiments with Ray, four things are needed: 
1. RL Environment 
2. RL Algorithm
3. Configuration of the environment, algorithm and the experiment
4. Experiment Runner


The multi-agent environments from PettingZoo are not directly compatible with Ray, and need to be wrapped with either the ```PettingZooEnv``` or the ```PettingZooParallelEnv``` wrappers, depending on whether an ```AEC``` or a ```ParallelEnv``` environment is being used. In the following the ```ParallelEnv``` option will be shown using the ```cooperative_pong_v5```.

In [2]:
env_name = 'cooperative_pong'

def env_creator(env_config):
    env = cooperative_pong_v5.parallel_env(render_mode=env_config.get("render_mode", "human"))
    env = ss.color_reduction_v0(env, mode='B')
    env = ss.resize_v1(env, x_size=84, y_size=84)
    env = ss.frame_stack_v1(env, 4)
    env = ss.dtype_v0(env, 'float32')
    return env

register_env(env_name, lambda config: ParallelPettingZooEnv(env_creator(config)))

In the ```env_creator``` function we initialise the environment and using ```Supersuit``` we wrap the environment to be compatible with the ```rllib``` algorithms. The single functions are described [here](https://pypi.org/project/SuperSuit/3.3.1/). 

By registering the environment under the ```env_name``` we can access it from the ```rllib``` API. When the env is called, it invokes the ```env_creator``` function defined above and passes the arguments in ```config```. The ```.get()``` function retrieves the values for the key, allowing the specification of a default value if that key doesn't exist in the dictionary.

There are multiple ways of writing an algorithm configuration and running the experiment, with multiple [configuration options](https://docs.ray.io/en/latest/rllib/rllib-training.html?_gl=1*wviehk*_up*MQ..*_ga*MTA1MDM5NzY0Ny4xNzMwMTEwMTE0*_ga_0LCWHW1N3S*MTczMDExMDExMy4xLjEuMTczMDExMDE5MC4wLjAuMA..#configuring-rllib-algorithms). In the following two variants are shown using the ```PPOConfig``` model / class . One method is to set all configuration settings in the ```config``` object directly when accessing ```PPOConfig``` and run the configuration using ```tune.run()```.

In [None]:
config = (
    PPOConfig()
    .environment(env=env_name)
    .framework("torch")
    .training(
            train_batch_size=512,
            lr=2e-5,
            gamma=0.99,
            lambda_=0.9,
            use_gae=True,
            clip_param=0.4,
            grad_clip=None,
            entropy_coeff=0.1,
            vf_loss_coeff=0.25,
            num_sgd_iter=10,
    )
)

tune.run(
    'PPO',
    config=config.to_dict(),
    name='ppo_cooperative_pong',
)

An alternative is to initialise a ```PPOConfig``` object and then apply all configuration settings individually. In the following code the [new API stack](https://docs.ray.io/en/latest/rllib/rllib-new-api-stack.html) is activated by setting the two parameters of [```AlgorithmConcfig.api_stack```](https://docs.ray.io/en/latest/rllib/package_ref/doc/ray.rllib.algorithms.algorithm_config.AlgorithmConfig.api_stack.html) to ```True```. 

This configuration can then be built into an ```Algorithm``` object and the ```.train()``` method used to run a single training iteration. 

In [3]:
config = PPOConfig()
config.api_stack(
    enable_env_runner_and_connector_v2=True,
    enable_rl_module_and_learner=True,
)
config.environment(env=env_name)
config.training(
    gamma=0.9, lr=0.01, kl_coeff=0.3, train_batch_size_per_learner=256
)

algorithm = config.build()

`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
2024-10-29 15:44:18,197	ERROR actor_manager.py:804 -- Ray error (The actor died because of an error raised in its creation task, [36mray::SingleAgentEnvRunner.__init__()[39m (pid=39484, ip=127.0.0.1, actor_id=415991677ba2e10a3782f9d401000000, repr=<ray.rllib.env.single_agent_env_runner.SingleAgentEnv

[36m(SingleAgentEnvRunner pid=39484)[0m   gym.logger.warn(
[36m(SingleAgentEnvRunner pid=39484)[0m   logger.warn(
[36m(SingleAgentEnvRunner pid=39484)[0m   logger.warn(
[36m(SingleAgentEnvRunner pid=39484)[0m   logger.warn(
[36m(SingleAgentEnvRunner pid=39484)[0m Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::SingleAgentEnvRunner.__init__()[39m (pid=39484, ip=127.0.0.1, actor_id=415991677ba2e10a3782f9d401000000, repr=<ray.rllib.env.single_agent_env_runner.SingleAgentEnvRunner object at 0x0000027190B40A10>)
[36m(SingleAgentEnvRunner pid=39484)[0m              ^^^^^^^^^^^^^^^^^^
[36m(SingleAgentEnvRunner pid=39484)[0m   File "c:\Users\ushe\VSCode\PettingZoo_Intro\.venv\Lib\site-packages\ray\rllib\core\rl_module\torch\torch_rl_module.py", line 50, in __init__
[36m(SingleAgentEnvRunner pid=39484)[0m     RLModule.__init__(self, *args, **kwargs)
[36m(SingleAgentEnvRunner pid=39484)[0m   File "c:\Users\ushe\VSCod

AttributeError: 'NoneType' object has no attribute 'actor_critic_encoder_config'

In [None]:
from ray.tune.logger import pretty_print

results = algorithm.train()
pretty_print(results)