In [1]:
"""Uses Ray's RLLib to train agents to play Pistonball.

Author: Rohan (https://github.com/Rohan138)
"""

import os

#import supersuit as ss
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.tune.registry import register_env
from torch import nn

from pettingzoo.mpe import simple_tag_v2
from pettingzoo.test import render_test
from pettingzoo.test import performance_benchmark
from pettingzoo.test import test_save_obs

import time
import random

#raise NotImplementedError(
#    "There are currently bugs in this tutorial, we will fix them soon."
#)


In [2]:

class CNNModelV2(TorchModelV2, nn.Module):
    """Convolutional actor-critic network usable as an RLlib custom Torch model.

    A shared trunk of three conv layers and one dense layer feeds two linear
    heads: ``policy_fn`` (``num_outputs`` logits) and ``value_fn`` (one scalar
    per sample).

    The first convolution is hard-coded to 3 input channels and the dense
    layer to 3136 input features (3136 = 64 * 7 * 7, which is what these conv
    settings produce for an 84x84 input).
    NOTE(review): this presumably expects (batch, 84, 84, 3) image
    observations -- confirm against the environment actually being trained.
    """

    def __init__(self, obs_space, act_space, num_outputs, *args, **kwargs):
        """Build the trunk and both heads.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2``.
            act_space: Action space, forwarded to ``TorchModelV2``.
            num_outputs: Width of the policy head's output.
            *args, **kwargs: Forwarded unchanged to ``TorchModelV2.__init__``.
        """
        TorchModelV2.__init__(self, obs_space, act_space, num_outputs, *args, **kwargs)
        nn.Module.__init__(self)
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, [8, 8], stride=(4, 4)),
            nn.ReLU(),
            nn.Conv2d(32, 64, [4, 4], stride=(2, 2)),
            nn.ReLU(),
            nn.Conv2d(64, 64, [3, 3], stride=(1, 1)),
            nn.ReLU(),
            nn.Flatten(),
            (nn.Linear(3136, 512)),
            nn.ReLU(),
        )
        self.policy_fn = nn.Linear(512, num_outputs)
        self.value_fn = nn.Linear(512, 1)
        # Cache for the most recent value-head output. Created here so that
        # value_function() can detect "forward() has not run yet" instead of
        # failing with an AttributeError.
        self._value_out = None

    def forward(self, input_dict, state, seq_lens):
        """Compute policy outputs and cache the value estimate.

        Args:
            input_dict: Mapping whose ``"obs"`` entry is a 4-D tensor.
            state: Passed through and returned unchanged.
            seq_lens: Unused.

        Returns:
            A ``(policy_output, state)`` tuple.
        """
        # Move the last axis to position 1: Conv2d wants channels right after
        # the batch axis.
        model_out = self.model(input_dict["obs"].permute(0, 3, 1, 2))
        self._value_out = self.value_fn(model_out)
        return self.policy_fn(model_out), state

    def value_function(self):
        """Return the flattened value estimates cached by the last ``forward()``.

        Raises:
            RuntimeError: If ``forward()`` has not been called yet.
        """
        if self._value_out is None:
            raise RuntimeError("value_function() called before forward()")
        return self._value_out.flatten()


In [3]:


def env_creator(render_mode="rgb_array", cycles=200):
    """Construct the project's environment via ``world_utils.env``.

    Args:
        render_mode: Forwarded as ``render_mode``.
        cycles: Forwarded as ``max_cycles``.

    Returns:
        Whatever ``world_utils.env`` returns.
    """
    # Imported at call time rather than at the top of the notebook.
    # NOTE(review): presumably so remote worker processes resolve the module
    # themselves -- confirm before hoisting this import.
    from src.world import world_utils

    return world_utils.env(max_cycles=cycles, render_mode=render_mode)


In [4]:
# Smoke test: drive the environment with uniformly random actions.
cycles = 800
env = env_creator(cycles=cycles)
env.reset()
current_cycle = 0  # counts individual agent steps, not full environment cycles
agent_count = 4  # todo: get from env
action_queue = []
step_budget = cycles * agent_count

try:
    for agent in env.agent_iter():
        if current_cycle >= step_budget:
            break
        if not action_queue:
            # Refill with one random action (from the five ids 0-4) per agent.
            action_queue.extend(
                random.choice([0, 1, 2, 3, 4]) for _ in range(agent_count)
            )
        env.render()
        observation, reward, termination, truncation, info = env.last()
        # A finished agent must be stepped with None; handing it a real action
        # is rejected by the AEC API.
        action = None if (termination or truncation) else action_queue.pop(0)
        env.step(action)
        current_cycle += 1

        # Following this but it's not working: https://github.com/openai/multiagent-particle-envs/issues/76
        # score+=reward
finally:
    # Close on every exit path. The previous `for ... else` only closed the
    # env when the loop ended without `break`, and the step budget above
    # normally ends it with `break`.
    env.close()

In [11]:

if __name__ == "__main__":
    # Name under which the env is registered; also used for the results folder.
    env_name = "pistonball_v6"

    def _make_rllib_env(env_config):
        """Build the wrapped env from the config mapping RLlib passes in.

        Only the keys ``env_creator`` understands are forwarded, and they are
        forwarded by keyword. Passing the mapping positionally would bind the
        whole mapping to ``render_mode``.
        """
        creator_kwargs = {
            key: env_config[key]
            for key in ("render_mode", "cycles")
            if key in env_config
        }
        # NOTE(review): elsewhere in this notebook the env is driven through
        # agent_iter()/last(), i.e. the turn-based (AEC) API, while this
        # wrapper is the one for parallel envs -- confirm that
        # world_utils.env returns the kind of env this wrapper expects.
        return ParallelPettingZooEnv(env_creator(**creator_kwargs))

    register_env(env_name, _make_rllib_env)
    # NOTE(review): the model is registered here, but the config below never
    # selects it (no `model={"custom_model": ...}`) -- confirm this is intended.
    ModelCatalog.register_custom_model("CNNModelV2", CNNModelV2)

    config = (
        PPOConfig()
        .rollouts(num_rollout_workers=4, rollout_fragment_length='auto')
        .training(
            train_batch_size=512,
            lr=2e-5,
            gamma=0.99,
            lambda_=0.9,
            use_gae=True,
            clip_param=0.4,
            grad_clip=None,
            entropy_coeff=0.1,
            vf_loss_coeff=0.25,
            sgd_minibatch_size=64,
            num_sgd_iter=10,
        )
        .environment(env=env_name, clip_actions=True)
        .debugging(log_level="ERROR")
        .framework(framework="torch")
        # GPU count comes from the RLLIB_NUM_GPUS environment variable (default 0).
        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
    )

    # Train PPO until 5M timesteps, checkpointing every 10 iterations.
    tune.run(
        "PPO",
        name="PPO",
        stop={"timesteps_total": 5000000},
        checkpoint_freq=10,
        local_dir="~/ray_results/" + env_name,
        config=config.to_dict(),
    )


0,1
Current time:,2023-03-10 17:44:27
Running for:,00:00:15.51
Memory:,10.6/15.9 GiB

Trial name,# failures,error file
PPO_pistonball_v6_c9148_00000,1,C:\Users\rober\ray_results\pistonball_v6\PPO\PPO_pistonball_v6_c9148_00000_0_2023-03-10_17-44-12\error.txt

Trial name,status,loc
PPO_pistonball_v6_c9148_00000,ERROR,


[2m[36m(PPO pid=25428)[0m 2023-03-10 17:44:18,730	INFO algorithm.py:506 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2023-03-10 17:44:27,502	ERROR trial_runner.py:1062 -- Trial PPO_pistonball_v6_c9148_00000: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "D:\Study_Documents\thesis\env\lib\site-packages\ray\tune\execution\ray_trial_executor.py", line 1276, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "D:\Study_Documents\thesis\env\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "D:\Study_Documents\thesis\env\lib\site-packages\ray\_private\worker.py", line 2382, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::PPO.__init__()[39m (pid=25428, ip=127.0.0.1, repr=PPO)
  File "D:\Stu

Trial name,trial_id
PPO_pistonball_v6_c9148_00000,c9148_00000


[2m[36m(PPO pid=25428)[0m 2023-03-10 17:44:27,481	ERROR actor_manager.py:496 -- Ray error, taking actor 1 out of service. The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=11700, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x00000147939BB5E0>)
[2m[36m(PPO pid=25428)[0m   File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
[2m[36m(PPO pid=25428)[0m   File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
[2m[36m(PPO pid=25428)[0m   File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
[2m[36m(PPO pid=25428)[0m   File "D:\Study_Documents\thesis\env\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
[2m[36m(PPO pid=25428)[0m     return method(__ray_actor, *args, **kwargs)
[2m[36m(PPO pid=25428)[0m   File "D:\Study_Documents\thesis\env\lib\site-packages\ray\util\tracing\tracin

[2m[36m(RolloutWorker pid=12752)[0m 2023-03-10 17:44:27,433	ERROR worker.py:772 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=12752, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001CA7F97B610>)
[2m[36m(RolloutWorker pid=12752)[0m   File "python\ray\_raylet.pyx", line 857, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=12752)[0m   File "python\ray\_raylet.pyx", line 861, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=12752)[0m   File "python\ray\_raylet.pyx", line 803, in ray._raylet.execute_task.function_executor
[2m[36m(RolloutWorker pid=12752)[0m   File "D:\Study_Documents\thesis\env\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
[2m[36m(RolloutWorker pid=12752)[0m     return method(__ray_actor, *args, **kwargs)
[2m[36m(RolloutWorker pid=12752)[0m   File "D:\Study_Docume

TuneError: ('Trials did not complete', [PPO_pistonball_v6_c9148_00000])