### **Imports**

In [1]:
# Unity Environment
from mlagents_envs.environment import UnityEnvironment
from util import UnityParallelEnv

# Environment Utils
from pettingzoo import ParallelEnv
from pettingzoo.utils import BaseParallelWrapper
import gymnasium as gym
import functools

# Utils
import numpy as np

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


## **Analyze Environment**

In [2]:
ENV_NAME = "SoccerTwos"

def create_env(no_graphics=False, worker_id=None):
    """Launch the Unity build named by ENV_NAME and wrap it in the parallel API.

    Args:
        no_graphics: forwarded to ``UnityEnvironment``; pass True to run headless.
        worker_id: forwarded to ``UnityEnvironment``. When None (default) a random
            id in [0, 10000) is drawn so that several environments can be opened
            without reusing the same id.

    Returns:
        The ``UnityParallelEnv`` wrapping the launched Unity environment.
    """
    if worker_id is None:
        worker_id = np.random.randint(10000)

    # Create unity environment (the build is looked up relative to this notebook)
    unity_env = UnityEnvironment(
        file_name=f"../../../envs/{ENV_NAME}",
        worker_id=worker_id,
        no_graphics=no_graphics,
    )

    # Wrap it into the PettingZoo Parallel API; if wrapping fails, do not leave
    # the freshly launched Unity environment running with no handle to close it.
    try:
        return UnityParallelEnv(unity_env)
    except Exception:
        unity_env.close()
        raise

### **Test**

In [3]:
def test_env(env):
    # Agents
    print("Number of Agents:", len(env.agents))

    # Get example agent
    agent = env.agents[0]
    print("Example Agent:", agent)

    # Print action space
    action_space = env.action_space(agent)
    print("Agent Action Space:", action_space, type(action_space))

    # Print observation space
    observation_space = env.observation_space(agent)
    print("Agent Observation Space:", observation_space, type(observation_space))

    # Each observation/action is a dict keyed with agent with value the individual action/obs space of each agent
    # Each agent in this case should share the same policy (-> all have the same obs/action space)

    obs, info = env.reset()
    print("Observation Example:", obs[agent])

    # Simple test
    while env.agents:
        actions = {a: env.action_space(a).sample() for a in env.agents}
        print("Action Example:", actions[agent], type(actions[agent]))
        obs, rew, term, trunc, info = env.step(actions)
        break

    # Should do nothing, test to see if it exists
    env.render() 

In [8]:
env = create_env(no_graphics=True)
try:
    test_env(env)
finally:
    # Always shut the Unity environment down, even if the smoke test raises
    env.close()

'''OUTPUT:
Number of Agents: 32
Example Agent: SoccerTwos?team=0?agent_id=10
Agent Action Space: MultiDiscrete([3 3 3]) <class 'gymnasium.spaces.multi_discrete.MultiDiscrete'>
Agent Observation Space: Tuple(Box(-inf, inf, (264,), float32), Box(-inf, inf, (72,), float32)) <class 'gymnasium.spaces.tuple.Tuple'>
'''

[UnityMemory] Configuration Parameters - Can be set up in boot.config
    "memorysetup-bucket-allocator-granularity=16"
    "memorysetup-bucket-allocator-bucket-count=8"
    "memorysetup-bucket-allocator-block-size=4194304"
    "memorysetup-bucket-allocator-block-count=1"
    "memorysetup-main-allocator-block-size=16777216"
    "memorysetup-thread-allocator-block-size=16777216"
    "memorysetup-gfx-main-allocator-block-size=16777216"
    "memorysetup-gfx-thread-allocator-block-size=16777216"
    "memorysetup-cache-allocator-block-size=4194304"
    "memorysetup-typetree-allocator-block-size=2097152"
    "memorysetup-profiler-bucket-allocator-granularity=16"
    "memorysetup-profiler-bucket-allocator-bucket-count=8"
    "memorysetup-profiler-bucket-allocator-block-size=4194304"
    "memorysetup-profiler-bucket-allocator-block-count=1"
    "memorysetup-profiler-allocator-block-size=16777216"
    "memorysetup-profiler-editor-allocator-block-size=1048576"
    "memorysetup-temp-allocator-siz

"OUTPUT:\nNumber of Agents: 32\nExample Agent: SoccerTwos?team=0?agent_id=10\nAgent Action Space: MultiDiscrete([3 3 3]) <class 'gymnasium.spaces.multi_discrete.MultiDiscrete'>\nAgent Observation Space: Tuple(Box(-inf, inf, (264,), float32), Box(-inf, inf, (72,), float32)) <class 'gymnasium.spaces.tuple.Tuple'>\n"

### Reduce Observation/Action Space
Currently the observation space is split into two 1-D vectors, and the action space consists of 3 discrete branches with 3 options each. For our model, and for full compatibility with PettingZoo, it is better not to unnecessarily divide things that can be concatenated, especially the action space, which is really a single choice among 27 options (3 × 3 × 3) rather than 3 independent sets of 3 options.
- Concat observation vector.
- Flatten 3D action space into 1D. (including action_mask)

In [4]:
def flatten_mask(masks):
    '''
    Flattens a multi-dimensional (one mask per action branch) mask into a single 1D mask.
    - masks is a non-empty tuple (or list) of 1D np.array / array-likes of an
      integer or boolean dtype.

    Ex: [1, 0] x [1, 1] = [1, 1, 0, 0]
    This corresponds to the outer product with & of (first input is row, second is column):
    * 1 1
    1 1 1
    0 0 0
    Then reduce the 2D array [[1, 1], [0, 0]] to [1, 1, 0, 0] (row-major order, so
    entry i * len(second) + j of the result is first[i] & second[j]).
    For more than 2 masks, the procedure is repeated with the running result as the row input.

    Returns the flattened mask; for a single mask the mask itself (as an array).
    Raises TypeError if masks is not a tuple/list and ValueError if it is empty.
    '''
    # Explicit checks instead of `assert`, which is stripped when Python runs with -O
    if not isinstance(masks, (tuple, list)):
        raise TypeError("Mask array must be a tuple")
    if len(masks) == 0:
        raise ValueError("At least one mask is required")

    flat = np.asarray(masks[0])
    for mask in masks[1:]:
        mask = np.asarray(mask)
        # Flat is along the row dim, the next mask is along the column. & 2D Product -> Flatten
        flat = (flat[:, None] & mask[None, :]).reshape(-1)
    return flat


class ConcatParallelEnv(BaseParallelWrapper):
    """Parallel-env wrapper that exposes flat observation and action spaces.

    - Observations: when the wrapped observation space is a ``Tuple``, the
      reported space is its flattened space and each agent's ``"observation"``
      entry is concatenated into one vector (any other space is passed through).
    - Actions: when the wrapped action space is ``MultiDiscrete``, the reported
      space is a single ``Discrete`` with ``prod(nvec)`` actions, incoming flat
      actions are converted back to one index per branch, and each agent's
      ``"action_mask"`` entry is flattened with ``flatten_mask`` (any other
      space is passed through).
    """

    def __init__(self, env):
        """
        - Concats observation space. (Tuple of boxes, else passthrough)
        - Flattens multi dim discrete action space. (only works with MultiDiscrete, else passthrough)
        """
        super().__init__(env)

    def reset(self, seed=None, options=None):
        """Reset the wrapped env and return ``(flattened observations, info)``."""
        # Keyword arguments keep seed/options bound to the right parameters
        # whatever the positional order of the base wrapper's reset() is.
        obs, info = super().reset(seed=seed, options=options)
        return self._flatten_obs(obs), info

    def step(self, actions):
        """Step with flat actions; returns the wrapped step tuple with flattened observations."""
        # Unflatten input actions, flatten output obs
        obs, rew, term, trunc, info = super().step(self._unflatten_actions(actions))
        return self._flatten_obs(obs), rew, term, trunc, info

    def _flatten_obs(self, obs):
        """Return a new obs dict with per-agent observation / action_mask flattened.

        Neither ``obs`` nor the per-agent dicts it holds are modified: they belong
        to the wrapped env, which may hand the same objects back on a later call,
        and flattening an already-flattened entry would fail.
        """
        flat_obs = {}
        for agent, agent_obs in obs.items():
            concat_observation = isinstance(super().observation_space(agent), gym.spaces.Tuple)
            flatten_action_mask = isinstance(super().action_space(agent), gym.spaces.MultiDiscrete)

            if concat_observation or flatten_action_mask:
                # Shallow copy so the wrapped env's dict keeps its original entries
                agent_obs = dict(agent_obs)
                if concat_observation:
                    # concat obs
                    agent_obs["observation"] = np.concatenate(agent_obs["observation"])
                if flatten_action_mask:
                    # flatten action_mask
                    agent_obs["action_mask"] = flatten_mask(agent_obs["action_mask"])

            flat_obs[agent] = agent_obs
        return flat_obs

    def _unflatten_actions(self, actions):
        """Map each agent's flat action index back to one index per MultiDiscrete branch."""
        unflat_actions = {}
        for agent, agent_action in actions.items():
            # Single index -> Multi dim index
            action_space = super().action_space(agent)
            if isinstance(action_space, gym.spaces.MultiDiscrete):
                unflat_actions[agent] = np.array(np.unravel_index(agent_action, action_space.nvec))
            else:
                unflat_actions[agent] = agent_action
        return unflat_actions

    # Spaces are cached per (wrapper instance, agent), so repeated calls return the same object
    @functools.lru_cache(maxsize=None)
    def observation_space(self, agent):
        """Flattened observation space for ``agent`` (passthrough unless it is a Tuple)."""
        observation_space = super().observation_space(agent)
        if isinstance(observation_space, gym.spaces.Tuple):
            return gym.spaces.utils.flatten_space(observation_space)
        return observation_space

    @functools.lru_cache(maxsize=None)
    def action_space(self, agent):
        """Single Discrete action space for ``agent`` (passthrough unless it is MultiDiscrete)."""
        action_space = super().action_space(agent)
        if isinstance(action_space, gym.spaces.MultiDiscrete):
            return gym.spaces.Discrete(np.prod(action_space.nvec))
        return action_space

In [5]:
env = ConcatParallelEnv(create_env(no_graphics=True))
try:
    test_env(env)
finally:
    # Always shut the Unity environment down, even if the smoke test raises
    env.close()

'''OUTPUT:
Number of Agents: 32
Example Agent: SoccerTwos?team=0?agent_id=10
Agent Action Space: Discrete(27) <class 'gymnasium.spaces.discrete.Discrete'>
Agent Observation Space: Box(-inf, inf, (336,), float32) <class 'gymnasium.spaces.box.Box'>
'''

[UnityMemory] Configuration Parameters - Can be set up in boot.config
    "memorysetup-bucket-allocator-granularity=16"
    "memorysetup-bucket-allocator-bucket-count=8"
    "memorysetup-bucket-allocator-block-size=4194304"
    "memorysetup-bucket-allocator-block-count=1"
    "memorysetup-main-allocator-block-size=16777216"
    "memorysetup-thread-allocator-block-size=16777216"
    "memorysetup-gfx-main-allocator-block-size=16777216"
    "memorysetup-gfx-thread-allocator-block-size=16777216"
    "memorysetup-cache-allocator-block-size=4194304"
    "memorysetup-typetree-allocator-block-size=2097152"
    "memorysetup-profiler-bucket-allocator-granularity=16"
    "memorysetup-profiler-bucket-allocator-bucket-count=8"
    "memorysetup-profiler-bucket-allocator-block-size=4194304"
    "memorysetup-profiler-bucket-allocator-block-count=1"
    "memorysetup-profiler-allocator-block-size=16777216"
    "memorysetup-profiler-editor-allocator-block-size=1048576"
    "memorysetup-temp-allocator-siz

"OUTPUT:\nNumber of Agents: 32\nExample Agent: SoccerTwos?team=0?agent_id=10\nAgent Action Space: Discrete(27) <class 'gymnasium.spaces.discrete.Discrete'>\nAgent Observation Space: Box(-inf, inf, (336,), float32) <class 'gymnasium.spaces.box.Box'>\n"

### Official Test

This test still fails because PettingZoo does not support multi-discrete action spaces combined with action masks: when action masks are used, it expects a single discrete action space. This could technically be changed, but it may be unnecessary work.

In [6]:
from pettingzoo.test import parallel_api_test

env = ConcatParallelEnv(create_env(no_graphics=True))
try:
    parallel_api_test(env, num_cycles=100)
finally:
    # The API test is expected to fail for now; still shut the Unity environment down
    env.close()

[UnityMemory] Configuration Parameters - Can be set up in boot.config
    "memorysetup-bucket-allocator-granularity=16"
    "memorysetup-bucket-allocator-bucket-count=8"
    "memorysetup-bucket-allocator-block-size=4194304"
    "memorysetup-bucket-allocator-block-count=1"
    "memorysetup-main-allocator-block-size=16777216"
    "memorysetup-thread-allocator-block-size=16777216"
    "memorysetup-gfx-main-allocator-block-size=16777216"
    "memorysetup-gfx-thread-allocator-block-size=16777216"
    "memorysetup-cache-allocator-block-size=4194304"
    "memorysetup-typetree-allocator-block-size=2097152"
    "memorysetup-profiler-bucket-allocator-granularity=16"
    "memorysetup-profiler-bucket-allocator-bucket-count=8"
    "memorysetup-profiler-bucket-allocator-block-size=4194304"
    "memorysetup-profiler-bucket-allocator-block-count=1"
    "memorysetup-profiler-allocator-block-size=16777216"
    "memorysetup-profiler-editor-allocator-block-size=1048576"
    "memorysetup-temp-allocator-siz

### **Random Policy Game**

In [7]:
# Play with uniformly random actions, printing the first agent's action mask every step
env = ConcatParallelEnv(create_env(no_graphics=False))
obs, info = env.reset()
agent = env.agents[0]

while env.agents:
    action_mask = obs[agent]["action_mask"]
    print(action_mask)

    # Indices of the non-zero mask entries; stop as soon as there are none
    legal_actions = np.flatnonzero(action_mask)
    if legal_actions.size == 0:
        print("NO VALID ACTIONS")
        break
    print("VALID ACTIONS")

    # NOTE(review): actions are sampled from the full action space; legal_actions
    # is only used for the check above, not to restrict the sample.
    actions = {a: env.action_space(a).sample() for a in env.agents}
    print("ACTION CHOSEN:", actions[agent])
    obs, rew, term, trunc, info = env.step(actions)

[UnityMemory] Configuration Parameters - Can be set up in boot.config
    "memorysetup-bucket-allocator-granularity=16"
    "memorysetup-bucket-allocator-bucket-count=8"
    "memorysetup-bucket-allocator-block-size=4194304"
    "memorysetup-bucket-allocator-block-count=1"
    "memorysetup-main-allocator-block-size=16777216"
    "memorysetup-thread-allocator-block-size=16777216"
    "memorysetup-gfx-main-allocator-block-size=16777216"
    "memorysetup-gfx-thread-allocator-block-size=16777216"
    "memorysetup-cache-allocator-block-size=4194304"
    "memorysetup-typetree-allocator-block-size=2097152"
    "memorysetup-profiler-bucket-allocator-granularity=16"
    "memorysetup-profiler-bucket-allocator-bucket-count=8"
    "memorysetup-profiler-bucket-allocator-block-size=4194304"
    "memorysetup-profiler-bucket-allocator-block-count=1"
    "memorysetup-profiler-allocator-block-size=16777216"
    "memorysetup-profiler-editor-allocator-block-size=1048576"
    "memorysetup-temp-allocator-siz

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [16]:
# Close whatever environment `env` currently refers to (the random-policy cell
# leaves its environment open).
env.close()

In [16]:
# Sanity check: a multi-index converted to a flat index and back is unchanged
grid_shape = (10, 10)
x = np.ravel_multi_index((1, 9), grid_shape)
y = np.unravel_index(x, grid_shape)
x, y

(np.int64(19), (np.int64(1), np.int64(9)))

In [19]:
# Three-branch example: only combinations allowed by every branch stay 1
flatten_mask(
    (
        np.array([1, 0]),
        np.array([1, 0]),
        np.array([1, 1]),
    )
)

array([1, 1, 0, 0, 0, 0, 0, 0])