# Simple Spread Testing

## Standard Simple Spread (Collaborative Only)

Agent observations: `[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, communication]`
 - `self_vel = (2, )`
 - `self_pos = (2, )`
 - `landmark_rel_positions = (2 * N, )`
 - `other_agent_rel_positions = (2 * (N - 1), )`
 - `communication = (2 * (N - 1), )`

Agent action space: `[no_action, move_left, move_right, move_down, move_up] = (0-4)` 

In [2]:
from pettingzoo.mpe import simple_spread_v3

In [7]:
env = simple_spread_v3.parallel_env(N=5)
observations, infos = env.reset()
observations, infos


({'agent_0': array([ 0.        ,  0.        , -0.914505  ,  0.7369242 ,  1.2280984 ,
         -0.31268778,  0.2608471 , -1.296406  ,  0.9618274 , -0.22345683,
          1.8744236 , -0.32145205,  0.34718683, -0.3769984 ,  1.7224408 ,
         -0.32635674,  1.257719  , -0.48978192,  0.79144037, -1.7273643 ,
          0.0581587 , -0.7527819 ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        dtype=float32),
  'agent_1': array([ 0.        ,  0.        ,  0.8079359 ,  0.4105674 , -0.49434245,
          0.01366896, -1.4615937 , -0.9700493 , -0.7606135 ,  0.10289992,
          0.15198275,  0.00490471, -1.375254  , -0.05064164, -1.7224408 ,
          0.32635674, -0.46472183, -0.16342516, -0.93100053, -1.4010077 ,
         -1.6642822 , -0.42642516,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        dtype=float32),
  'agent_2': array([ 0.        ,  0.    

In [8]:
env.num_agents

5

: 

In [5]:
env.state()

array([ 0.        ,  0.        ,  0.20602952,  0.2017099 ,  0.10246428,
       -0.95494974,  0.29350424,  0.6085242 , -0.09458406,  0.5626049 ,
        0.11323519, -0.16783085,  0.15982419,  0.0426769 ,  0.2274976 ,
       -0.56936574,  0.53829366, -0.9099832 ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.3658537 ,  0.24438679, -0.05735991, -0.99762666,
        0.13368003,  0.56584734, -0.25440824,  0.519928  , -0.04658899,
       -0.21050777, -0.15982419, -0.0426769 ,  0.06767341, -0.61204267,
        0.3784695 , -0.9526601 ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.4335271 , -0.36765587, -0.12503332, -0.38558397,  0.06600662,
        1.17789   , -0.32208166,  1.1319706 , -0.1142624 ,  0.40153491,
       -0.2274976 ,  0.56936574, -0.06767341,  0.61204267,  0.31079608,
       -0.34061742,  0.        ,  0.        ,  0.        ,  0.  

In [6]:
print(observations["agent_0"].shape)
observations["agent_0"]

(24,)


array([ 0.        ,  0.        ,  0.20602952,  0.2017099 ,  0.10246428,
       -0.95494974,  0.29350424,  0.6085242 , -0.09458406,  0.5626049 ,
        0.11323519, -0.16783085,  0.15982419,  0.0426769 ,  0.2274976 ,
       -0.56936574,  0.53829366, -0.9099832 ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ], dtype=float32)

In [13]:
env.action_space("agent_0")

Discrete(5)

In [33]:
# this is where you would insert your policy
# actions = {agent: env.action_space(agent).sample() for agent in env.agents}
actions = {agent: 0 for agent in env.agents}
observations, rewards, terminations, truncations, infos = env.step(actions)
observations, rewards, terminations, truncations, infos

({}, defaultdict(int, {}), {}, {}, {})

## Adversarial Variant (Custom)

Agent observations: `[self_is_adversary, self_vel, self_pos, landmark_rel_positions, other_agent_is_adversary_rel_positions]`
 - `self_is_adversary = (1, )`: 0 / 1 flag
 - `self_vel = (2, )`
 - `self_pos = (2, )`
 - `landmark_rel_positions = (2 * n_landmarks, )`
 - `other_agent_is_adversary_rel_positions = ((1 + 2) * (n_agents + n_adversaries - 1), )`: 0 / 1 flag  for if that other agent is an adversary + relative position for the other agent times the number of other agents

Agent action space: `[no_action, move_left, move_right, move_down, move_up] = (0-4)` 

In [None]:
%load_ext autoreload
%autoreload 2

import simple_spread_adversarial

In [3]:
env = simple_spread_adversarial.parallel_env(n_agents=2, n_adversaries=2, n_landmarks=2)
observations, infos = env.reset()
observations, infos

({'adversary_0': array([ 1.        ,  0.        ,  0.        , -0.34062102, -0.77136046,
          1.2776079 ,  1.5442173 , -0.45429146,  0.3167862 ,  1.        ,
          0.62717503,  0.4534684 ,  0.        ,  0.89588726,  0.5511219 ,
          0.        ,  1.1182889 ,  0.10703828], dtype=float32),
  'adversary_1': array([ 1.        ,  0.        ,  0.        ,  0.286554  , -0.31789204,
          0.6504329 ,  1.0907489 , -1.0814664 , -0.1366822 ,  1.        ,
         -0.62717503, -0.4534684 ,  0.        ,  0.26871222,  0.09765349,
          0.        ,  0.49111387, -0.34643012], dtype=float32),
  'agent_0': array([ 0.        ,  0.        ,  0.        ,  0.5552662 , -0.22023855,
          0.38172063,  0.9930954 , -1.3501787 , -0.23433569,  1.        ,
         -0.89588726, -0.5511219 ,  1.        , -0.26871222, -0.09765349,
          0.        ,  0.22240166, -0.44408363], dtype=float32),
  'agent_1': array([ 0.        ,  0.        ,  0.        ,  0.7776679 , -0.6643222 ,
          0.1

In [4]:
env.num_agents, env.agents

(4, ['adversary_0', 'adversary_1', 'agent_0', 'agent_1'])

In [5]:
env.state()

array([ 1.        ,  0.        ,  0.        , -0.34062102, -0.77136046,
        1.2776079 ,  1.5442173 , -0.45429146,  0.3167862 ,  1.        ,
        0.62717503,  0.4534684 ,  0.        ,  0.89588726,  0.5511219 ,
        0.        ,  1.1182889 ,  0.10703828,  1.        ,  0.        ,
        0.        ,  0.286554  , -0.31789204,  0.6504329 ,  1.0907489 ,
       -1.0814664 , -0.1366822 ,  1.        , -0.62717503, -0.4534684 ,
        0.        ,  0.26871222,  0.09765349,  0.        ,  0.49111387,
       -0.34643012,  0.        ,  0.        ,  0.        ,  0.5552662 ,
       -0.22023855,  0.38172063,  0.9930954 , -1.3501787 , -0.23433569,
        1.        , -0.89588726, -0.5511219 ,  1.        , -0.26871222,
       -0.09765349,  0.        ,  0.22240166, -0.44408363,  0.        ,
        0.        ,  0.        ,  0.7776679 , -0.6643222 ,  0.15931898,
        1.4371791 , -1.5725803 ,  0.20974793,  1.        , -1.1182889 ,
       -0.10703828,  1.        , -0.49111387,  0.34643012,  0.  

In [6]:
print(observations["agent_0"].shape)
observations["agent_0"]

(18,)


array([ 0.        ,  0.        ,  0.        ,  0.5552662 , -0.22023855,
        0.38172063,  0.9930954 , -1.3501787 , -0.23433569,  1.        ,
       -0.89588726, -0.5511219 ,  1.        , -0.26871222, -0.09765349,
        0.        ,  0.22240166, -0.44408363], dtype=float32)

In [7]:
env.action_space("agent_0")

Discrete(5)

In [20]:
# this is where you would insert your policy
# actions = {agent: env.action_space(agent).sample() for agent in env.agents}
actions = {agent: env.action_space(agent).sample() for agent in env.agents}
observations, rewards, terminations, truncations, infos = env.step(actions)
observations, rewards, terminations, truncations, infos

({'adversary_0': array([ 1.        , -0.4815388 ,  0.6319574 , -1.3480055 , -0.22414342,
          2.2849925 ,  0.9970003 ,  0.553093  , -0.23043083,  1.        ,
          1.4915671 , -0.01874314,  0.        ,  2.4339936 ,  0.05084492,
          0.        ,  1.6320343 ,  0.22265871], dtype=float32),
  'adversary_1': array([ 1.        ,  0.09256034,  1.2162107 ,  0.1435616 , -0.24288656,
          0.79342526,  1.0157434 , -0.93847406, -0.21168768,  1.        ,
         -1.4915671 ,  0.01874314,  0.        ,  0.94242644,  0.06958806,
          0.        ,  0.14046715,  0.24140185], dtype=float32),
  'agent_0': array([ 0.        ,  0.3493707 ,  0.5996582 ,  1.085988  , -0.1732985 ,
         -0.14900117,  0.94615537, -1.8809005 , -0.28127575,  1.        ,
         -2.4339936 , -0.05084492,  1.        , -0.94242644, -0.06958806,
          0.        , -0.8019593 ,  0.1718138 ], dtype=float32),
  'agent_1': array([ 0.0000000e+00, -6.7715675e-01,  1.2221736e+00,  2.8402874e-01,
         -1.48

In [32]:
# Visualize full episode
env = simple_spread_adversarial.parallel_env(
    n_agents=2,
    n_adversaries=2,
    n_landmarks=3,
    render_mode="human"
)
observations, infos = env.reset()

while env.agents:
    # this is where you would insert your policy
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}

    observations, rewards, terminations, truncations, infos = env.step(actions)
env.close()
