# 🧠 Using MOISE+MARL in the Multi-Agent Particle Environment (Predator-Prey)

This notebook demonstrates how to apply the **MOISE+MARL framework** to the Predator-Prey scenario (`simple_world_comm`) of the **Multi-Agent Particle Environment (MPE)**, with predefined roles for the adversaries.

## 1. Import Dependencies

In [None]:
import gym
import math
from marllib import marl
from mma_wrapper.label_manager import label_manager
from mma_wrapper.organizational_model import (
    organizational_model, structural_specifications,
    functional_specifications, deontic_specifications,
    deontic_specification, time_constraint_type
)
from mma_wrapper.organizational_specification_logic import role_logic
from mma_wrapper.utils import label, observation, action, trajectory

## 2. Define Label Manager for MPE

In [None]:
class mpe_label_manager(label_manager):
    """Translate MPE actions and observations between raw and labelled forms.

    Actions are mapped between their names and the integer codes 0-4.
    Observations are mapped between a flat sequence and a dict of named
    segments whose widths are listed in ``normal_leader_adversary_sizes``
    (34 entries in total).
    """

    def __init__(self, action_space: gym.Space = None, observation_space: gym.Space = None):
        """Forward both spaces to the base class and build the lookup tables."""
        super().__init__(action_space, observation_space)

        # Position in this tuple is the integer code of the action.
        action_names = ("no_action", "move_left", "move_right", "move_down", "move_up")
        self.action_encode = {name: code for code, name in enumerate(action_names)}
        self.action_decode = dict(enumerate(action_names))

        # Segment name -> number of consecutive entries it occupies, in the
        # order the segments appear in the flat observation.
        # NOTE(review): presumably the layout shared by the leader and the
        # normal adversaries of `simple_world_comm` -- confirm against the env.
        self.normal_leader_adversary_sizes = {
            'self_vel': 2,
            'self_pos': 2,
            'landmark_rel_positions': 10,
            'other_agent_rel_positions': 10,
            'other_agent_velocities': 4,
            'self_in_forest': 2,
            'leader_comm': 4,
        }

    def one_hot_encode_observation(self, observation, agent=None):
        """Return the values of the ``observation`` dict as a list.

        Despite the name, no one-hot vector is built and the values are not
        flattened; they are returned in the dict's iteration order.
        ``agent`` is accepted but not used.
        """
        return list(observation.values())

    def one_hot_decode_observation(self, observation, agent=None):
        """Split a flat ``observation`` into named segments.

        Each segment is the slice of ``observation`` whose width is given by
        ``normal_leader_adversary_sizes``; segments are taken back to back
        starting at index 0. ``agent`` is accepted but not used.

        Returns:
            dict mapping each segment name to its slice of ``observation``.
        """
        segments = {}
        start = 0
        for name, width in self.normal_leader_adversary_sizes.items():
            stop = start + width
            segments[name] = observation[start:stop]
            start = stop
        return segments

    def one_hot_encode_action(self, action, agent=None):
        """Return the integer code of the action named ``action``.

        Raises:
            KeyError: if ``action`` is not a known action name.
        """
        return self.action_encode[action]

    def one_hot_decode_action(self, action, agent=None):
        """Return the name of the action whose integer code is ``action``.

        Raises:
            KeyError: if ``action`` is not a known action code.
        """
        return self.action_decode[action]

## 3. Define Role Logic Functions

In [None]:
def leader_adversary_fun(trajectory, observation, agent_name, label_manager):
    """Return the action code that moves the leader toward the nearest good agent.

    The observation is decoded with ``label_manager``, the relative positions
    of ``agent_0`` and ``agent_1`` are read from its
    ``"other_agent_rel_positions"`` segment, and the closer of the two is
    chased along the axis on which it is farther away.

    Args:
        trajectory: Not used by this rule.
        observation: Encoded observation, passed as-is to
            ``label_manager.one_hot_decode_observation``.
        agent_name: Name of the acting agent; only forwarded to the decoder.
        label_manager: Object providing
            ``one_hot_decode_observation(observation, agent=...)`` that
            returns a mapping with an ``"other_agent_rel_positions"`` entry of
            at least 10 values (x, y pairs).

    Returns:
        int: 2 (right) or 1 (left) when the horizontal offset dominates,
        otherwise 4 (up) or 3 (down). The codes follow the ``move_*`` entries
        of ``mpe_label_manager.action_encode``. A zero offset yields 3.
    """
    decoded = label_manager.one_hot_decode_observation(observation, agent=agent_name)
    rel = decoded["other_agent_rel_positions"]

    # Order of the other agents as seen by the leader: (x, y) pair i belongs
    # to the i-th name.
    # NOTE(review): ordering is assumed to match the environment -- confirm.
    others = ('adversary_0', 'adversary_1', 'adversary_2', 'agent_0', 'agent_1')
    rel_by_agent = {name: (rel[2 * i], rel[2 * i + 1]) for i, name in enumerate(others)}

    # Nearest good agent; on a tie the first one listed wins. No distance
    # sentinel is used, so arbitrarily distant targets are still handled.
    dx, dy = min(
        (rel_by_agent[name] for name in ("agent_0", "agent_1")),
        key=lambda offset: math.hypot(*offset),
    )

    if abs(dx) > abs(dy):
        return 2 if dx > 0 else 1
    return 4 if dy > 0 else 3

def normal_adversary_fun(trajectory, observation, agent_name, label_manager):
    """Return the action code that moves an adversary toward the nearest good agent.

    The observation is decoded with ``label_manager``, the relative positions
    of ``agent_0`` and ``agent_1`` are read from its
    ``"other_agent_rel_positions"`` segment, and the closer of the two is
    chased along the axis on which it is farther away.

    Args:
        trajectory: Not used by this rule.
        observation: Encoded observation, passed as-is to
            ``label_manager.one_hot_decode_observation``.
        agent_name: Name of the acting agent. It is removed from the list of
            all agents so that the remaining names line up with the (x, y)
            pairs of the other agents, and it is forwarded to the decoder.
        label_manager: Object providing
            ``one_hot_decode_observation(observation, agent=...)`` that
            returns a mapping with an ``"other_agent_rel_positions"`` entry of
            at least 10 values (x, y pairs).

    Returns:
        int: 2 (right) or 1 (left) when the horizontal offset dominates,
        otherwise 4 (up) or 3 (down). The codes follow the ``move_*`` entries
        of ``mpe_label_manager.action_encode``. A zero offset yields 3.

    Raises:
        ValueError: if ``agent_name`` is not one of the six known agents.
        KeyError: if ``agent_name`` is itself ``agent_0`` or ``agent_1``,
            because the acting agent has no entry among the "other" agents.
    """
    others = ['leadadversary_0', 'adversary_0', 'adversary_1', 'adversary_2', 'agent_0', 'agent_1']
    # Fails loudly on an unknown name instead of silently misaligning pairs.
    others.remove(agent_name)

    decoded = label_manager.one_hot_decode_observation(observation, agent=agent_name)
    rel = decoded["other_agent_rel_positions"]
    # NOTE(review): ordering of the other agents is assumed to match the
    # environment -- confirm.
    rel_by_agent = {name: (rel[2 * i], rel[2 * i + 1]) for i, name in enumerate(others)}

    # Nearest good agent; on a tie the first one listed wins. No distance
    # sentinel is used, so arbitrarily distant targets are still handled.
    dx, dy = min(
        (rel_by_agent[name] for name in ("agent_0", "agent_1")),
        key=lambda offset: math.hypot(*offset),
    )

    if abs(dx) > abs(dy):
        return 2 if dx > 0 else 1
    return 4 if dy > 0 else 3

## 4. Create the Organizational Model

In [None]:
# Organizational model: binds each role to its scripted rule (structural part)
# and assigns roles to agents through obligations (deontic part). The
# functional part is left empty.
# NOTE(review): `mpe_label_manager` is passed as a class, not an instance --
# presumably `role_logic` instantiates it; confirm against mma_wrapper.
mpe_model = organizational_model(
    structural_specifications(
        roles={
            "role_leader": role_logic(label_manager=mpe_label_manager).registrer_script_rule(leader_adversary_fun),
            "role_normal": role_logic(label_manager=mpe_label_manager).registrer_script_rule(normal_adversary_fun),
            # NOTE(review): "role_good" reuses the adversary chasing rule and
            # no obligation below refers to it -- confirm this is intended.
            "role_good": role_logic(label_manager=mpe_label_manager).registrer_script_rule(normal_adversary_fun)
        },
        role_inheritance_relations={}, root_groups={}
    ),
    functional_specifications=functional_specifications(
        goals={}, social_scheme={}, mission_preferences=[]
    ),
    deontic_specifications=deontic_specifications(
        permissions=[], obligations=[
            # The lead adversary is bound to "role_leader", the three other
            # adversaries to "role_normal"; agent_0 and agent_1 get no role.
            deontic_specification("role_leader", ["leadadversary_0"], [], time_constraint_type.ANY),
            deontic_specification("role_normal", ["adversary_0", "adversary_1", "adversary_2"], [], time_constraint_type.ANY)
        ]
    )
)

## 5. Create and Wrap the Environment

In [None]:
# Create the MPE `simple_world_comm` environment and hand it the
# organizational model defined above.
env = marl.make_env(
    environment_name="mpe",
    map_name="simple_world_comm",
    organizational_model=mpe_model
)

## 6. Initialize Algorithm and Train

In [None]:
# Instantiate the MAPPO algorithm and build its model for `env`.
# NOTE(review): "128-256" presumably lists the encoder layer widths of the
# MLP -- confirm against the MARLlib model configuration.
mappo = marl.algos.mappo(hyperparam_source="test")
model = marl.build_model(env, mappo, {"core_arch": "mlp", "encode_layer": "128-256"})

# Optional: uncomment to train
# mappo.fit(env, model, stop={"timesteps_total": 1e6})

## 7. Render and Analyze

In [None]:
# Render the environment with a policy restored from a saved checkpoint.
# NOTE(review): the "..." path segments look like placeholders -- replace them
# with the actual run directory before executing this cell.
mappo.render(env, model,
    restore_path={
        "params_path": "./exp_results/mappo_mlp_simple_world_comm_copy/.../params.json",
        "model_path": "./exp_results/mappo_mlp_simple_world_comm_copy/.../checkpoint-20",
        "render_env": True
    },
    local_mode=True,
    share_policy="group"
)

## ✅ Conclusion
In this notebook, we have:
- Defined role-specific logic for predators
- Created an organizational model with MOISE+
- Rendered the trained policy and analyzed role-based behavior in the MPE Predator-Prey environment