<a href="https://colab.research.google.com/github/mahault/Multi-agent-sustainability/blob/main/Multiagent_Sustainability_toy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Environment Setup


*   Grid Setup: The environment consists of a 3x3 grid.
*   Agent Dynamics: Two agents that can move, communicate, and consume resources.
*   Resource Dynamics: Water and food are placed randomly and can deplete and replenish.




In [2]:
!pip install pettingzoo

Collecting pettingzoo
  Downloading pettingzoo-1.24.3-py3-none-any.whl (847 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/847.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/847.8 kB[0m [31m1.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m604.2/847.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m847.8/847.8 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[0mCollecting gymnasium>=0.28.0 (from pettingzoo)
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium>=0.28.0->pettingzoo)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages:

In [4]:
from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector
from gym import spaces
import numpy as np

class ForagingEnv(AECEnv):
    """Toy multi-agent foraging environment on a small square grid.

    The world is a ``grid_size x grid_size`` grid with three channels per cell:

    * channel 0 -- agent presence (``0`` = empty, ``i + 1`` = agent ``i``),
    * channel 1 -- water (``1`` where the water source is),
    * channel 2 -- food (``1`` where the food source is).

    Each agent carries a survival state: its ``(row, col)`` position, a water
    timer, a food timer, and its beliefs about how fast resources replenish.

    ``step`` and ``render`` are not implemented yet; only ``reset`` and
    ``observe`` do real work.
    """

    # 'render.modes' is the legacy key; 'render_modes' is the key used by
    # current PettingZoo/Gymnasium releases. Both are provided.
    metadata = {'render.modes': ['human'], 'render_modes': ['human']}

    # Values the survival timers are (re)set to at the start of an episode.
    INITIAL_WATER_TIMER = 3
    INITIAL_FOOD_TIMER = 7

    def __init__(self):
        """Create the spaces, the empty grid and the default agent states.

        Agents have no position until ``reset`` is called.
        """
        super().__init__()
        self.grid_size = 3
        self.n_agents = 2
        self.possible_agents = ["agent_" + str(i) for i in range(self.n_agents)]
        self.agents = self.possible_agents[:]
        self.agent_selector = agent_selector(self.agents)
        self.action_spaces = {agent: spaces.Discrete(6) for agent in self.agents}  # Add one for "communicate" action
        self.observation_spaces = {agent: spaces.Dict({
            "grid": spaces.Box(low=0, high=2, shape=(self.grid_size, self.grid_size, 3), dtype=np.float32),
            "state": spaces.Dict({
                "position": spaces.MultiDiscrete([self.grid_size, self.grid_size]),
                # Discrete(n) holds 0..n-1, so a timer that starts at T needs T + 1 values.
                "water_timer": spaces.Discrete(self.INITIAL_WATER_TIMER + 1),  # Timesteps until death without water
                "food_timer": spaces.Discrete(self.INITIAL_FOOD_TIMER + 1),  # Timesteps until death without food
                "beliefs": spaces.Dict({
                    # NOTE(review): initial beliefs are np.inf, which lies outside
                    # Discrete(10) -- confirm how "unknown" should be encoded.
                    "water_replenish_rate": spaces.Discrete(10),  # Example max rate
                    "food_replenish_rate": spaces.Discrete(10),
                })
            })
        }) for agent in self.agents}

        self.grid = np.zeros((self.grid_size, self.grid_size, 3), dtype=np.float32)  # Third dimension for agent presence, water, food
        self.resource_counters = {"water": [np.inf, 0], "food": [np.inf, 0]}  # [acquisitions left, replenishment timer]
        self.agent_states = {agent: self._initial_agent_state() for agent in self.agents}
        self.current_agent = None

    def _initial_agent_state(self):
        """Return a fresh start-of-episode state dict for one agent (no position yet)."""
        return {
            "position": None,
            "water_timer": self.INITIAL_WATER_TIMER,
            "food_timer": self.INITIAL_FOOD_TIMER,
            "beliefs": {"water_replenish_rate": np.inf, "food_replenish_rate": np.inf},
        }

    def _cell(self, flat_index):
        """Convert a flat cell index in ``[0, grid_size**2)`` to ``(row, col)``."""
        return divmod(flat_index, self.grid_size)

    def reset(self, seed=None, options=None):
        """Start a new episode with random resource and agent placement.

        Args:
            seed: Optional seed. When given, NumPy's global random generator is
                seeded with it so the placement is reproducible. When ``None``
                the generator is left untouched.
            options: Accepted for compatibility with the PettingZoo ``reset``
                signature; currently unused.

        Side effects:
            Clears the grid, places water and food on two distinct cells,
            draws new resource counters, places every agent on an empty cell
            and restores each agent's timers and beliefs to their initial values.
        """
        if seed is not None:
            np.random.seed(seed)

        self.agents = self.possible_agents[:]
        self.agent_selector.reinit(self.agents)
        self.current_agent = self.agent_selector.next()
        self.grid.fill(0)  # Clear the grid

        # Randomly place water and food on two different cells
        n_cells = self.grid_size ** 2
        water_position = np.random.choice(n_cells)
        food_position = np.random.choice(n_cells)
        while food_position == water_position:
            food_position = np.random.choice(n_cells)

        water_row, water_col = self._cell(water_position)
        food_row, food_col = self._cell(food_position)
        self.grid[water_row, water_col, 1] = 1
        self.grid[food_row, food_col, 2] = 1

        # [acquisitions left, replenishment timer] -- random example values
        self.resource_counters["water"] = [np.random.randint(1, 5), np.random.randint(5, 15)]
        self.resource_counters["food"] = [np.random.randint(1, 5), np.random.randint(5, 15)]

        # Set initial positions for agents and reset their states
        for i, agent in enumerate(self.agents):
            while True:
                row, col = self._cell(np.random.choice(n_cells))
                if self.grid[row, col].sum() == 0:  # Ensure the position is empty (no agent, water or food)
                    self.grid[row, col, 0] = i + 1  # Mark agent's presence
                    state = self.agent_states[agent]
                    # Restore timers AND beliefs so nothing carries over between episodes.
                    state.update(self._initial_agent_state())
                    state["position"] = (row, col)
                    break

    def step(self, action):
        """Apply ``action`` for the current agent.

        Not implemented yet: this is currently a no-op that returns ``None``.
        """
        # Update the environment state based on the agent's action
        # Handle movement, consumption, communication, and update timers and beliefs
        pass

    def observe(self, agent):
        """Return the observation for ``agent``.

        Returns:
            A dict with ``"grid"`` (a copy of the full grid) and ``"state"``
            (a copy of the agent's internal state, including a copy of its
            beliefs). Copies are returned so that callers cannot modify the
            environment through the observation.
        """
        agent_state = self.agent_states[agent]
        state = dict(agent_state)
        state["beliefs"] = dict(agent_state["beliefs"])
        return {"grid": self.grid.copy(), "state": state}

    def render(self, mode="human"):
        """Visualize the environment.

        Not implemented yet: this is currently a no-op that returns ``None``.
        """
        # Visualize the current state of the environment, including agent positions, resources, and timers
        pass



In [5]:
# Usage: build the environment, start an episode, and print what the first agent observes
env = ForagingEnv()
env.reset()
first_agent = env.agents[0]
print(env.observe(first_agent))

{'grid': array([[[1., 0., 0.],
        [0., 1., 0.],
        [2., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 1.]]], dtype=float32), 'state': {'position': (0, 0), 'water_timer': 3, 'food_timer': 7, 'beliefs': {'water_replenish_rate': inf, 'food_replenish_rate': inf}}}
