<a href="https://colab.research.google.com/github/mahault/Multi-agent-sustainability/blob/main/Multiagent_Sustainability_toy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Environment Setup


*   Grid Setup: The environment consists of a 3x3 grid.
*   Agent Dynamics: Two agents act in turn; each can move, communicate, or consume resources.
*   Resource Dynamics: Water and food are placed randomly and can deplete and replenish.




In [5]:
!pip install pettingzoo



In [8]:
from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector
from gym import spaces
import numpy as np

class ForagingEnv(AECEnv):
    """Turn-based, two-agent foraging environment on a small square grid.

    The grid is a ``(grid_size, grid_size, 3)`` float array whose last axis
    holds three channels:

    * channel 0 -- agent marker (``0`` for empty, ``i + 1`` for ``agent_i``),
    * channel 1 -- water (``1`` where the water source is),
    * channel 2 -- food (``1`` where the food source is).

    Actions are integers: ``0`` up, ``1`` down, ``2`` left, ``3`` right,
    ``4`` consume the resource on the current cell, ``5`` communicate.

    Resource depletion/replenishment, communication, agent termination and
    rendering are still placeholders (see the respective methods).
    """

    metadata = {'render.modes': ['human']}

    def __init__(self):
        """Create the spaces, the empty grid and the per-agent state.

        Agent positions stay ``None`` until :meth:`reset` is called.
        """
        super().__init__()
        self.grid_size = 3
        self.n_agents = 2
        self.agents = ["agent_" + str(i) for i in range(self.n_agents)]
        self.agent_selector = agent_selector(self.agents)
        self.action_spaces = {agent: spaces.Discrete(6) for agent in self.agents}  # Add one for "communicate" action
        self.observation_spaces = {agent: spaces.Dict({
            "grid": spaces.Box(low=0, high=2, shape=(self.grid_size, self.grid_size, 3), dtype=np.float32),
            "state": spaces.Dict({
                "position": spaces.MultiDiscrete([self.grid_size, self.grid_size]),
                # Discrete(n) covers 0..n-1, so n must be (initial timer + 1)
                # for the freshly reset timers (3 and 7) to be valid members.
                "water_timer": spaces.Discrete(4),  # Timesteps until death without water
                "food_timer": spaces.Discrete(8),  # Timesteps until death without food
                # NOTE(review): beliefs are initialised to np.inf below, which
                # is not a member of Discrete(10) -- confirm the intended
                # encoding for "unknown rate".
                "beliefs": spaces.Dict({
                    "water_replenish_rate": spaces.Discrete(10),  # Example max rate
                    "food_replenish_rate": spaces.Discrete(10),
                })
            })
        }) for agent in self.agents}

        self.grid = np.zeros((self.grid_size, self.grid_size, 3), dtype=np.float32)  # Third dimension for agent presence, water, food
        self.resource_counters = {"water": [np.inf, 0], "food": [np.inf, 0]}  # [acquisitions left, replenishment timer]
        self.agent_states = {
            agent: {
                "position": None,
                "water_timer": 3,
                "food_timer": 7,
                "beliefs": {"water_replenish_rate": np.inf, "food_replenish_rate": np.inf},
            }
            for agent in self.agents
        }
        self.current_agent = None

    def reset(self):
        """Start a new episode.

        Clears the grid, places water and food on two distinct random cells,
        draws new resource counters, and puts every agent on a random cell
        that holds neither a resource nor another agent. Agent timers are
        restored to their initial values; beliefs are left untouched.
        Returns nothing -- use :meth:`observe` to read the initial state.
        """
        self.agent_selector.reinit(self.agents)
        self.current_agent = self.agent_selector.next()
        self.grid *= 0  # Clear the grid

        # Randomly place water and food, initialize resource counters
        water_position = np.random.choice(self.grid_size**2)
        food_position = np.random.choice(self.grid_size**2)
        while food_position == water_position:
            food_position = np.random.choice(self.grid_size**2)

        # Flat cell index -> (row, col) via divmod by the grid width.
        self.grid[water_position // self.grid_size, water_position % self.grid_size, 1] = 1
        self.grid[food_position // self.grid_size, food_position % self.grid_size, 2] = 1
        self.resource_counters["water"] = [np.random.randint(1, 5), np.random.randint(5, 15)]  # Random example values
        self.resource_counters["food"] = [np.random.randint(1, 5), np.random.randint(5, 15)]

        # Set initial positions for agents and reset their states
        for i, agent in enumerate(self.agents):
            while True:
                pos = np.random.choice(self.grid_size**2)
                row, col = pos // self.grid_size, pos % self.grid_size
                if self.grid[row, col].sum() == 0:  # Ensure the position is empty
                    self.grid[row, col, 0] = i + 1  # Mark agent's presence
                    self.agent_states[agent]["position"] = (row, col)
                    self.agent_states[agent]["water_timer"] = 3
                    self.agent_states[agent]["food_timer"] = 7
                    break

    def step(self, action):
        """Apply ``action`` for the current agent and advance the turn.

        Args:
            action: Integer action; values below 4 move, 4 consumes,
                5 communicates.

        Returns:
            The reward earned by the acting agent this step: +1 for
            surviving the step, +10 for a successful consumption, and -50
            if either survival timer has run out after this step.
        """
        agent = self.current_agent
        reward = 0  # Initialize reward for the current step

        if action < 4:  # Movement actions
            self.move_agent(agent, action)
        elif action == 4:  # Consumption action
            reward += self.consume_resources(agent)
        elif action == 5:  # Communication action
            self.communicate(agent)
            # Consider if and how communication should affect the reward

        # Survival reward for being alive another timestep
        reward += 1

        # Apply penalties for failing to meet survival requirements.
        # Timers tick down after the action, so a timer that was just reset
        # by consumption still loses one unit this step.
        self.agent_states[agent]["water_timer"] -= 1
        self.agent_states[agent]["food_timer"] -= 1
        if self.agent_states[agent]["water_timer"] <= 0 or self.agent_states[agent]["food_timer"] <= 0:
            self.terminate_agent(agent)
            reward -= 50  # Example penalty for dying

        # Move to the next agent
        self.current_agent = self.agent_selector.next()

        return reward

    def move_agent(self, agent, direction):
        """Move ``agent`` one cell, clamped to the grid, if the target is free.

        Args:
            agent: Name of the agent to move.
            direction: ``0`` up, ``1`` down, ``2`` left, anything else right.

        The move is skipped when the target cell already carries an agent
        marker. That also covers moves into a wall: clamping makes the target
        equal to the agent's own (occupied) cell, so nothing changes.
        """
        pos = self.agent_states[agent]["position"]
        if direction == 0:  # Up
            new_pos = (max(pos[0] - 1, 0), pos[1])
        elif direction == 1:  # Down
            new_pos = (min(pos[0] + 1, self.grid_size - 1), pos[1])
        elif direction == 2:  # Left
            new_pos = (pos[0], max(pos[1] - 1, 0))
        else:  # Right
            new_pos = (pos[0], min(pos[1] + 1, self.grid_size - 1))

        # Update position if the new position is not occupied
        if self.grid[new_pos[0], new_pos[1], 0] == 0:
            # Carry the agent's own marker along instead of writing a fixed 1,
            # so the identity assigned in reset() (i + 1) survives the move.
            marker = self.grid[pos[0], pos[1], 0]
            self.grid[pos[0], pos[1], 0] = 0  # Remove agent from old position
            self.grid[new_pos[0], new_pos[1], 0] = marker  # Add agent to new position
            self.agent_states[agent]["position"] = new_pos

    def consume_resources(self, agent):
        """Consume the resource on the agent's cell, if there is one.

        Water is checked first, then food; the matching survival timer is
        restored to its initial value.

        Returns:
            ``10`` if water or food was consumed, otherwise ``0``.
        """
        pos = self.agent_states[agent]["position"]
        reward = 0
        if self.grid[pos[0], pos[1], 1] == 1:  # Water present
            self.agent_states[agent]["water_timer"] = 3  # Reset water timer
            reward += 10  # Example reward for consuming water
            # Handle resource depletion logic here
        elif self.grid[pos[0], pos[1], 2] == 1:  # Food present
            self.agent_states[agent]["food_timer"] = 7  # Reset food timer
            reward += 10  # Example reward for consuming food
            # Handle resource depletion logic here
        return reward

    def communicate(self, agent):
        """Placeholder: currently does nothing."""
        # Implement logic for agents to communicate their findings and beliefs
        pass

    def terminate_agent(self, agent):
        """Placeholder: currently does nothing, so a 'dead' agent keeps acting."""
        # Implement logic to remove or disable the agent upon death
        pass

    def observe(self, agent):
        """Return a snapshot observation for ``agent``.

        Returns:
            A dict with ``"grid"`` (a copy of the full grid) and ``"state"``
            (a copy of the agent's position, timers and beliefs). Both are
            copies, so mutating the observation cannot alter the environment.
        """
        state = self.agent_states[agent]
        # Shallow-copy the state and its nested beliefs dict; the remaining
        # values (tuple / numbers) are immutable, so this is a full snapshot.
        state_snapshot = dict(state)
        state_snapshot["beliefs"] = dict(state["beliefs"])
        return {"grid": self.grid.copy(), "state": state_snapshot}

    def render(self, mode="human"):
        """Placeholder: currently does nothing."""
        # Visualize the current state of the environment, including agent positions, resources, and timers
        pass



In [9]:
# Usage
# Build the environment, call reset() to place the resources and agents, then
# print the first agent's observation (a dict with "grid" and "state" entries).
env = ForagingEnv()
env.reset()
print(env.observe(env.agents[0]))

{'grid': array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 1.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[1., 0., 0.],
        [2., 0., 0.],
        [0., 1., 0.]]], dtype=float32), 'state': {'position': (2, 0), 'water_timer': 3, 'food_timer': 7, 'beliefs': {'water_replenish_rate': inf, 'food_replenish_rate': inf}}}
