### Definitions

In [1]:
import random
import numpy as np
import pygame
from collections import deque
from scipy.spatial import cKDTree
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from gym import spaces

class SugarscapeEnv(MultiAgentEnv):
    """
    Multi-agent Sugarscape grid world.

    Agents live on a ``width`` x ``height`` grid whose sugar levels are the
    sum of Gaussian peaks around temporary "job centers". On every step each
    agent may move to one of the eight neighbouring cells, collects the sugar
    of the cell it stands on, pays its metabolism and broadcasts a message to
    agents within its broadcast radius. Agents whose sugar drops to zero or
    below die and are reported as done.
    """

    # Actions: 0: stay, 1: up, 2: up-right, 3: right, 4: down-right,
    #          5: down, 6: down-left, 7: left, 8: up-left
    # Values are (dx, dy) offsets; y grows downwards (row index).
    ACTION_DIRECTIONS = {
        0: (0, 0),
        1: (0, -1),
        2: (1, -1),
        3: (1, 0),
        4: (1, 1),
        5: (0, 1),
        6: (-1, 1),
        7: (-1, 0),
        8: (-1, -1),
    }

    def __init__(self, config):
        """
        Initialize the Sugarscape environment.

        Args:
            config (dict): Configuration parameters for the environment.
                Missing keys fall back to the defaults used below; ``None``
                is treated as an empty configuration.
        """
        # Let the base class set up its own bookkeeping before we add ours.
        super().__init__()
        config = config or {}

        # Extract configuration parameters
        self.width = config.get("width", 50)
        self.height = config.get("height", 50)
        self.num_agents = config.get("num_agents", 100)
        self.cell_size = config.get("cell_size", 10)
        self.show_sugar_levels = config.get("show_sugar_levels", False)
        self.show_broadcast_radius = config.get("show_broadcast_radius", False)
        self.show_agent_paths = config.get("show_agent_paths", False)
        self.broadcast_radius_default = config.get("broadcast_radius", 5)
        self.seed = config.get("seed", None)
        self.render_mode = config.get("render_mode", False)  # Enable rendering if True

        # Initialize random number generators with the seed for reproducibility
        if self.seed is not None:
            random.seed(self.seed)
            np.random.seed(self.seed)

        # Environment parameters
        self.params = {
            'max_sugar': 5,
            'growth_rate': 1,
            'sugar_peak_frequency': 0.04,
            'sugar_peak_spread': 6,
            'job_center_duration': (40, 100),
            'vision_range': 1,
            'message_expiry': 15,
            'max_relay_messages': 10
        }

        # Initialize job centers and sugar landscape
        # (create_initial_sugar_peaks also rebuilds self.sugar).
        self.job_centers = []
        self.sugar = np.zeros((self.height, self.width), dtype=int)
        self.create_initial_sugar_peaks()
        self.max_sugar_landscape = self.sugar.copy()

        # Initialize agents
        self.agents = self.initialize_agents()
        self.agent_positions = set((agent['x'], agent['y']) for agent in self.agents)
        self.dead_agents = []

        # Initialize Pygame if rendering is enabled
        if self.render_mode:
            pygame.init()
            self.screen = pygame.display.set_mode((self.width * self.cell_size, self.height * self.cell_size))
            pygame.display.set_caption("Sugarscape Simulation - With Broadcasting")
            self.clock = pygame.time.Clock()
            self.font = pygame.font.Font(None, 10)

        # Data tracking
        self.population_history = []
        self.average_wealth_history = []
        self.gini_coefficient_history = []
        self.timestep = 0

        # Define action and observation spaces
        self.action_space = spaces.Discrete(len(self.ACTION_DIRECTIONS))

        # Observation: visible sugar window (flattened) + agent's own sugar
        # and metabolism. The window side follows the vision range so the
        # declared shape always matches what get_observation() returns.
        window_side = 2 * self.params['vision_range'] + 1
        self.observation_space = spaces.Box(low=0, high=self.params['max_sugar'],
                                            shape=(window_side * window_side + 2,),
                                            dtype=np.float32)

    def create_initial_sugar_peaks(self, num_peaks=2):
        """
        Create initial sugar peaks (job centers) and rebuild the landscape.

        Args:
            num_peaks (int): Number of initial sugar peaks.
        """
        for _ in range(num_peaks):
            self.create_job_center()
        self.update_sugar_landscape()

    def create_job_center(self):
        """
        Create a single job center with random location and duration.

        The duration is drawn from the ``job_center_duration`` range in
        ``self.params`` (upper bound exclusive, as with ``np.random.randint``).
        """
        x, y = np.random.randint(0, self.width), np.random.randint(0, self.height)
        duration = np.random.randint(*self.params['job_center_duration'])
        self.job_centers.append({
            'x': x, 'y': y,
            'duration': duration,
            'max_sugar': self.params['max_sugar']
        })

    def update_sugar_landscape(self):
        """
        Rebuild the sugar landscape from the active job centers.

        Every job center contributes a Gaussian bump; the summed landscape is
        clipped to ``max_sugar`` and rounded to integers.
        """
        landscape = np.zeros((self.height, self.width))
        if self.job_centers:
            # The coordinate grids do not depend on the center, build them once.
            x_grid, y_grid = np.meshgrid(np.arange(self.width), np.arange(self.height))
            two_sigma_sq = 2 * self.params['sugar_peak_spread'] ** 2
            for center in self.job_centers:
                dist_sq = (x_grid - center['x']) ** 2 + (y_grid - center['y']) ** 2
                landscape += center['max_sugar'] * np.exp(-dist_sq / two_sigma_sq)
        landscape = np.clip(landscape, 0, self.params['max_sugar'])
        self.sugar = np.round(landscape).astype(int)

    def initialize_agents(self):
        """
        Initialize agents on unique, randomly chosen grid cells.

        Positions are sampled with the (seeded) ``random`` module so that the
        layout is random yet reproducible. If more agents are requested than
        there are cells, only as many agents as cells are created.

        Returns:
            list: List of agent dictionaries.
        """
        all_positions = [(x, y) for x in range(self.width) for y in range(self.height)]
        count = max(0, min(self.num_agents, len(all_positions)))
        chosen = random.sample(all_positions, count)
        return [self.create_agent(i, x, y) for i, (x, y) in enumerate(chosen)]

    def create_agent(self, id, x, y):
        """
        Create a single agent with random sugar, metabolism and broadcast radius.

        Args:
            id (int): Unique identifier for the agent.
            x (int): X-coordinate of the agent.
            y (int): Y-coordinate of the agent.

        Returns:
            dict: Agent dictionary with attributes.
        """
        return {
            'id': id,
            'x': x,
            'y': y,
            'sugar': np.random.randint(40, 80),
            'metabolism': np.random.randint(1, 3),
            'vision': self.params['vision_range'],
            # Radius is normally distributed around the default, at least 1.
            'broadcast_radius': max(1, int(np.random.normal(self.broadcast_radius_default,
                                                              self.broadcast_radius_default / 3))),
            'messages': deque(maxlen=100),
            'destination': None
        }

    def get_visible_sugar(self, agent):
        """
        Get the normalized sugar window around an agent.

        The window always has side ``2 * vision + 1`` with the agent's own
        cell in the centre. Cells that fall outside the grid are reported as
        zero, padded on the side where the window was cut off.

        Args:
            agent (dict): Agent dictionary.

        Returns:
            np.ndarray: Flattened visible sugar window, values in [0, 1].
        """
        x, y = agent['x'], agent['y']
        vision = agent['vision']
        x_min = max(0, x - vision)
        x_max = min(self.width, x + vision + 1)
        y_min = max(0, y - vision)
        y_max = min(self.height, y + vision + 1)
        # Normalize sugar levels
        visible_area = self.sugar[y_min:y_max, x_min:x_max] / self.params['max_sugar']

        # Pad exactly where the grid border clipped the window so the agent
        # stays in the centre cell of the returned window.
        pad_top = y_min - (y - vision)
        pad_bottom = (y + vision + 1) - y_max
        pad_left = x_min - (x - vision)
        pad_right = (x + vision + 1) - x_max
        visible_area = np.pad(visible_area,
                              ((pad_top, pad_bottom), (pad_left, pad_right)),
                              'constant')
        return visible_area.flatten()

    def broadcast_messages(self):
        """
        Broadcast a message from every agent to the agents within its own
        broadcast radius.

        Each message carries the sender id, the summed visible sugar, the
        current timestep and the sender position.
        """
        if not self.agents:
            return  # No agents to broadcast

        # Extract agent positions and per-agent radii
        positions = np.array([[agent['x'], agent['y']] for agent in self.agents])
        radii = np.array([agent.get('broadcast_radius', self.broadcast_radius_default)
                          for agent in self.agents], dtype=float)

        # Build cKDTree for efficient spatial queries
        tree = cKDTree(positions)

        # Query, for each sender, all agents within that sender's radius
        all_neighbors = tree.query_ball_point(positions, radii)

        for i, agent in enumerate(self.agents):
            # Create the message from this agent
            message = {
                'sender_id': agent['id'],
                'sugar_amount': self.get_visible_sugar(agent).sum(),
                'timestep': self.timestep,
                'x': agent['x'],
                'y': agent['y']
            }

            for neighbor_idx in all_neighbors[i]:
                if neighbor_idx != i:  # the query also returns the sender itself
                    self.agents[neighbor_idx]['messages'].append(message)

    def reset(self):
        """
        Reset the environment to an initial state.

        Returns:
            dict: Dictionary of observations for each agent.
        """
        # Reset job centers and sugar landscape
        # (create_initial_sugar_peaks also rebuilds self.sugar).
        self.job_centers = []
        self.create_initial_sugar_peaks()
        self.max_sugar_landscape = self.sugar.copy()

        # Reset agents; freshly created agents start with empty message queues
        self.agents = self.initialize_agents()
        self.agent_positions = set((agent['x'], agent['y']) for agent in self.agents)
        self.dead_agents = []
        self.timestep = 0

        # If rendering is enabled, reset Pygame display
        if self.render_mode:
            self.screen.fill((255, 255, 255))
            pygame.display.flip()

        # Return initial observations
        return {agent['id']: self.get_observation(agent) for agent in self.agents}

    def get_observation(self, agent):
        """
        Get the observation for a single agent.

        Args:
            agent (dict): Agent dictionary.

        Returns:
            np.ndarray: Observation vector (visible sugar window followed by
            the agent's normalized sugar and metabolism), clipped to the
            bounds declared in ``observation_space``.
        """
        visible_sugar = self.get_visible_sugar(agent)
        # Normalize agent's own sugar and metabolism
        sugar_norm = agent['sugar'] / 100  # Assuming max sugar is 100
        metabolism_norm = agent['metabolism'] / 10  # Assuming max metabolism is 10
        observation = np.concatenate([visible_sugar, [sugar_norm, metabolism_norm]])
        # Sugar is not actually capped at 100, so keep the vector inside the
        # declared Box (low=0, high=max_sugar).
        observation = np.clip(observation, 0, self.params['max_sugar'])
        return observation.astype(np.float32)

    def step(self, action_dict):
        """
        Execute one time step within the environment.

        Args:
            action_dict (dict): Actions for each agent, keyed by agent id.

        Returns:
            tuple: observations, rewards, dones, infos. Agents that died in
            this step are included once more with ``done=True`` and their
            final observation/reward; ``dones['__all__']`` becomes True when
            no agent is left alive.
        """
        self._move_agents(action_dict)
        self._harvest_and_metabolize()
        self.broadcast_messages()
        self._expire_messages()
        died = self._remove_dead_agents()
        self._update_job_centers()

        # Collect data
        self.collect_data()

        # Increment timestep
        self.timestep += 1

        # Prepare observations, rewards, dones, infos
        observations, rewards, dones = {}, {}, {}
        for agent in self.agents:
            observations[agent['id']] = self.get_observation(agent)
            rewards[agent['id']] = agent['sugar']  # Reward based on sugar
            dones[agent['id']] = False
        for agent in died:
            # Final transition for agents that just died.
            observations[agent['id']] = self.get_observation(agent)
            rewards[agent['id']] = agent['sugar']
            dones[agent['id']] = True
        # With nobody alive there is nothing left to simulate.
        dones['__all__'] = not self.agents

        # Optionally render the environment
        if self.render_mode:
            self.render()

        return observations, rewards, dones, {}

    def _move_agents(self, action_dict):
        """Apply the movement actions; blocked or out-of-bounds moves are ignored."""
        for agent in self.agents:
            agent_id = agent['id']
            if agent_id not in action_dict:
                continue
            dx, dy = self.action_to_direction(action_dict[agent_id])
            new_x = agent['x'] + dx
            new_y = agent['y'] + dy

            # Check if the new position is within bounds and not occupied
            in_bounds = 0 <= new_x < self.width and 0 <= new_y < self.height
            if not in_bounds or (new_x, new_y) in self.agent_positions:
                continue

            # Update agent position
            self.agent_positions.remove((agent['x'], agent['y']))
            agent['x'] = new_x
            agent['y'] = new_y
            self.agent_positions.add((new_x, new_y))

            # Clear the destination once it has been reached
            if agent['destination'] and (new_x, new_y) == agent['destination']:
                agent['destination'] = None

    def _harvest_and_metabolize(self):
        """Let every agent collect the sugar of its cell and pay its metabolism."""
        for agent in self.agents:
            collected_sugar = self.sugar[agent['y'], agent['x']]
            self.sugar[agent['y'], agent['x']] = 0
            # Ensure agent sugar is a plain integer
            agent['sugar'] = int(agent['sugar'] + collected_sugar - agent['metabolism'])

    def _expire_messages(self):
        """Drop messages older than ``message_expiry`` timesteps."""
        expiry = self.params['message_expiry']
        for agent in self.agents:
            agent['messages'] = deque(
                [msg for msg in agent['messages'] if self.timestep - msg['timestep'] <= expiry],
                maxlen=100)

    def _remove_dead_agents(self):
        """Remove agents without sugar and return the agents that died this step."""
        died = []
        alive_agents = []
        for agent in self.agents:
            if agent['sugar'] <= 0:
                self.dead_agents.append({'x': agent['x'], 'y': agent['y'], 'death_time': self.timestep})
                self.agent_positions.remove((agent['x'], agent['y']))
                died.append(agent)
            else:
                alive_agents.append(agent)
        self.agents = alive_agents

        # Dead-agent markers are only kept (for rendering) for a few timesteps
        self.dead_agents = [agent for agent in self.dead_agents if self.timestep - agent['death_time'] <= 5]
        return died

    def _update_job_centers(self):
        """Age job centers, occasionally spawn a new one and rebuild the landscape."""
        for center in self.job_centers:
            center['duration'] -= 1
        self.job_centers = [center for center in self.job_centers if center['duration'] > 0]
        if np.random.random() < self.params['sugar_peak_frequency']:
            self.create_job_center()
        self.update_sugar_landscape()

    def action_to_direction(self, action):
        """
        Convert discrete action to movement direction.

        Args:
            action (int): Action integer.

        Returns:
            tuple: (dx, dy)
        """
        # Default to stay if invalid action
        return self.ACTION_DIRECTIONS.get(action, (0, 0))

    def _cell_center(self, x, y):
        """Return the pixel coordinates of the centre of grid cell (x, y)."""
        return (int(x * self.cell_size + self.cell_size / 2),
                int(y * self.cell_size + self.cell_size / 2))

    def render(self):
        """
        Render the environment using Pygame.

        Does nothing when no display was created (rendering disabled).
        """
        if getattr(self, 'screen', None) is None:
            return

        # Let pygame process its internal events so the window stays responsive.
        pygame.event.pump()

        self.screen.fill((255, 255, 255))
        agent_radius = int(self.cell_size / 3)

        for y in range(self.height):
            for x in range(self.width):
                sugar_level = self.sugar[y, x]
                color = self.get_color(sugar_level)
                pygame.draw.rect(self.screen, color,
                                 (x * self.cell_size, y * self.cell_size, self.cell_size, self.cell_size))

                if self.show_sugar_levels:
                    sugar_text = self.font.render(f"{sugar_level}", True, (0, 0, 0))
                    text_rect = sugar_text.get_rect(center=(x * self.cell_size + self.cell_size // 2,
                                                            y * self.cell_size + self.cell_size // 2))
                    self.screen.blit(sugar_text, text_rect)

        # Recently dead agents are drawn in grey
        for dead_agent in self.dead_agents:
            pygame.draw.circle(self.screen, (128, 128, 128),
                               self._cell_center(dead_agent['x'], dead_agent['y']),
                               agent_radius)

        for agent in self.agents:
            center = self._cell_center(agent['x'], agent['y'])

            if self.show_broadcast_radius:
                pygame.draw.circle(self.screen, (200, 200, 200), center,
                                   int(agent['broadcast_radius'] * self.cell_size), 1)

            pygame.draw.circle(self.screen, (255, 0, 0), center, agent_radius)

            if self.show_agent_paths and agent['destination']:
                pygame.draw.line(self.screen, (0, 255, 0), center,
                                 self._cell_center(agent['destination'][0], agent['destination'][1]),
                                 1)

        pygame.display.flip()

    def get_color(self, sugar_level):
        """
        Get color based on sugar level.

        Args:
            sugar_level (int): Sugar level at a grid cell.

        Returns:
            tuple: RGB color (white for no sugar, increasingly yellow otherwise).
        """
        if sugar_level == 0:
            return (255, 255, 255)
        intensity = sugar_level / self.params['max_sugar']
        return (255, 255, int(255 * (1 - intensity)))

    def collect_data(self):
        """
        Record population, average wealth and Gini coefficient for analysis.
        """
        population = len(self.agents)
        total_wealth = sum(agent['sugar'] for agent in self.agents)
        average_wealth = total_wealth / population if population > 0 else 0

        self.population_history.append(population)
        self.average_wealth_history.append(average_wealth)
        self.gini_coefficient_history.append(self.calculate_gini_coefficient())

    def calculate_gini_coefficient(self):
        """
        Calculate the Gini coefficient for wealth distribution.

        Returns:
            float: Gini coefficient; 0.0 when there are no agents or no
            wealth at all.
        """
        if not self.agents:
            return 0.0
        wealth_values = np.sort(np.array([agent['sugar'] for agent in self.agents], dtype=float))
        total_wealth = wealth_values.sum()
        if total_wealth == 0:
            return 0.0  # avoid dividing by zero when nobody owns anything
        n = len(wealth_values)
        ranks = np.arange(1, n + 1)
        return float(np.sum((2 * ranks - n - 1) * wealth_values) / (n * total_wealth))

    def plot_results(self):
        """
        Plot the collected population, average wealth and Gini histories.
        """
        import matplotlib.pyplot as plt

        plt.figure(figsize=(15, 5))

        panels = (
            (131, self.population_history, 'Population over Time', 'Population'),
            (132, self.average_wealth_history, 'Average Wealth over Time', 'Average Wealth'),
            (133, self.gini_coefficient_history, 'Gini Coefficient over Time', 'Gini Coefficient'),
        )
        for position, series, title, ylabel in panels:
            plt.subplot(position)
            plt.plot(series)
            plt.title(title)
            plt.xlabel('Timestep')
            plt.ylabel(ylabel)

        plt.tight_layout()
        plt.show()


pygame 2.6.0 (SDL 2.28.4, Python 3.11.9)
Hello from the pygame community. https://www.pygame.org/contribute.html



In [2]:
import ray
from ray import tune
from ray.rllib.algorithms.ppo import PPO
from SugarscapeEnv import SugarscapeEnv  # Ensure SugarscapeEnv is defined in a previous cell

def main(num_iterations=1000, checkpoint_freq=200):
    """
    Train a shared PPO policy on SugarscapeEnv with RLlib.

    Args:
        num_iterations (int): Number of training iterations to run.
        checkpoint_freq (int): Save a checkpoint every this many iterations;
            0 (or None) disables the periodic checkpoints.

    Ray is always shut down again, even when training fails part-way.
    """
    # Initialize Ray
    ray.init(ignore_reinit_error=True)

    try:
        # Environment configuration
        env_config = {
            "width": 50,
            "height": 50,
            "num_agents": 100,  # Start with 100 agents for efficiency
            "cell_size": 10,
            "show_sugar_levels": False,
            "show_broadcast_radius": False,  # Disable rendering during training
            "show_agent_paths": False,
            "broadcast_radius": 15,
            "seed": 23,
            "render_mode": False  # Disable rendering
        }

        # Instantiate the environment to retrieve observation and action spaces
        env_instance = SugarscapeEnv(config=env_config)

        # Define the configuration for PPO
        config = {
            "env": SugarscapeEnv,
            "env_config": env_config,
            "num_gpus": 0,  # Set to >0 if GPUs are available
            "num_workers": 2,  # Adjust based on your CPU cores
            "framework": "torch",  # or "tf" for TensorFlow
            "multiagent": {
                # All agents share one policy.
                "policies": {
                    "shared_policy": (None, env_instance.observation_space, env_instance.action_space, {})
                },
                "policy_mapping_fn": lambda agent_id, episode, worker, **kwargs: "shared_policy",
            },
            "model": {
                "fcnet_hiddens": [128, 128],
                "fcnet_activation": "relu",
            },
            "lr": 5e-4,
            "num_sgd_iter": 10,
            "sgd_minibatch_size": 256,
            "train_batch_size": 4000,
            "rollout_fragment_length": 200,
        }

        # Initialize the PPO algorithm
        trainer = PPO(config=config, env=SugarscapeEnv)

        final_checkpoint = None
        for i in range(num_iterations):
            result = trainer.train()
            iteration = i + 1

            # Print progress every 100 iterations
            if iteration % 100 == 0:
                print(f"Iteration {iteration}: average reward {result['episode_reward_mean']}")

            # Save checkpoints periodically
            if checkpoint_freq and iteration % checkpoint_freq == 0:
                checkpoint = trainer.save()
                print(f"Checkpoint saved at {checkpoint}")
                if iteration == num_iterations:
                    # The last iteration is already on disk; no need to save twice.
                    final_checkpoint = checkpoint

        # Save the final model
        if final_checkpoint is None:
            final_checkpoint = trainer.save()
        print(f"Final checkpoint saved at {final_checkpoint}")
    finally:
        # Shutdown Ray
        ray.shutdown()

# Execute the training script only when this code is run directly,
# not when it is imported from another module.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'SugarscapeEnv'

In [4]:
!pip install -U ray[rllib]




In [5]:
import ray
from ray import tune
from ray.rllib.algorithms.ppo import PPO

print("Ray and RLlib are successfully installed and updated!")


Ray and RLlib are successfully installed and updated!
