## Training

In [6]:
# Import necessary libraries
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque

# Set random seeds for reproducibility
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

class DQN(nn.Module):
    def __init__(self, input_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.output = nn.Linear(128, action_size)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.output(x)

class SugarscapeEnvironment:
    def __init__(self, width, height, num_agents, params):
        self.width = width
        self.height = height
        self.num_agents = num_agents
        self.params = params

        self.job_centers = []
        self.sugar = np.zeros((self.height, self.width), dtype=int)
        self.create_initial_sugar_peaks()
        self.max_sugar_landscape = self.sugar.copy()
        self.agents = self.initialize_agents()
        self.agent_positions = set((agent['x'], agent['y']) for agent in self.agents)
        self.dead_agents = []

        self.timestep = 0

        # Initialize DQN components
        self.observation_space_size = (self.params['vision_range'] * 2 + 1) ** 2 + 2  # Observation size
        self.action_space_size = 5  # Up, Down, Left, Right, Stay
        self.policy_net = DQN(self.observation_space_size, self.action_space_size)
        self.target_net = DQN(self.observation_space_size, self.action_space_size)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()

        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=self.params['learning_rate'])
        self.memory = deque(maxlen=10000)
        self.batch_size = self.params['batch_size']
        self.gamma = self.params['gamma']
        self.epsilon = self.params['epsilon_start']

    def create_initial_sugar_peaks(self, num_peaks=2):
        for _ in range(num_peaks):
            self.create_job_center()
        self.update_sugar_landscape()

    def create_job_center(self):
        x, y = np.random.randint(0, self.width), np.random.randint(0, self.height)
        duration = np.random.randint(*self.params['job_center_duration'])
        self.job_centers.append({
            'x': x, 'y': y,
            'duration': duration,
            'max_sugar': self.params['max_sugar']
        })

    def update_sugar_landscape(self):
        self.sugar = np.zeros((self.height, self.width))
        for center in self.job_centers:
            x_grid, y_grid = np.meshgrid(np.arange(self.width), np.arange(self.height))
            distance = np.sqrt((x_grid - center['x']) ** 2 + (y_grid - center['y']) ** 2)
            sugar_level = center['max_sugar'] * np.exp(-distance ** 2 / (2 * self.params['sugar_peak_spread'] ** 2))
            self.sugar += sugar_level
        self.sugar = np.clip(self.sugar, 0, self.params['max_sugar'])
        self.sugar = np.round(self.sugar).astype(int)

    def initialize_agents(self):
        agents = []
        available_positions = set((x, y) for x in range(self.width) for y in range(self.height))
        for i in range(self.num_agents):
            if not available_positions:
                break
            x, y = available_positions.pop()
            agents.append(self.create_agent(i, x, y))
        return agents

    def create_agent(self, id, x, y):
        return {
            'id': id, 'x': x, 'y': y,
            'sugar': np.random.randint(40, 80),
            'metabolism': np.random.randint(1, 3),
            'vision': np.random.randint(1, self.params['vision_range'] + 1),
            'broadcast_radius': max(1, int(np.random.normal(self.params['broadcast_radius'], self.params['broadcast_radius'] / 3))),
            'messages': deque(maxlen=100),
            'destination': None,
            'done': False
        }

    def get_agent_observation(self, agent):
        x, y = agent['x'], agent['y']
        obs_range = self.params['vision_range']
        # Get local sugar levels
        sugar_obs = self.sugar[max(0, y - obs_range):min(self.height, y + obs_range + 1),
                               max(0, x - obs_range):min(self.width, x + obs_range + 1)]
        # Pad the observation if at the edges
        pad_width_x = (max(0, obs_range - x), max(0, x + obs_range + 1 - self.width))
        pad_width_y = (max(0, obs_range - y), max(0, y + obs_range + 1 - self.height))
        sugar_obs = np.pad(sugar_obs, (pad_width_y, pad_width_x), mode='constant', constant_values=0)
        sugar_obs = sugar_obs.flatten()
        # Include agent's own sugar level and metabolism
        agent_features = np.array([agent['sugar'], agent['metabolism']])
        observation = np.concatenate((sugar_obs, agent_features))
        return observation

    def select_action(self, observation):
        # Epsilon-greedy action selection
        if random.random() < self.epsilon:
            action = random.randint(0, self.action_space_size - 1)
        else:
            with torch.no_grad():
                observation = torch.tensor(observation, dtype=torch.float32)
                q_values = self.policy_net(observation)
                action = torch.argmax(q_values).item()
        return action

    def move_agent(self, agent, action):
        x, y = agent['x'], agent['y']
        possible_moves = {
            0: (x, max(0, y - 1)),        # Up
            1: (x, min(self.height - 1, y + 1)),  # Down
            2: (max(0, x - 1), y),        # Left
            3: (min(self.width - 1, x + 1), y),   # Right
            4: (x, y)                     # Stay
        }
        new_x, new_y = possible_moves[action]
        if (new_x, new_y) not in self.agent_positions:
            self.agent_positions.remove((agent['x'], agent['y']))
            agent['x'], agent['y'] = new_x, new_y
            self.agent_positions.add((new_x, new_y))
        # else: If the space is occupied, the agent stays in place

    def step(self):
        # Update job centers
        for center in self.job_centers:
            center['duration'] -= 1
        self.job_centers = [center for center in self.job_centers if center['duration'] > 0]
        if np.random.random() < self.params['sugar_peak_frequency']:
            self.create_job_center()
        self.update_sugar_landscape()

        # For each agent, perform action and collect experience
        for agent in self.agents:
            observation = self.get_agent_observation(agent)
            action = self.select_action(observation)
            prev_x, prev_y = agent['x'], agent['y']
            self.move_agent(agent, action)
            collected_sugar = self.sugar[agent['y'], agent['x']]
            agent['sugar'] += collected_sugar
            self.sugar[agent['y'], agent['x']] = 0
            agent['sugar'] -= agent['metabolism']
            reward = collected_sugar - agent['metabolism']
            done = agent['sugar'] <= 0

            next_observation = self.get_agent_observation(agent)
            experience = (observation, action, reward, next_observation, done)
            self.memory.append(experience)

            agent['done'] = done

        # Update agent list
        alive_agents = []
        for agent in self.agents:
            if agent['done']:
                self.dead_agents.append({'x': agent['x'], 'y': agent['y'], 'death_time': self.timestep})
                self.agent_positions.remove((agent['x'], agent['y']))
            else:
                alive_agents.append(agent)
        self.agents = alive_agents

        # Perform learning step
        if len(self.memory) >= self.batch_size:
            self.learn()

        # Decay epsilon
        if self.epsilon > self.params['epsilon_min']:
            self.epsilon *= self.params['epsilon_decay']

        self.timestep += 1

    def learn(self):
        batch = random.sample(self.memory, self.batch_size)
        observations, actions, rewards, next_observations, dones = zip(*batch)

        observations = torch.tensor(observations, dtype=torch.float32)
        actions = torch.tensor(actions, dtype=torch.int64).unsqueeze(1)
        rewards = torch.tensor(rewards, dtype=torch.float32).unsqueeze(1)
        next_observations = torch.tensor(next_observations, dtype=torch.float32)
        dones = torch.tensor(dones, dtype=torch.float32).unsqueeze(1)

        q_values = self.policy_net(observations).gather(1, actions)
        with torch.no_grad():
            next_q_values = self.target_net(next_observations).max(1)[0].unsqueeze(1)
            target_q_values = rewards + (1 - dones) * self.gamma * next_q_values

        loss = nn.MSELoss()(q_values, target_q_values)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Update target network
        if self.timestep % self.params['target_update'] == 0:
            self.target_net.load_state_dict(self.policy_net.state_dict())

    def train(self, num_episodes, max_timesteps):
        for episode in range(num_episodes):
            # Reset environment
            self.reset_environment()
            episode_reward = 0
            for timestep in range(max_timesteps):
                self.step()
                episode_reward += sum(agent['sugar'] for agent in self.agents)
                if not self.agents:
                    break
            print(f"Episode {episode+1}/{num_episodes}, Total Reward: {episode_reward}, Epsilon: {self.epsilon:.4f}")

            # Save model periodically
            if (episode + 1) % self.params['save_interval'] == 0:
                torch.save(self.policy_net.state_dict(), f"policy_net_episode_{episode+1}.pth")

    def reset_environment(self):
        self.job_centers = []
        self.sugar = np.zeros((self.height, self.width), dtype=int)
        self.create_initial_sugar_peaks()
        self.agents = self.initialize_agents()
        self.agent_positions = set((agent['x'], agent['y']) for agent in self.agents)
        self.dead_agents = []
        self.timestep = 0

# Training Parameters
params = {
    'max_sugar': 5,
    'growth_rate': 1,
    'sugar_peak_frequency': 0.04,
    'sugar_peak_spread': 6,
    'job_center_duration': (40, 100),
    'vision_range': 1,
    'message_expiry': 15,
    'max_relay_messages': 10,
    'gamma': 0.99,
    'epsilon_start': 1.0,
    'epsilon_min': 0.1,
    'epsilon_decay': 0.995,
    'learning_rate': 1e-4,
    'batch_size': 64,
    'target_update': 100,
    'broadcast_radius': 15,
    'save_interval': 10  # Save model every 10 episodes
}

# Create environment and train
env = SugarscapeEnvironment(width=50, height=50, num_agents=1000, params=params)
num_episodes = 3
max_timesteps = 1000

env.train(num_episodes, max_timesteps)

torch.save(env.policy_net.state_dict(), "policy_net_final.pth")


Episode 1/3, Total Reward: 32363166, Epsilon: 0.0997
Episode 2/3, Total Reward: 16264675, Epsilon: 0.0997
Episode 3/3, Total Reward: 59109766, Epsilon: 0.0997


## Execution Phase

In [7]:
# Import necessary libraries
import random
import numpy as np
import pygame
import torch
from collections import deque

class SugarscapeEnvironmentVisual:
    def __init__(self, width, height, num_agents, params):
        self.width = width
        self.height = height
        self.num_agents = num_agents
        self.params = params

        self.job_centers = []
        self.sugar = np.zeros((self.height, self.width), dtype=int)
        self.create_initial_sugar_peaks()
        self.max_sugar_landscape = self.sugar.copy()
        self.agents = self.initialize_agents()
        self.agent_positions = set((agent['x'], agent['y']) for agent in self.agents)
        self.dead_agents = []

        # Initialize PyGame
        pygame.init()
        self.cell_size = 10
        self.screen = pygame.display.set_mode((width * self.cell_size, height * self.cell_size))
        pygame.display.set_caption("Sugarscape Simulation - Execution")
        self.clock = pygame.time.Clock()

        self.font = pygame.font.Font(None, 10)

        self.population_history = []
        self.average_wealth_history = []
        self.gini_coefficient_history = []
        self.timestep = 0

        # Load trained model
        self.observation_space_size = (self.params['vision_range'] * 2 + 1) ** 2 + 2  # Observation size
        self.action_space_size = 5  # Up, Down, Left, Right, Stay
        self.policy_net = DQN(self.observation_space_size, self.action_space_size)
        self.policy_net.load_state_dict(torch.load("policy_net_final.pth"))
        self.policy_net.eval()

    # ... (Same methods as in the training environment, excluding learning parts)

    def select_action(self, observation):
        with torch.no_grad():
            observation = torch.tensor(observation, dtype=torch.float32)
            q_values = self.policy_net(observation)
            action = torch.argmax(q_values).item()
        return action

    def step(self):
        # Update job centers
        for center in self.job_centers:
            center['duration'] -= 1
        self.job_centers = [center for center in self.job_centers if center['duration'] > 0]
        if np.random.random() < self.params['sugar_peak_frequency']:
            self.create_job_center()
        self.update_sugar_landscape()

        # For each agent, perform action
        for agent in self.agents:
            observation = self.get_agent_observation(agent)
            action = self.select_action(observation)
            self.move_agent(agent, action)
            collected_sugar = self.sugar[agent['y'], agent['x']]
            agent['sugar'] += collected_sugar
            self.sugar[agent['y'], agent['x']] = 0
            agent['sugar'] -= agent['metabolism']
            agent['done'] = agent['sugar'] <= 0

        # Update agent list
        alive_agents = []
        for agent in self.agents:
            if agent['done']:
                self.dead_agents.append({'x': agent['x'], 'y': agent['y'], 'death_time': self.timestep})
                self.agent_positions.remove((agent['x'], agent['y']))
            else:
                alive_agents.append(agent)
        self.agents = alive_agents

        self.dead_agents = [agent for agent in self.dead_agents if self.timestep - agent['death_time'] <= 5]

        self.collect_data()
        self.timestep += 1

    def render(self):
        self.screen.fill((255, 255, 255))

        for y in range(self.height):
            for x in range(self.width):
                sugar_level = self.sugar[y, x]
                color = self.get_color(sugar_level)
                pygame.draw.rect(self.screen, color,
                                 (x * self.cell_size, y * self.cell_size, self.cell_size, self.cell_size))

        for dead_agent in self.dead_agents:
            pygame.draw.circle(self.screen, (128, 128, 128),
                               (int(dead_agent['x'] * self.cell_size + self.cell_size / 2),
                                int(dead_agent['y'] * self.cell_size + self.cell_size / 2)),
                               int(self.cell_size / 3))

        for agent in self.agents:
            pygame.draw.circle(self.screen, (255, 0, 0),
                               (int(agent['x'] * self.cell_size + self.cell_size / 2),
                                int(agent['y'] * self.cell_size + self.cell_size / 2)),
                               int(self.cell_size / 3))

        pygame.display.flip()

    def get_color(self, sugar_level):
        if sugar_level == 0:
            return (255, 255, 255)
        else:
            intensity = sugar_level / self.params['max_sugar']
            return (255, 255, int(255 * (1 - intensity)))

    def collect_data(self):
        population = len(self.agents)
        total_wealth = sum(agent['sugar'] for agent in self.agents)
        average_wealth = total_wealth / population if population > 0 else 0

        self.population_history.append(population)
        self.average_wealth_history.append(average_wealth)
        self.gini_coefficient_history.append(self.calculate_gini_coefficient())

    def calculate_gini_coefficient(self):
        if not self.agents:
            return 0
        wealth_values = sorted(agent['sugar'] for agent in self.agents)
        cumulative_wealth = np.cumsum(wealth_values)
        return (np.sum((2 * np.arange(1, len(wealth_values) + 1) - len(wealth_values) - 1) * wealth_values) /
                (len(wealth_values) * np.sum(wealth_values)))

    def plot_results(self):
        import matplotlib.pyplot as plt

        plt.figure(figsize=(15, 5))

        plt.subplot(131)
        plt.plot(self.population_history)
        plt.title('Population over Time')
        plt.xlabel('Timestep')
        plt.ylabel('Population')

        plt.subplot(132)
        plt.plot(self.average_wealth_history)
        plt.title('Average Wealth over Time')
        plt.xlabel('Timestep')
        plt.ylabel('Average Wealth')

        plt.subplot(133)
        plt.plot(self.gini_coefficient_history)
        plt.title('Gini Coefficient over Time')
        plt.xlabel('Timestep')
        plt.ylabel('Gini Coefficient')

        plt.tight_layout()
        plt.show()

    def run_simulation(self, max_timesteps=1000):
        running = True
        while running and self.timestep < max_timesteps:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False

            self.step()
            self.render()
            self.clock.tick(5)

        self.plot_results()

# Execution Parameters (same as training)
params = {
    'max_sugar': 5,
    'growth_rate': 1,
    'sugar_peak_frequency': 0.04,
    'sugar_peak_spread': 6,
    'job_center_duration': (40, 100),
    'vision_range': 1,
    'message_expiry': 15,
    'max_relay_messages': 10,
    'broadcast_radius': 15
}

# Create environment and run simulation
env_visual = SugarscapeEnvironmentVisual(width=50, height=50, num_agents=1000, params=params)
env_visual.run_simulation(max_timesteps=1000)


AttributeError: 'SugarscapeEnvironmentVisual' object has no attribute 'create_initial_sugar_peaks'