In [712]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [713]:
# Payoff tables for the three kinds of pairing.
# simulate() reads them as matrix[agent1_action, agent2_action],
# where index 0 means "cooperate" and index 1 means "does not cooperate".
baseline_reward_matrix = np.array([
    [3, 1],
    [3, 0],
])
in_group_reward_matrix = np.array([
    [4, 1],
    [3, 0],
])
out_group_reward_matrix = np.array([
    [7, 1],
    [5, 0],
])

In [714]:
# Define agent class and simulation function
class Agent:
    """A participant in the pairwise cooperation game.

    Attributes:
        group: Label of the group the agent belongs to (e.g. 'none',
            'in_group', 'out_group').
        coop_prob: Cooperation probability stored on the agent.
        total_reward: Sum of every reward the agent has received.
        total_in_group_reward: Sum of rewards from same-group pairings.
        total_out_group_reward: Sum of rewards from different-group pairings.
    """

    def __init__(self, group, coop_prob):
        self.group = group
        self.coop_prob = coop_prob
        self.total_reward = 0
        # simulate() accumulates into these with `+=`, so they must exist
        # before the first interaction or it raises AttributeError.
        self.total_in_group_reward = 0
        self.total_out_group_reward = 0

In [715]:
def select_pairs(agents):
    """Randomly match agents into disjoint pairs.

    Args:
        agents: Sized collection of agents; it is copied, not mutated.

    Returns:
        A list of 2-tuples. Each agent appears in at most one pair; with an
        odd number of agents one is left unpaired.
    """
    pool = list(agents)  # working copy that shrinks as agents are matched
    wanted = len(agents) // 2
    matched = []

    while len(matched) < wanted and len(pool) >= 2:
        first, second = random.sample(pool, 2)
        if first == second:
            # Draw again rather than pairing an agent with itself
            continue
        matched.append((first, second))
        pool.remove(first)
        pool.remove(second)

    return matched

def simulate(agents, reward_matrix, iterations=100, in_group_prob=0.7, out_group_prob=0.3):
    """Play repeated pairwise cooperation games and accumulate rewards on the agents.

    Each iteration re-pairs the agents with select_pairs(). For every pair the
    payoff matrix and the cooperation probability depend on the pairing:

    * both agents have group 'none' -> baseline_reward_matrix, probability 0.5
    * both agents share another group -> in_group_reward_matrix, in_group_prob
    * agents are in different groups -> out_group_reward_matrix, out_group_prob

    Args:
        agents: Collection of agents to simulate; mutated in place.
        reward_matrix: Accepted for interface compatibility but not read; the
            matrix is chosen per pair from the module-level matrices.
        iterations: Number of pairing rounds.
        in_group_prob: Cooperation probability for same-group pairs.
        out_group_prob: Cooperation probability for mixed-group pairs.

    Returns:
        None. Results are written to ``total_reward``,
        ``total_in_group_reward`` and ``total_out_group_reward`` on each agent.
    """
    for _ in range(iterations):
        for agent1, agent2 in select_pairs(agents):
            same_group = agent1.group == agent2.group

            # The 'none' test has to come first: two baseline agents also share
            # a group label, so checking same_group first would route them to
            # the in-group matrix and make this branch unreachable.
            if agent1.group == 'none' and agent2.group == 'none':
                current_reward_matrix = baseline_reward_matrix
                coop_prob = 0.5
            elif same_group:
                current_reward_matrix = in_group_reward_matrix
                coop_prob = in_group_prob
            else:
                current_reward_matrix = out_group_reward_matrix
                coop_prob = out_group_prob

            agent1_cooperates = np.random.rand() < coop_prob
            agent2_cooperates = np.random.rand() < coop_prob

            # Row = agent1's action, column = agent2's action;
            # index 0 = cooperates, index 1 = does not cooperate.
            reward = current_reward_matrix[int(not agent1_cooperates), int(not agent2_cooperates)]

            # NOTE(review): both agents are credited the same matrix entry;
            # confirm a shared payoff is intended rather than per-player payoffs.
            tally = 'total_in_group_reward' if same_group else 'total_out_group_reward'
            for agent in (agent1, agent2):
                agent.total_reward += reward
                # getattr default keeps this working for agents that were
                # created without the per-relation counters.
                setattr(agent, tally, getattr(agent, tally, 0) + reward)

In [716]:
# Build one population per experimental condition.
# Mixed populations hold 15 agents of each group; the baseline holds 30 ungrouped agents.
agents_baseline = [Agent('none', 0.5) for _ in range(30)]
agents_compare = (
    [Agent('in_group', 0.7) for _ in range(15)]
    + [Agent('out_group', 0.3) for _ in range(15)]
)
agents_in_group = (
    [Agent('in_group', 0.7) for _ in range(15)]
    + [Agent('out_group', 0.3) for _ in range(15)]
)
agents_out_group = (
    [Agent('out_group', 0.7) for _ in range(15)]
    + [Agent('in_group', 0.3) for _ in range(15)]
)

# Run the simulation for every condition (order matters for the shared RNG stream)
simulate(agents_baseline, baseline_reward_matrix)
simulate(
    agents_compare,
    in_group_reward_matrix,
    in_group_prob=0.7,
    out_group_prob=0.3,
)
simulate(
    agents_in_group,
    in_group_reward_matrix,
    in_group_prob=0.7,
    out_group_prob=0.3,
)
simulate(
    agents_out_group,
    out_group_reward_matrix,
    in_group_prob=0.7,
    out_group_prob=0.3,
)

# Gather the per-agent totals that feed the summary table
baseline_rewards = [member.total_reward for member in agents_baseline]
baseline_compare = [member.total_reward for member in agents_compare]
in_group_rewards = [member.total_in_group_reward for member in agents_in_group]
out_group_rewards = [member.total_out_group_reward for member in agents_out_group]

In [None]:
# One column per condition, one row per agent position.
# 'With Identity' is the position-wise sum of the in-group and out-group lists.
# NOTE(review): those two lists come from different populations
# (agents_in_group / agents_out_group) — confirm the index alignment is intended.
results_df = pd.DataFrame(
    {
        'Baseline': baseline_rewards,
        'Comparison': baseline_compare,
        'In-group': in_group_rewards,
        'Out-group': out_group_rewards,
        'With Identity': [
            in_value + out_group_rewards[idx]
            for idx, in_value in enumerate(in_group_rewards)
        ],
    }
)

# Summary statistics for every column
print("Simulation Results")
print(results_df.describe())

In [None]:
# Q LEARNING
class Agent:
    """Agent that learns whether to cooperate from a two-entry Q-table.

    Attributes:
        group: Group label of the agent.
        coop_prob: Probability of cooperating when both Q-values are tied.
        total_reward: Sum of all rewards received.
        total_in_group_reward / total_out_group_reward: Per-relation reward sums.
        q_values: Q-value per action, keyed 'cooperate' / 'not_cooperate'.
        learning_rate: Step size of the Q-value update.
        discount_factor: Weight applied to the next-state maximum Q-value.
        in_group_interactions / out_group_interactions: Interaction counters.
    """

    def __init__(self, group, coop_prob=0.5, learning_rate=0.1, discount_factor=0.95):
        # Identity and prior behaviour
        self.group = group
        self.coop_prob = coop_prob

        # Learning state
        self.q_values = {'cooperate': 0, 'not_cooperate': 0}
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor

        # Accumulators filled in by the simulation
        self.total_reward = 0
        self.total_in_group_reward = 0
        self.total_out_group_reward = 0
        self.in_group_interactions = 0
        self.out_group_interactions = 0

    def choose_action(self):
        """Return the action with the higher Q-value; break ties using coop_prob."""
        q_coop = self.q_values['cooperate']
        q_defect = self.q_values['not_cooperate']

        if q_coop > q_defect:
            return 'cooperate'
        if q_coop < q_defect:
            return 'not_cooperate'

        # Tie: a random draw is only consumed in this case
        if np.random.rand() < self.coop_prob:
            return 'cooperate'
        return 'not_cooperate'

    def update_q_values(self, action, reward, next_max_q):
        """Move Q(action) toward reward + discount_factor * next_max_q."""
        td_error = reward + self.discount_factor * next_max_q - self.q_values[action]
        self.q_values[action] = self.q_values[action] + self.learning_rate * td_error

def select_pairs(agents):
    """Randomly split the agents into disjoint pairs.

    Args:
        agents: Sized collection of agents; it is copied, not mutated.

    Returns:
        A list of ``len(agents) // 2`` 2-tuples; with an odd count one agent
        is left unpaired.
    """
    pool = list(agents)  # shrinks by two after every draw
    matched = []

    # Each round removes exactly two entries, so len(agents) // 2 rounds
    # exhaust every possible pair.
    for _ in range(len(agents) // 2):
        first, second = random.sample(pool, 2)
        matched.append((first, second))
        pool.remove(first)
        pool.remove(second)

    return matched

def simulate(agents, reward_matrix, iterations=100, in_group_prob=0.7, out_group_prob=0.3):
    """Run repeated pairwise games in which agents act on, and update, their Q-values.

    Each iteration re-pairs the agents with select_pairs(). Both agents pick an
    action via ``choose_action()``, the payoff is looked up in the matrix that
    matches the pairing, and each agent's Q-value for its action is updated.

    Matrix selection per pair:

    * both agents have group 'none' -> baseline_reward_matrix (no in/out tally)
    * both agents share another group -> in_group_reward_matrix
    * agents are in different groups -> out_group_reward_matrix

    Args:
        agents: Collection of Q-learning agents; mutated in place.
        reward_matrix: Accepted for interface compatibility but not read; the
            matrix is chosen per pair from the module-level matrices.
        iterations: Number of pairing rounds.
        in_group_prob: Not read here; actions come from ``choose_action()``.
        out_group_prob: Not read here; actions come from ``choose_action()``.

    Returns:
        None. Rewards, interaction counters and Q-values are updated on the agents.
    """
    for _ in range(iterations):
        for agent1, agent2 in select_pairs(agents):
            action1 = agent1.choose_action()
            action2 = agent2.choose_action()

            # The 'none' test has to come first: two baseline agents also share
            # a group label, so testing group equality first would treat them
            # as an in-group pair and make this branch unreachable.
            if agent1.group == 'none' and agent2.group == 'none':
                current_reward_matrix = baseline_reward_matrix
                relation = None
            elif agent1.group == agent2.group:
                current_reward_matrix = in_group_reward_matrix
                relation = 'in'
            else:
                current_reward_matrix = out_group_reward_matrix
                relation = 'out'

            # Row = agent1's action, column = agent2's action;
            # index 0 = 'cooperate', index 1 = anything else ('not_cooperate').
            reward = current_reward_matrix[int(action1 != 'cooperate'), int(action2 != 'cooperate')]

            # NOTE(review): both agents are credited the same matrix entry;
            # confirm a shared payoff is intended rather than per-player payoffs.
            for agent, action in ((agent1, action1), (agent2, action2)):
                agent.total_reward += reward

                # Tally the reward actually earned in this interaction. Adding
                # matrix[0, 0] before the actions are evaluated would record
                # the mutual-cooperation payoff regardless of what was played.
                if relation == 'in':
                    agent.in_group_interactions += 1
                    agent.total_in_group_reward += reward
                elif relation == 'out':
                    agent.out_group_interactions += 1
                    agent.total_out_group_reward += reward

                # Q-learning step; the "next state" value is the agent's own
                # current best Q-value, read before this update is applied.
                next_max_q = max(agent.q_values.values())
                agent.update_q_values(action, reward, next_max_q)

# Build one population per experimental condition.
# Disabled alternative populations kept from the original notebook:
# agents_07 = [Agent(group='none', coop_prob=0.7) for _ in range(15)]
# agents_03 = [Agent(group='none', coop_prob=0.3) for _ in range(15)]

agents_baseline = [Agent('none', coop_prob=0.5) for _ in range(30)]
agents_in_group = (
    [Agent('in_group', coop_prob=0.7) for _ in range(15)]
    + [Agent('out_group', coop_prob=0.3) for _ in range(15)]
)
agents_out_group = (
    [Agent('out_group', coop_prob=0.7) for _ in range(15)]
    + [Agent('in_group', coop_prob=0.3) for _ in range(15)]
)

# Run the simulation for every condition (order matters for the shared RNG stream)
simulate(agents_baseline, baseline_reward_matrix)
simulate(agents_in_group, in_group_reward_matrix)
simulate(agents_out_group, out_group_reward_matrix)

# Per-agent totals for each condition
baseline_rewards = [member.total_reward for member in agents_baseline]
in_group_rewards = [member.total_in_group_reward for member in agents_in_group]
out_group_rewards = [member.total_out_group_reward for member in agents_out_group]

# Total reward split by the agents' initial cooperation probability
rewards_07_prob_in_group = [
    member.total_reward for member in agents_in_group if member.coop_prob == 0.7
]
rewards_03_prob_in_group = [
    member.total_reward for member in agents_in_group if member.coop_prob == 0.3
]
rewards_07_prob_out_group = [
    member.total_reward for member in agents_out_group if member.coop_prob == 0.7
]
rewards_03_prob_out_group = [
    member.total_reward for member in agents_out_group if member.coop_prob == 0.3
]

# Summary table: one column per condition, one row per agent position.
# 'With Identity' is the position-wise sum of the in-group and out-group lists.
# NOTE(review): those two lists come from different populations — confirm the
# index alignment is intended.
results_df = pd.DataFrame(
    {
        'Baseline': baseline_rewards,
        'In-group': in_group_rewards,
        'Out-group': out_group_rewards,
        'With Identity': [
            in_value + out_group_rewards[idx]
            for idx, in_value in enumerate(in_group_rewards)
        ],
    }
)

# Rewards broken down by initial cooperation probability (built but not displayed here)
df = pd.DataFrame(
    {
        '70% Reward In Group': rewards_07_prob_in_group,
        '30% Reward In Group': rewards_03_prob_in_group,
        '70% Reward Out Group': rewards_07_prob_out_group,
        '30% Reward Out Group': rewards_03_prob_out_group,
    }
)

# Show summary statistics for the condition table
print("Simulation Results")
print(results_df.describe())