In [1]:
import os
import sys
import ast
import re
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from ray.tune.registry import register_env
from ray.rllib.algorithms.algorithm import Algorithm
from jaxmarl.environments.coin_game.coin_game_rllib_env import CoinGameRLLibEnv




In [2]:
# GRAPH SETTINGS
# Use the STIX fonts for both math text and regular text in every figure.
plt.rcParams.update({
    'mathtext.fontset': 'stix',
    'font.family': 'STIXGeneral',
})

In [3]:
def env_creator(env_config):
    """Build a CoinGameRLLibEnv, passing every entry of ``env_config`` as a keyword argument."""
    environment = CoinGameRLLibEnv(**env_config)
    return environment


# Make the environment available to RLlib under the name "coin_game_env_RLLIB".
register_env("coin_game_env_RLLIB", env_creator)

In [4]:
# Root directory of the data; keep exactly one of the following lines uncommented.
#local = '/mnt/lustre/home/samuloza'
local = '/home/samuel_lozano'
#local = 'C:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado'

# DILEMMA selects the sub-directory to analyse: 1 -> "Prisioner_dilemma", 0 -> "No_dilemma".
DILEMMA = 0
extra_info = 'Individual'
MAIN_PATH = f"{local}/data/samuel_lozano/CoopCoins/RLLIB/{extra_info}"

if DILEMMA == 1:
    BASE_PATH = f"{MAIN_PATH}/Prisioner_dilemma"
elif DILEMMA == 0:
    BASE_PATH = f"{MAIN_PATH}/No_dilemma"
else:
    # Fail here instead of leaving BASE_PATH undefined (or stale from a previous run of the cell).
    raise ValueError(f"DILEMMA must be 0 or 1, got {DILEMMA!r}")

# Attitude pairs (by name) whose training runs should be analysed.
REWARD_ATTITUDE_PAIRS = [
    ('cooperative', 'individualistic'),
    ('competitive', 'individualistic'),
    # Add more pairs as needed
]

# Checkpoint number used to build the checkpoint and policy-CSV file names.
CHECKPOINT = 5000

In [5]:
# Attitude name -> (alpha, beta) reward coefficients.
attitudes = {
    "individualistic": (1.0, 0.0),
    "cooperative": (0.707, 0.707),
    "competitive": (0.707, -0.707)
}

# Reverse lookup: (alpha, beta) tuple -> attitude name.
attitude_names = {v: k for k, v in attitudes.items()}

# Add the mirrored version of every configured pair. dict.fromkeys drops duplicates while
# keeping first-seen order, so re-running this cell (or listing a symmetric pair) no longer
# makes the list grow.
REWARD_ATTITUDE_PAIRS = list(dict.fromkeys(
    [(a, b) for (a, b) in REWARD_ATTITUDE_PAIRS] + [(b, a) for (a, b) in REWARD_ATTITUDE_PAIRS]
))

# Automatically build REWARD_COEFS from attitude names: one [[alpha, beta], [alpha, beta]]
# entry per pair, in the same order as REWARD_ATTITUDE_PAIRS (mirrored pairs included).
REWARD_COEFS = [
    [list(attitudes[a1]), list(attitudes[a2])] for (a1, a2) in REWARD_ATTITUDE_PAIRS
]

# Helper: does the row's OWN coefficient pair equal `coef` within an absolute tolerance of 1e-3?
def match_coef(row, coef):
    """Compare row['OWN_REWARD_COEF_ALPHA'/'OWN_REWARD_COEF_BETA'] with coef[0]/coef[1] via np.isclose."""
    alpha_close = np.isclose(row['OWN_REWARD_COEF_ALPHA'], coef[0], atol=1e-3)
    if not alpha_close:
        # Beta is only looked at when alpha already matches.
        return alpha_close
    return np.isclose(row['OWN_REWARD_COEF_BETA'], coef[1], atol=1e-3)

def match_other_coef(row, coef):
    """Compare row['OTHER_REWARD_COEF_ALPHA'/'OTHER_REWARD_COEF_BETA'] with coef[0]/coef[1] (atol=1e-3)."""
    alpha_close = np.isclose(row['OTHER_REWARD_COEF_ALPHA'], coef[0], atol=1e-3)
    if not alpha_close:
        # Beta is only looked at when alpha already matches.
        return alpha_close
    return np.isclose(row['OTHER_REWARD_COEF_BETA'], coef[1], atol=1e-3)


In [6]:
# Offset added to an agent position for each action id (row index = action id).
MOVES = np.array([
    (0, 1),    # 0: right
    (0, -1),   # 1: left
    (1, 0),    # 2: up
    (-1, 0),   # 3: down
    (0, 0),    # 4: stay
])

In [7]:
# Read the REWARD_COEF entry from a config.txt file
def get_reward_coef(config_path):
    """Return the parsed REWARD_COEF value found in ``config_path``.

    The file is scanned line by line for a line that starts with ``REWARD_COEF``;
    the text after the first ``:`` is parsed with ``ast.literal_eval``.

    Args:
        config_path: path of the text config file to read.

    Returns:
        The parsed Python literal of the first parsable ``REWARD_COEF`` line,
        or ``None`` when no such line exists.
    """
    with open(config_path, 'r') as f:
        for line in f:
            stripped = line.strip()
            if not stripped.startswith('REWARD_COEF'):
                continue
            # Take the part after the colon; a line without one used to raise IndexError.
            _, separator, coef_str = stripped.partition(':')
            if not separator:
                print(f"Error parsing REWARD_COEF in {config_path}: missing ':' separator")
                continue
            try:
                return ast.literal_eval(coef_str.strip())
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
                # Report the problem and keep scanning the remaining lines.
                print(f"Error parsing REWARD_COEF in {config_path}: {e}")
    return None

def coefs_equal_3dec(a, b):
    """Return True when two nested coefficient lists are equal after rounding to 3 decimals.

    Args:
        a, b: sequences of rows, each row a sequence of numbers
            (e.g. ``[[alpha0, beta0], [alpha1, beta1]]``). ``None`` is accepted.

    Returns:
        ``False`` when either argument is ``None``, when the shapes differ, or when
        the arguments are not nested numeric sequences; otherwise the element-wise
        comparison of the rounded values.
    """
    if a is None or b is None:
        return False
    try:
        # zip() alone would silently truncate to the shorter input, so compare lengths first.
        if len(a) != len(b):
            return False
        for row_a, row_b in zip(a, b):
            if len(row_a) != len(row_b):
                return False
            if any(round(x, 3) != round(y, 3) for x, y in zip(row_a, row_b)):
                return False
    except TypeError:
        # Raised by len()/zip()/round() when a value is not a nested numeric sequence.
        return False
    return True

# Find the training directories whose config holds one of the wanted REWARD_COEF values
def find_training_dirs():
    """Group the sub-directories of BASE_PATH by the REWARD_COEFS target they match.

    A sub-directory is considered when it contains a ``config.txt`` whose
    REWARD_COEF can be parsed; it is matched against every entry of REWARD_COEFS
    with ``coefs_equal_3dec``.

    Returns:
        dict mapping ``str(target)`` to the list of matching directory paths.
    """
    matches = {}
    # Sorted so the result (and everything derived from it) has a reproducible order.
    for dir_name in sorted(os.listdir(BASE_PATH)):
        dir_path = os.path.join(BASE_PATH, dir_name)
        if not os.path.isdir(dir_path):
            continue
        config_path = os.path.join(dir_path, 'config.txt')
        if not os.path.exists(config_path):
            continue
        coef = get_reward_coef(config_path)
        if coef is None:
            # No parsable REWARD_COEF in this config: nothing to compare against.
            continue
        for target in REWARD_COEFS:
            if coefs_equal_3dec(coef, target):
                dirs_for_target = matches.setdefault(str(target), [])
                # Repeated targets in REWARD_COEFS must not list the same directory twice.
                if dir_path not in dirs_for_target:
                    dirs_for_target.append(dir_path)
    return matches

# Load the policy of one agent from an RLlib checkpoint
def load_policy(checkpoint_dir, agent_id):
    """Restore the algorithm saved at checkpoint number CHECKPOINT and return one agent's policy.

    Args:
        checkpoint_dir: directory that contains the ``checkpoint_<CHECKPOINT>`` entry.
        agent_id: agent index; the policy looked up is ``f"agent_{agent_id}"``.

    Returns:
        Whatever ``Algorithm.get_policy`` returns for that policy id.

    Raises:
        FileNotFoundError: when no entry starting with ``checkpoint_<CHECKPOINT>``
            exists inside ``checkpoint_dir``.
    """
    checkpoint_name = f'checkpoint_{CHECKPOINT}'
    checkpoint_path = os.path.join(checkpoint_dir, checkpoint_name)
    if not os.path.exists(checkpoint_path):
        # The exact name is absent: fall back to an entry that merely starts with it.
        # Sorted so the choice is deterministic when several entries qualify.
        candidates = sorted(
            entry for entry in os.listdir(checkpoint_dir) if entry.startswith(checkpoint_name)
        )
        if not candidates:
            raise FileNotFoundError(
                f"No entry starting with '{checkpoint_name}' found in {checkpoint_dir}"
            )
        checkpoint_path = os.path.join(checkpoint_dir, candidates[0])
    # Restore the whole algorithm, then pick the requested agent's policy.
    algo = Algorithm.from_checkpoint(checkpoint_path)
    policy = algo.get_policy(f"agent_{agent_id}")
    return policy

In [8]:
def state_to_obs(state, grid_size=3):
    """
    Turn a board state into the flattened observation of each agent.

    state: dict with keys 'red_pos', 'blue_pos', 'red_coin_pos', 'blue_coin_pos';
           each value is a pair of grid indices.
    Returns: {'agent_0': obs0, 'agent_1': obs1}, each a flat int8 array of
             length grid_size * grid_size * 4.
    """
    # Channel layout seen by agent_0: red agent, blue agent, red coin, blue coin.
    channel_by_key = (
        ('red_pos', 0),
        ('blue_pos', 1),
        ('red_coin_pos', 2),
        ('blue_coin_pos', 3),
    )
    view_agent_0 = np.zeros((grid_size, grid_size, 4), dtype=np.int8)
    for key, channel in channel_by_key:
        first_idx, second_idx = state[key][0], state[key][1]
        view_agent_0[first_idx, second_idx, channel] = 1
    # agent_1 sees the same board with the red/blue agent channels and the
    # red/blue coin channels swapped.
    view_agent_1 = view_agent_0[:, :, [1, 0, 3, 2]]
    return {
        'agent_0': view_agent_0.flatten(),
        'agent_1': view_agent_1.flatten(),
    }

def generate_all_valid_states(grid_size=3):
    """Enumerate every placement of both agents and both coins on four distinct cells.

    Returns a list of dicts with keys 'red_pos', 'blue_pos', 'red_coin_pos' and
    'blue_coin_pos', each an (i, j) tuple. The ordering follows nested iteration
    over the cells in that key order.
    """
    cells = [(i, j) for i in range(grid_size) for j in range(grid_size)]
    return [
        {
            'red_pos': red,
            'blue_pos': blue,
            'red_coin_pos': red_coin,
            'blue_coin_pos': blue_coin,
        }
        for red in cells
        for blue in cells if blue != red
        for red_coin in cells if red_coin not in (red, blue)
        for blue_coin in cells if blue_coin not in (red, blue, red_coin)
    ]

def simple_adjacent(pos1, pos2, grid_size):
    """True when the two positions are exactly one step apart (no wrap-around; grid_size is unused)."""
    first_gap = abs(pos1[0] - pos2[0])
    second_gap = abs(pos1[1] - pos2[1])
    return first_gap + second_gap == 1

def toroidal_adjacent(pos1, pos2, grid_size):
    """True when the positions are one step apart on a grid whose edges wrap around."""
    def wrapped_gap(axis):
        # Shortest distance along one axis, going either directly or across the edge.
        direct = abs(pos1[axis] - pos2[axis])
        return min(direct, grid_size - direct)

    dx = wrapped_gap(0)
    dy = wrapped_gap(1)
    return (dx == 1 and dy == 0) or (dx == 0 and dy == 1)

def classify_action(obs, action, grid_size=3):
    """Label an action taken from ``obs`` with a one-hot list of five categories.

    Positions in the result, in order:
        0 - the move lands on the agent's own coin
        1 - the move lands on the other agent's coin
        2 - no coin taken although the own coin was toroidally adjacent
        3 - no coin taken although only the other coin was toroidally adjacent
        4 - no coin taken and no coin was adjacent

    Args:
        obs: observation that reshapes to (grid_size, grid_size, 4); channel 0 is
            the agent, 1 the other agent, 2 the own coin, 3 the other coin.
        action: index into MOVES.
        grid_size: side length of the wrap-around grid.
    """
    grid = np.array(obs).reshape((grid_size, grid_size, 4))

    def locate(channel):
        # Coordinates of the first cell set to 1 in the given channel.
        return np.argwhere(grid[:, :, channel] == 1)[0]

    agent_pos = locate(0)
    own_coin_pos = locate(2)
    other_coin_pos = locate(3)
    # Apply the move with wrap-around at the grid edges.
    new_pos = (agent_pos + MOVES[action]) % grid_size

    if np.array_equal(new_pos, own_coin_pos):
        category = 0
    elif np.array_equal(new_pos, other_coin_pos):
        category = 1
    elif toroidal_adjacent(agent_pos, own_coin_pos, grid_size):
        category = 2
    elif toroidal_adjacent(agent_pos, other_coin_pos, grid_size):
        category = 3
    else:
        category = 4
    return [1 if slot == category else 0 for slot in range(5)]

def extract_policy_to_csv(policy, output_csv, agent_id=0, grid_size=3):
    """Query ``policy`` on every valid board state and write the decisions to a CSV file.

    Each row holds the flattened observation (``obs_0`` ... ``obs_{grid_size*grid_size*4 - 1}``),
    the chosen action and the five category flags produced by ``classify_action``.

    Args:
        policy: object exposing ``compute_single_action(obs)``; the result may be the
            action itself or a tuple whose first element is the action.
        output_csv: path of the CSV file to create (overwritten if it exists).
        agent_id: which agent's observation ('agent_<agent_id>') is fed to the policy.
        grid_size: side length of the grid used to enumerate states and build observations.
    """
    states = generate_all_valid_states(grid_size)
    with open(output_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            [f'obs_{i}' for i in range(grid_size*grid_size*4)] +
            ['action',
             'own_coin_collected', 'other_coin_collected',
             'reject_own_coin', 'reject_other_coin', 'no_coin_visible']
        )
        for state in states:
            # Forward grid_size: the states were enumerated for this grid, so the
            # observation must be built with the same size (previously it always used the default).
            obs_dict = state_to_obs(state, grid_size)  # returns dict for both agents
            obs = obs_dict[f'agent_{agent_id}']
            action = policy.compute_single_action(obs)
            # Keep only the action when the policy returns a tuple.
            if isinstance(action, tuple):
                action_val = action[0]
            else:
                action_val = action
            metrics = classify_action(np.array(obs), action_val, grid_size=grid_size)
            writer.writerow(list(obs) + [action_val] + metrics)

# FULL POLICY CSV GENERATION

In [9]:
# Training directories under BASE_PATH, grouped by the REWARD_COEFS target they match.
matches = find_training_dirs()

In [None]:
# For every matched training run: print its config, then export each agent's policy
# to a CSV file stored inside the training directory.
for coef, dir_paths in matches.items():
    print(f"\n=== Procesando REWARD_COEF={coef} ===")
    for dir_path in dir_paths:
        print(f"\n--- Training directory: {dir_path} ---")
        # Only printed for information; load_policy() below is given dir_path.
        checkpoint_dir = os.path.join(dir_path, f'checkpoint_{CHECKPOINT}')
        print(f"Checkpoint directory: {checkpoint_dir}")
        config_path = os.path.join(dir_path, 'config.txt')
        if os.path.exists(config_path):
            with open(config_path) as f:
                print("Config:")
                print(f.read())
        else:
            print("Config file not found.")

        # Best effort: a failure is reported and the loop moves on to the next run.
        # Note that an error while handling agent 0 also skips agent 1 of the same run.
        try:
            for agent_id in [0, 1]:
                print(f"Loading policy for agent {agent_id}...")
                policy = load_policy(dir_path, agent_id)
                print(f"Policy loaded for agent {agent_id}.")
                output_csv = f"policy_agent_{agent_id}_checkpoint_{CHECKPOINT}.csv"
                output_path = os.path.join(dir_path, output_csv)
                print(f"Saving CSV for agent {agent_id} to {output_path}...")
                extract_policy_to_csv(policy, output_path, agent_id, grid_size=3)
                print(f"CSV saved at {output_path}")
        except Exception as e:
            print(f"Error loading policy or saving CSV: {e}")

In [10]:
# Per-run, per-agent policy tables (with metadata columns) to be concatenated below.
all_rows = []

for coef, dir_paths in matches.items():
    for dir_path in dir_paths:
        config_path = os.path.join(dir_path, 'config.txt')
        if not os.path.exists(config_path):
            print(f"Config file not found in {dir_path}")
            continue

        # Parse config.txt ("KEY: value" lines) into a dict of strings
        with open(config_path) as f:
            config_lines = f.readlines()
        config_dict = {}
        for line in config_lines:
            if ':' in line:
                key, value = line.split(':', 1)
                config_dict[key.strip()] = value.strip()
        try:
            lr = float(config_dict.get('LR', 'nan'))
            seed = int(config_dict.get('SEED', 'nan'))
            # ast.literal_eval only accepts Python literals, so text from the config file
            # can never execute code (eval could). A missing entry falls back to a string
            # that fails to parse, so the run is reported and skipped.
            reward_coef = ast.literal_eval(config_dict.get('REWARD_COEF', '[[nan, nan], [nan, nan]]'))
        except Exception as e:
            print(f"Error parsing config in {dir_path}: {e}")
            continue

        for agent_id in [0, 1]:
            csv_file = os.path.join(dir_path, f'policy_agent_{agent_id}_checkpoint_{CHECKPOINT}.csv')
            if not os.path.exists(csv_file):
                print(f"CSV not found: {csv_file}")
                continue

            # Parse checkpoint from the file name only, so a "checkpoint_<n>" fragment
            # elsewhere in the directory path cannot be picked up by mistake.
            m = re.search(r'checkpoint_(\d+)', os.path.basename(csv_file))
            checkpoint_num = int(m.group(1)) if m else None

            # Set own and other reward coefs (the other agent is index 1 - agent_id)
            own_coef = reward_coef[agent_id]
            other_coef = reward_coef[1 - agent_id]

            # Read policy CSV
            df = pd.read_csv(csv_file)
            # Add metadata columns at the beginning
            df.insert(0, 'DILEMMA', DILEMMA)
            df.insert(1, 'LR', lr)
            df.insert(2, 'SEED', seed)
            df.insert(3, 'CHECKPOINT', checkpoint_num)
            df.insert(4, 'OWN_REWARD_COEF_ALPHA', own_coef[0])
            df.insert(5, 'OWN_REWARD_COEF_BETA', own_coef[1])
            df.insert(6, 'OTHER_REWARD_COEF_ALPHA', other_coef[0])
            df.insert(7, 'OTHER_REWARD_COEF_BETA', other_coef[1])
            df.insert(8, 'AGENT_ID', agent_id)
            #df.insert(9, 'DIR_PATH', dir_path)
            all_rows.append(df)

# Concatenate all tables and store the result next to the experiment folders
if all_rows:
    final_df = pd.concat(all_rows, ignore_index=True)
    output_csv_path = os.path.join(MAIN_PATH, "all_policies_combined.csv")
    final_df.to_csv(output_csv_path, index=False)
    print(f"Combined CSV saved as {output_csv_path}")
else:
    print("No data found to combine.")

Combined CSV saved as /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/all_policies_combined.csv


# MANY GAMES DECISION COMPARISON

In [25]:
# Scan the training directories again; this rebinds `matches` with a fresh result.
matches = find_training_dirs()

# Comparing policies

In [11]:
# Output folder for the per-run comparison figures (created on demand).
figures_dir = os.path.join(MAIN_PATH, "comparing_policies")
os.makedirs(figures_dir, exist_ok=True)

def save_plot(fig, name, dir):
    """Write ``fig`` to ``dir``/``name``, close the figure and print the destination path."""
    destination = os.path.join(dir, name)
    fig.savefig(destination)
    # Close the figure once it is on disk so figures do not pile up in memory.
    plt.close(fig)
    print(f"Figure saved to {destination}")

In [12]:
# Names of the five category columns stored in every policy CSV.
categories_5 = [
    'own_coin_collected', 'other_coin_collected',
    'reject_own_coin', 'reject_other_coin',
    'no_coin_visible',
]
# The two "coin collected" categories are the first two entries of the list above.
categories_2 = categories_5[:2]
# Tick labels for the action ids 0-4.
action_labels = [
    'right',
    'left',
    'up',
    'down',
    'stay',
]


In [13]:
# 1. Group directories by seed: seed -> list of (dir_path, coef key, parsed config dict)
seed_to_dirs = {}

for coef, dir_paths in matches.items():
    for dir_path in dir_paths:
        config_path = os.path.join(dir_path, 'config.txt')
        if not os.path.exists(config_path):
            # Report the skipped run instead of dropping it silently.
            print(f"Config file not found in {dir_path}")
            continue
        # Parse config.txt ("KEY: value" lines) into a dict of strings
        with open(config_path) as f:
            config_lines = f.readlines()
        config_dict = {}
        for line in config_lines:
            if ':' in line:
                key, value = line.split(':', 1)
                config_dict[key.strip()] = value.strip()
        try:
            # A missing SEED falls back to 'nan', which int() rejects, so the run is skipped.
            seed = int(config_dict.get('SEED', 'nan'))
        except ValueError as e:
            print(f"Error parsing SEED in {dir_path}: {e}")
            continue
        seed_to_dirs.setdefault(seed, []).append((dir_path, coef, config_dict))


In [14]:
# One figure per training run: side-by-side histograms of the actions chosen by
# agent 0 and agent 1 over all rows of their policy CSVs.
for seed, dir_info_list in seed_to_dirs.items():
    for dir_path, coef, config_dict in dir_info_list:
        csv_agent_0 = os.path.join(dir_path, f'policy_agent_0_checkpoint_{CHECKPOINT}.csv')
        csv_agent_1 = os.path.join(dir_path, f'policy_agent_1_checkpoint_{CHECKPOINT}.csv')

        # Both CSVs are required; otherwise the run is reported and skipped.
        if not (os.path.exists(csv_agent_0) and os.path.exists(csv_agent_1)):
            print(f"CSV files not found for SEED={seed} in {dir_path}")
            continue

        df0 = pd.read_csv(csv_agent_0)
        df1 = pd.read_csv(csv_agent_1)

        plt.figure(figsize=(12, 5))
        # Compose important info for the title
        lr = config_dict.get('LR', 'N/A')
        reward_coef = config_dict.get('REWARD_COEF', 'N/A')
        title = (f"Policy Histograms for SEED={seed}\n"
                 f"LR={lr}, REWARD_COEF={reward_coef}\n"
                 f"Dir: {os.path.basename(dir_path)}")
        plt.suptitle(title, fontsize=14)

        # Left panel: agent 0. Bin edges 0..5 with align='left' put one bar on each action id 0-4.
        plt.subplot(1, 2, 1)
        plt.hist(df0['action'].dropna(), bins=range(6), align='left', rwidth=0.8, color='blue')
        plt.title('Agent 0 Policy Histogram')
        plt.xlabel('Action')
        plt.ylabel('Count')
        plt.xticks(range(5), ['right', 'left', 'up', 'down', 'stay'])

        # Right panel: agent 1, same layout.
        plt.subplot(1, 2, 2)
        plt.hist(df1['action'].dropna(), bins=range(6), align='left', rwidth=0.8, color='red')
        plt.title('Agent 1 Policy Histogram')
        plt.xlabel('Action')
        plt.ylabel('Count')
        plt.xticks(range(5), ['right', 'left', 'up', 'down', 'stay'])

        # The rect keeps the subplots out of the top 5% and bottom 8% of the figure.
        plt.tight_layout(rect=[0, 0.08, 1, 0.95])

        # Save with seed and dir name in filename
        safe_dir = os.path.basename(dir_path).replace(' ', '_')
        fig_path = os.path.join(figures_dir, f'policy_histograms_seed_{seed}_{safe_dir}.png')
        plt.savefig(fig_path)
        plt.close()
        print(f"Figure saved to {fig_path}")

Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_histograms_seed_5_Training_2025-07-07_15-37-15.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_histograms_seed_5_Training_2025-07-07_09-31-06.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_histograms_seed_5_Training_2025-07-07_18-07-21.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_histograms_seed_5_Training_2025-07-07_07-12-04.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_histograms_seed_4_Training_2025-07-06_23-17-44.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_histograms_seed_4_Training_2025-07-06_16-41-50.png
Figure saved to /home/samuel_lozano/data/samue

In [15]:
# The five category columns
# NOTE(review): `categories` is not used by the loop below, which reads `categories_5` instead.
categories = [
    'own_coin_collected',
    'other_coin_collected',
    'reject_own_coin',
    'reject_other_coin',
    'no_coin_visible'
]

# One figure per training run: bar charts of the five category totals for each agent.
for seed, dir_info_list in seed_to_dirs.items():
    for dir_path, coef, config_dict in dir_info_list:
        csv_agent_0 = os.path.join(dir_path, f'policy_agent_0_checkpoint_{CHECKPOINT}.csv')
        csv_agent_1 = os.path.join(dir_path, f'policy_agent_1_checkpoint_{CHECKPOINT}.csv')

        # Both CSVs are required; otherwise the run is reported and skipped.
        if not (os.path.exists(csv_agent_0) and os.path.exists(csv_agent_1)):
            print(f"CSV files not found for SEED={seed} in {dir_path}")
            continue

        df0 = pd.read_csv(csv_agent_0)
        df1 = pd.read_csv(csv_agent_1)

        # Count occurrences for each category (column sums)
        counts0 = [df0[cat].sum() for cat in categories_5]
        counts1 = [df1[cat].sum() for cat in categories_5]

        plt.figure(figsize=(12, 5))
        # Compose important info for the title
        lr = config_dict.get('LR', 'N/A')
        reward_coef = config_dict.get('REWARD_COEF', 'N/A')
        title = (f"Policy Category Counts for SEED={seed}\n"
                 f"LR={lr}, REWARD_COEF={reward_coef}\n"
                 f"Dir: {os.path.basename(dir_path)}")
        plt.suptitle(title, fontsize=14)

        # Left panel: agent 0
        plt.subplot(1, 2, 1)
        plt.bar(categories_5, counts0, color='blue')
        plt.title('Agent 0 Policy Categories')
        plt.ylabel('Count')
        plt.xticks(rotation=20)

        # Right panel: agent 1
        plt.subplot(1, 2, 2)
        plt.bar(categories_5, counts1, color='red')
        plt.title('Agent 1 Policy Categories')
        plt.ylabel('Count')
        plt.xticks(rotation=20)

        # The rect keeps the subplots out of the top 5% and bottom 8% of the figure.
        plt.tight_layout(rect=[0, 0.08, 1, 0.95])

        # Save with seed and dir name in filename
        safe_dir = os.path.basename(dir_path).replace(' ', '_')
        fig_path = os.path.join(figures_dir, f'policy_categories_seed_{seed}_{safe_dir}.png')
        plt.savefig(fig_path)
        plt.close()
        print(f"Figure saved to {fig_path}")

Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_seed_5_Training_2025-07-07_15-37-15.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_seed_5_Training_2025-07-07_09-31-06.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_seed_5_Training_2025-07-07_18-07-21.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_seed_5_Training_2025-07-07_07-12-04.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_seed_4_Training_2025-07-06_23-17-44.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_seed_4_Training_2025-07-06_16-41-50.png
Figure saved to /home/samuel_lozano/data/samue

In [16]:
# First pass: find the global maximum count
# (largest 2-category total over every run and both agents; used as the shared y-limit).
global_max = 0

for seed, dir_info_list in seed_to_dirs.items():
    for dir_path, coef, config_dict in dir_info_list:
        csv_agent_0 = os.path.join(dir_path, f'policy_agent_0_checkpoint_{CHECKPOINT}.csv')
        csv_agent_1 = os.path.join(dir_path, f'policy_agent_1_checkpoint_{CHECKPOINT}.csv')

        # Runs without both CSVs are skipped here without a message; the second pass reports them.
        if not (os.path.exists(csv_agent_0) and os.path.exists(csv_agent_1)):
            continue

        df0 = pd.read_csv(csv_agent_0)
        df1 = pd.read_csv(csv_agent_1)
        counts0 = [df0[cat].sum() for cat in categories_2]
        counts1 = [df1[cat].sum() for cat in categories_2]
        local_max = max(counts0 + counts1)
        if local_max > global_max:
            global_max = local_max

# Second pass: plot with the same ylim and save figures
# (the CSV files are read a second time here).
for seed, dir_info_list in seed_to_dirs.items():
    for dir_path, coef, config_dict in dir_info_list:
        csv_agent_0 = os.path.join(dir_path, f'policy_agent_0_checkpoint_{CHECKPOINT}.csv')
        csv_agent_1 = os.path.join(dir_path, f'policy_agent_1_checkpoint_{CHECKPOINT}.csv')

        if not (os.path.exists(csv_agent_0) and os.path.exists(csv_agent_1)):
            print(f"CSV files not found for SEED={seed} in {dir_path}")
            continue

        df0 = pd.read_csv(csv_agent_0)
        df1 = pd.read_csv(csv_agent_1)
        counts0 = [df0[cat].sum() for cat in categories_2]
        counts1 = [df1[cat].sum() for cat in categories_2]

        plt.figure(figsize=(12, 5))
        # Compose important info for the title
        lr = config_dict.get('LR', 'N/A')
        reward_coef = config_dict.get('REWARD_COEF', 'N/A')
        title = (f"Policy Category Counts for SEED={seed}\n"
                 f"LR={lr}, REWARD_COEF={reward_coef}\n"
                 f"Dir: {os.path.basename(dir_path)}")
        plt.suptitle(title, fontsize=14)

        # Left panel: agent 0; y-axis runs to 5% above the global maximum.
        plt.subplot(1, 2, 1)
        plt.bar(categories_2, counts0, color='blue')
        plt.title('Agent 0 Policy Categories')
        plt.ylabel('Count')
        plt.xticks(rotation=20)
        plt.ylim(0, global_max * 1.05)

        # Right panel: agent 1, same y-limit so the panels and figures are comparable.
        plt.subplot(1, 2, 2)
        plt.bar(categories_2, counts1, color='red')
        plt.title('Agent 1 Policy Categories')
        plt.ylabel('Count')
        plt.xticks(rotation=20)
        plt.ylim(0, global_max * 1.05)

        # The rect keeps the subplots out of the top 5% and bottom 8% of the figure.
        plt.tight_layout(rect=[0, 0.08, 1, 0.95])

        # Save with seed and dir name in filename
        safe_dir = os.path.basename(dir_path).replace(' ', '_')
        fig_path = os.path.join(figures_dir, f'policy_categories_globalmax_seed_{seed}_{safe_dir}.png')
        plt.savefig(fig_path)
        plt.close()
        print(f"Figure saved to {fig_path}")

Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_globalmax_seed_5_Training_2025-07-07_15-37-15.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_globalmax_seed_5_Training_2025-07-07_09-31-06.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_globalmax_seed_5_Training_2025-07-07_18-07-21.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_globalmax_seed_5_Training_2025-07-07_07-12-04.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_globalmax_seed_4_Training_2025-07-06_23-17-44.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/policy_categories_globalmax_seed_4_Training_2025-07-06

## Averaging over seeds

In [13]:
# Output folder for the figures aggregated over runs (created on demand).
averaging_dir = os.path.join(figures_dir, "averaging_over_seeds")
os.makedirs(averaging_dir, exist_ok=True)

# Load the combined policy table from MAIN_PATH/all_policies_combined.csv.
df_all = pd.read_csv(os.path.join(MAIN_PATH, "all_policies_combined.csv"))

In [14]:
# Find global max for 2-category bar
# (largest category total over every attitude pair and agent id; used as a shared y-limit).
global_max_2cat = 0
for own_name, other_name in REWARD_ATTITUDE_PAIRS:
    own_coef = attitudes[own_name]
    other_coef = attitudes[other_name]
    for agent_id in [0, 1]:
        # Row-wise filter: own and other coefficients match the pair and the row belongs to agent_id.
        mask = df_all.apply(lambda row: match_coef(row, own_coef) and match_other_coef(row, other_coef) and row['AGENT_ID'] == agent_id, axis=1)
        df = df_all[mask]
        if not df.empty:
            counts = [df[cat].sum() for cat in categories_2]
            local_max = max(counts)
            if local_max > global_max_2cat:
                global_max_2cat = local_max

In [21]:
# Now plot for each (own, other, agent_id) triple
# Three figures per triple: action histogram, 5-category totals and 2-category totals.
for own_name, other_name in REWARD_ATTITUDE_PAIRS:
    own_coef = attitudes[own_name]
    other_coef = attitudes[other_name]
    for agent_id in [0, 1]:
        # Row-wise filter: own and other coefficients match the pair and the row belongs to agent_id.
        mask = df_all.apply(lambda row: match_coef(row, own_coef) and match_other_coef(row, other_coef) and row['AGENT_ID'] == agent_id, axis=1)
        df = df_all[mask]
        if df.empty:
            print(f"No data for {own_name} (as agent_{agent_id}) vs {other_name}")
            continue

        # Action histogram (bin edges 0..5 with align='left' put one bar on each action id 0-4)
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.hist(df['action'].dropna(), bins=range(6), align='left', rwidth=0.8, color='gray')
        ax.set_title(f'Action Histogram\n{own_name.capitalize()} (as agent_{agent_id}) vs {other_name.capitalize()}')
        ax.set_xlabel('Action')
        ax.set_ylabel('Count')
        ax.set_xticks(range(5))
        ax.set_xticklabels(action_labels)
        fname = f'action_histogram_{own_name}_as_agent{agent_id}_vs_{other_name}.png'
        save_plot(fig, fname, averaging_dir)

        # 5-category bar
        # The values are column sums over all selected rows, so the axis is labelled as a
        # total (it previously read "Average Count" although nothing is averaged here).
        avg_counts_5 = [df[cat].sum() for cat in categories_5]
        x5 = np.arange(len(categories_5))
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.bar(x5, avg_counts_5, color='purple')
        ax.set_title(f'5-Category Policy Bar\n{own_name.capitalize()} (as agent_{agent_id}) vs {other_name.capitalize()}')
        ax.set_ylabel('Total Count')
        ax.set_xticks(x5)
        ax.set_xticklabels(categories_5, rotation=20)
        fname = f'policy_5cat_{own_name}_as_agent{agent_id}_vs_{other_name}.png'
        save_plot(fig, fname, averaging_dir)

        # 2-category bar (global max): y-axis shared across figures via global_max_2cat
        avg_counts_2 = [df[cat].sum() for cat in categories_2]
        x2 = np.arange(len(categories_2))
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.bar(x2, avg_counts_2, color=['blue', 'red'])
        ax.set_title(f'2-Category Policy Bar (Global Max)\n{own_name.capitalize()} (as agent_{agent_id}) vs {other_name.capitalize()}')
        ax.set_ylabel('Total Count')
        ax.set_ylim(0, global_max_2cat * 1.05)
        ax.set_xticks(x2)
        ax.set_xticklabels(categories_2, rotation=20)
        fname = f'policy_2cat_globalmax_{own_name}_as_agent{agent_id}_vs_{other_name}.png'
        save_plot(fig, fname, averaging_dir)

Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/action_histogram_cooperative_as_agent0_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/policy_5cat_cooperative_as_agent0_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/policy_2cat_globalmax_cooperative_as_agent0_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/action_histogram_cooperative_as_agent1_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/policy_5cat_cooperative_as_agent1_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/com

In [16]:
# Find global max for 2-category bar (both agent ids pooled together)
global_max_2cat_averaged = 0
# Unpack each pair as (own_name, other_name). The loop previously unpacked the second
# element into `own_coef`, so `other_name` was whatever an earlier cell had left behind
# and the maximum was computed for the wrong attitude pairs.
for own_name, other_name in REWARD_ATTITUDE_PAIRS:
    own_coef = attitudes[own_name]
    other_coef = attitudes[other_name]
    # Row-wise filter: own and other coefficients match the pair (any agent id).
    mask = df_all.apply(lambda row: match_coef(row, own_coef) and match_other_coef(row, other_coef), axis=1)
    df = df_all[mask]
    if not df.empty:
        counts = [df[cat].sum() for cat in categories_2]
        local_max = max(counts)
        if local_max > global_max_2cat_averaged:
            global_max_2cat_averaged = local_max

In [23]:
# Now plot for each (own, other) pair
# Three figures per pair (both agent ids pooled): action histogram, 5-category totals
# and 2-category totals.
for own_name, other_name in REWARD_ATTITUDE_PAIRS:
    own_coef = attitudes[own_name]
    other_coef = attitudes[other_name]
    # Row-wise filter: own and other coefficients match the pair (any agent id).
    mask = df_all.apply(lambda row: match_coef(row, own_coef) and match_other_coef(row, other_coef), axis=1)
    df = df_all[mask]
    if df.empty:
        print(f"No data for {own_name} vs {other_name}")
        continue

    # Action histogram (bin edges 0..5 with align='left' put one bar on each action id 0-4)
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.hist(df['action'].dropna(), bins=range(6), align='left', rwidth=0.8, color='gray')
    ax.set_title(f'Action Histogram\n{own_name.capitalize()} acting vs {other_name.capitalize()}')
    ax.set_xlabel('Action')
    ax.set_ylabel('Count')
    ax.set_xticks(range(5))
    ax.set_xticklabels(action_labels)
    fname = f'action_histogram_{own_name}_vs_{other_name}.png'
    save_plot(fig, fname, averaging_dir)

    # 5-category bar
    # The values are column sums over all selected rows, so the axis is labelled as a
    # total (it previously read "Average Count" although nothing is averaged here).
    avg_counts_5 = [df[cat].sum() for cat in categories_5]
    x5 = np.arange(len(categories_5))
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(x5, avg_counts_5, color='purple')
    ax.set_title(f'5-Category Policy Bar\n{own_name.capitalize()} acting vs {other_name.capitalize()}')
    ax.set_ylabel('Total Count')
    ax.set_xticks(x5)
    ax.set_xticklabels(categories_5, rotation=20)
    fname = f'policy_5cat_{own_name}_vs_{other_name}.png'
    save_plot(fig, fname, averaging_dir)

    # 2-category bar (global max): y-axis shared across figures via global_max_2cat_averaged
    avg_counts_2 = [df[cat].sum() for cat in categories_2]
    x2 = np.arange(len(categories_2))
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(x2, avg_counts_2, color=['blue', 'red'])
    ax.set_title(f'2-Category Policy Bar (Global Max)\n{own_name.capitalize()} acting vs {other_name.capitalize()}')
    ax.set_ylabel('Total Count')
    ax.set_ylim(0, global_max_2cat_averaged * 1.05)
    ax.set_xticks(x2)
    ax.set_xticklabels(categories_2, rotation=20)
    fname = f'policy_2cat_globalmax_{own_name}_vs_{other_name}.png'
    save_plot(fig, fname, averaging_dir)

Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/action_histogram_cooperative_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/policy_5cat_cooperative_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/policy_2cat_globalmax_cooperative_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/action_histogram_competitive_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/policy_5cat_competitive_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/policy_2cat_g

### Mean

In [24]:
# Now plot for each (own, other, agent_id) triple
# Same selection as the sum-based plots, but each bar is the column mean over the selected rows.
for own_name, other_name in REWARD_ATTITUDE_PAIRS:
    own_coef = attitudes[own_name]
    other_coef = attitudes[other_name]
    for agent_id in [0, 1]:
        # Row-wise filter: own and other coefficients match the pair and the row belongs to agent_id.
        mask = df_all.apply(lambda row: match_coef(row, own_coef) and match_other_coef(row, other_coef) and row['AGENT_ID'] == agent_id, axis=1)
        df = df_all[mask]
        if df.empty:
            print(f"No data for {own_name} (as agent_{agent_id}) vs {other_name}")
            continue

        # 5-category bar
        avg_counts_5 = [df[cat].mean() for cat in categories_5]
        x5 = np.arange(len(categories_5))
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.bar(x5, avg_counts_5, color='purple')
        ax.set_title(f'5-Category Policy Bar\n{own_name.capitalize()} (as agent_{agent_id}) vs {other_name.capitalize()}')
        ax.set_ylabel('Average Count')
        ax.set_xticks(x5)
        ax.set_xticklabels(categories_5, rotation=20)
        fname = f'mean_policy_5cat_{own_name}_as_agent{agent_id}_vs_{other_name}.png'
        save_plot(fig, fname, averaging_dir)

        # 2-category bar (global max)
        # NOTE(review): title and file name say "Global Max", but no shared y-limit is set in this
        # cell, so each figure is auto-scaled - confirm whether a common ylim is wanted.
        avg_counts_2 = [df[cat].mean() for cat in categories_2]
        x2 = np.arange(len(categories_2))
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.bar(x2, avg_counts_2, color=['blue', 'red'])
        ax.set_title(f'2-Category Policy Bar (Global Max)\n{own_name.capitalize()} (as agent_{agent_id}) vs {other_name.capitalize()}')
        ax.set_ylabel('Average Count')
        ax.set_xticks(x2)
        ax.set_xticklabels(categories_2, rotation=20)
        fname = f'mean_policy_2cat_globalmax_{own_name}_as_agent{agent_id}_vs_{other_name}.png'
        save_plot(fig, fname, averaging_dir)

Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/mean_policy_5cat_cooperative_as_agent0_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/mean_policy_2cat_globalmax_cooperative_as_agent0_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/mean_policy_5cat_cooperative_as_agent1_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/mean_policy_2cat_globalmax_cooperative_as_agent1_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopCoins/RLLIB/Individual/comparing_policies/averaging_over_seeds/mean_policy_5cat_competitive_as_agent0_vs_individualistic.png
Figure saved to /home/samuel_lozano/data/samuel_lozano/CoopC