In [1]:
import jax
import jax.numpy as jnp
import optax
import numpy as np
from jaxmarl.environments.coin_game.make_train import make_train       
import os
import re
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Training

In [2]:
# Hiperparámetros
NUM_ENVS = 1
NUM_INNER_STEPS = 250
NUM_UPDATES_PER_EPOCH = 50
NUM_EPOCHS = 3000
NUM_AGENTS = 2
SHOW_EVERY_N_EPOCHS = 25
SAVE_EVERY_N_EPOCHS = 500
LR = 3e-4
PAYOFF_MATRIX = [[1, 0, 0], [1, 0, 0]]
GRID_SIZE = 3
REWARD_COEF = [[1, 0], [1, 0]]

In [None]:
#local = '/mnt/lustre/home/samuloza'
local = 'D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado'
save_dir = f'{local}/data/samuel_lozano/coin_game/pruebas/Prisioner_dilemma/'
current_date = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

config = {
    "NUM_ENVS": NUM_ENVS,
    "NUM_INNER_STEPS": NUM_INNER_STEPS,
    "NUM_EPOCHS": NUM_EPOCHS,
    "NUM_AGENTS": NUM_AGENTS,
    "SHOW_EVERY_N_EPOCHS": SHOW_EVERY_N_EPOCHS,
    "SAVE_EVERY_N_EPOCHS": SAVE_EVERY_N_EPOCHS,
    "LR": LR,
    "PAYOFF_MATRIX": PAYOFF_MATRIX,
    "GRID_SIZE": GRID_SIZE,
    "REWARD_COEF": REWARD_COEF,
    "SAVE_DIR": save_dir,
    "GAMMA": 0.99,  # Slightly reduced for more immediate rewards
    "GAE_LAMBDA": 0.95,  # GAE-Lambda parameter
    "ENT_COEF": 0.15,  # Increased entropy coefficient for better exploration
    "CLIP_EPS": 0.1,  # PPO clip parameter
    "VF_COEF": 0.7,  # Value function coefficient
    "MAX_GRAD_NORM": 0.5,  # Gradient clipping
    "MINIBATCH_SIZE": NUM_INNER_STEPS // NUM_UPDATES_PER_EPOCH,
    "NUM_UPDATES_PER_MINIBATCH": 4,
    "DEVICE": jax.devices()
}


trainer, current_date = make_train(config)

# Analysis

In [2]:
# Directorios base
#local = '/mnt/lustre/home/samuloza'
local = 'C:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado'
base_dirs = {
    "Prisioner_dilemma": f"{local}/data/samuel_lozano/coin_game/RLLIB/Prisioner_dilemma",
    "No_dilemma": f"{local}/data/samuel_lozano/coin_game/RLLIB/No_dilemma"
}

output_path = f"{local}/data/samuel_lozano/coin_game/RLLIB/training_results.csv"

# Eliminar el archivo CSV si ya existe
if os.path.exists(output_path):
    os.remove(output_path)

In [3]:
all_dfs = []

# Patrón para capturar los coeficientes de recompensa
reward_pattern = re.compile(r"REWARD_COEF:\s*\[\[\s*([\d\.eE+-]+),\s*([\d\.eE+-]+)\],\s*\[\s*([\d\.eE+-]+),\s*([\d\.eE+-]+)\]\]")

In [4]:
for dilemma_name, base_dir in base_dirs.items():
    dilemma_flag = 1 if "Prisioner" in dilemma_name else 0
    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        if not os.path.isdir(folder_path):
            continue

        date_time_str = folder.replace("Training_", "")
        config_path = os.path.join(folder_path, "config.txt")
        csv_path = os.path.join(folder_path, "training_stats.csv")

        if not (os.path.exists(config_path) and os.path.exists(csv_path)):
            continue

        with open(config_path, "r") as f:
            config_contents = f.read()
        match = reward_pattern.search(config_contents)
        if not match:
            continue

        alpha_1, beta_1, alpha_2, beta_2 = map(float, match.groups())

        grid_size_match = re.search(r"GRID_SIZE:\s*(\d+)", config_contents)
        grid_size = int(grid_size_match.group(1)) if grid_size_match else -1 

        lr_match = re.search(r"LR:\s*([0-9.eE+-]+)", config_contents)
        lr = float(lr_match.group(1)) 

        with open(csv_path, 'r') as f:
            lines = f.readlines()
        header = lines[0]
        filtered_lines = [header] + [line for line in lines[1:] if not line.startswith("episode,env")]

        from io import StringIO
        df = pd.read_csv(StringIO("".join(filtered_lines)))

        df.iloc[:, 0] = range(1, len(df) + 1)

        df.insert(0, "timestamp", date_time_str)
        df.insert(1, "dilemma", dilemma_flag)
        df.insert(2, "alpha_1", alpha_1)
        df.insert(3, "beta_1", beta_1)
        df.insert(4, "alpha_2", alpha_2)
        df.insert(5, "beta_2", beta_2)
        df.insert(6, "grid_size", grid_size)
        df.insert(7, "lr", lr)

        all_dfs.append(df)

In [5]:
# Concatenar todos los resultados
final_df = pd.concat(all_dfs, ignore_index=True)
final_df.to_csv(output_path, index=False)

# Visualization

In [6]:
# Leer el CSV especificando los tipos de datos
dtype_dict = {
    "timestamp": str,
    "dilemma": int,
    "alpha_1": float,
    "beta_1": float,
    "alpha_2": float,
    "beta_2": float
}

df = pd.read_csv(output_path, dtype=dtype_dict, low_memory=False)
for col in df.columns[6:]:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Crear una columna identificadora de combinación de coeficientes
df = df.sort_values(by=["alpha_1", "alpha_2"], ascending=[False, False])
df["attitude_key"] = df.apply(lambda row: f"{row['alpha_1']}_{row['beta_1']}_{row['alpha_2']}_{row['beta_2']}", axis=1)
df["pure_reward_total"] = df["pure_reward_agent_0"] + df["pure_reward_agent_1"]
df = df.rename(columns={"episode": "epoch", "no_coin_adjacent_agent_0": "no_coin_visible_agent_0", "no_coin_adjacent_agent_1": "no_coin_visible_agent_1"})

In [8]:
# Filtrar todas las combinaciones únicas
unique_attitudes = df["attitude_key"].unique()
unique_lr = df["lr"].unique()
unique_dilemma = df["dilemma"].unique()
unique_environments = df["env"].unique()

figures_dir = f"{local}/data/samuel_lozano/coin_game/RLLIB/figures/"
os.makedirs(figures_dir, exist_ok=True)

metrics_0 = [
    "own_coin_collected_agent_0",
    "other_coin_collected_agent_0",
    "reject_own_coin_agent_0",
    "reject_other_coin_agent_0",
    "no_coin_visible_agent_0"
]

metrics_1 = [
    "own_coin_collected_agent_1",
    "other_coin_collected_agent_1",
    "reject_own_coin_agent_1",
    "reject_other_coin_agent_1",
    "no_coin_visible_agent_1"
]

In [9]:
# Print Pure total reward vs epoch

for attitude in unique_attitudes:
    subset = df[df["attitude_key"] == attitude]

    plt.figure(figsize=(10, 6))
    
    for dilemma_value in unique_dilemma:
        dilemma_filtered = subset[subset["dilemma"] == dilemma_value]
    
        # Filtrar por tasa de aprendizaje
        for lr in unique_lr:
            lr_filtered = dilemma_filtered[dilemma_filtered["lr"] == lr]
            grouped = lr_filtered.groupby("epoch")["pure_reward_total"].mean().reset_index()
            label = f"Dilemma {dilemma_value}, LR {lr}"
            plt.plot(grouped["epoch"], grouped["pure_reward_total"], label=label)
    
    # Añadir detalles
    plt.title(f"Pure Reward vs Epoch\nAttitude {attitude}")
    plt.xlabel("Epoch")
    #plt.xlim([0,100])
    plt.ylabel("Pure Reward Total")
    plt.legend()
    plt.tight_layout()
    
    sanitized_attitude = attitude.replace('.', 'p')
    filename = f"pure_reward_attitude_{sanitized_attitude}.png"
    filepath = os.path.join(figures_dir, filename)
    plt.savefig(filepath)
    plt.close()

In [10]:
# Print Each agent pure total reward vs epoch

for attitude in unique_attitudes:
    subset = df[df["attitude_key"] == attitude]

    for dilemma_value in unique_dilemma:
        dilemma_filtered = subset[subset["dilemma"] == dilemma_value]
    
        # Crear la figura
        plt.figure(figsize=(10, 6))
    
        # Filtrar por tasa de aprendizaje
        for lr in unique_lr:
            lr_filtered = dilemma_filtered[dilemma_filtered["lr"] == lr]
            grouped = lr_filtered.groupby("epoch")[["pure_reward_agent_0", "pure_reward_agent_1"]].mean().reset_index()
            label_0 = f"Agent 0, LR {lr}"
            label_1 = f"Agent 1, LR {lr}"
            plt.plot(grouped["epoch"], grouped["pure_reward_agent_0"], label=label_0)
            plt.plot(grouped["epoch"], grouped["pure_reward_agent_1"], label=label_1)
    
        # Añadir detalles
        plt.title(f"Pure Reward vs Epoch\nAttitude {attitude}, Dilemma {dilemma_value}")
        plt.xlabel("Epoch")
        #plt.xlim([0,100])
        plt.ylabel("Pure Reward Total")
        plt.legend()
        plt.tight_layout()
        
        sanitized_attitude = attitude.replace('.', 'p')
        filename = f"pure_reward_agents_d{dilemma_value}_attitude_{sanitized_attitude}.png"
        filepath = os.path.join(figures_dir, filename)
        plt.savefig(filepath)
        plt.close()

In [11]:
# Print agent metrics vs epoch
for attitude in unique_attitudes:
    subset = df[df["attitude_key"] == attitude]

    att_parts = attitude.split('_')
    att0_title = f"{att_parts[0]}_{att_parts[1]}"
    att1_title = f"{att_parts[2]}_{att_parts[3]}"

    for dilemma_value in [0, 1]:
        for grid_size in subset["grid_size"].unique():
            for lr in subset["lr"].unique():
                filtered_subset = subset[(subset["dilemma"] == dilemma_value) & (subset["grid_size"] == grid_size) & (subset["lr"] == lr)]
    
                plt.figure(figsize=(12, 6))
                for metric in metrics_0:
                    grouped = filtered_subset.groupby(["epoch"])[metric].mean().reset_index()
                    plt.plot(grouped["epoch"], grouped[metric], label=metric.replace("_", " ").title())
                plt.title(f"Metrics per Epoch - Dilemma {dilemma_value}, LR {lr}, Attitude {att0_title}")
                plt.xlabel("Epoch")
                plt.ylabel("Mean value")
                plt.legend()
                #plt.xlim([0, 100])
                plt.tight_layout()
                
                sanitized_attitude = attitude.replace('.', 'p')
                filename_0 = f"metrics_agent0_d{dilemma_value}_lr{str(lr).replace('.', 'p')}_attitude_{sanitized_attitude}.png"
                filepath_0 = os.path.join(figures_dir, filename_0)
                plt.savefig(filepath_0)
                plt.close()
    
                plt.figure(figsize=(12, 6))
                for metric in metrics_1:
                    grouped = filtered_subset.groupby(["epoch"])[metric].mean().reset_index()
                    plt.plot(grouped["epoch"], grouped[metric], label=metric.replace("_", " ").title())
                plt.title(f"Metrics per Epoch - Dilemma {dilemma_value}, LR {lr}, Attitude {att1_title}")
                plt.xlabel("Epoch")
                plt.ylabel("Mean value")
                plt.legend()
                #plt.xlim([0, 100])
                plt.tight_layout()

                filename_1 = f"metrics_agent1_d{dilemma_value}_lr{str(lr).replace('.', 'p')}_attitude_{sanitized_attitude}.png"
                filepath_1 = os.path.join(figures_dir, filename_1)
                plt.savefig(filepath_1)
                plt.close()

## Averaging over attitudes

In [12]:
# Print Pure total reward vs epoch

plt.figure(figsize=(10, 6))

for dilemma_value in unique_dilemma:
    dilemma_filtered_df = df[df["dilemma"] == dilemma_value]

    # Filtrar por tasa de aprendizaje
    for lr in unique_lr:
        lr_filtered_df = dilemma_filtered_df[dilemma_filtered_df["lr"] == lr]
        grouped = lr_filtered_df.groupby("epoch")["pure_reward_total"].mean().reset_index()
        label = f"Dilemma {dilemma_value}, LR {lr}"
        plt.plot(grouped["epoch"], grouped["pure_reward_total"], label=label)

# Añadir detalles
plt.title(f"Pure Reward vs Epoch")
plt.xlabel("Epoch")
#plt.xlim([0,100])
plt.ylabel("Pure Reward Total")
plt.legend()
plt.tight_layout()

filename = f"pure_reward.png"
filepath = os.path.join(figures_dir, filename)
plt.savefig(filepath)
plt.close()

In [13]:
# Print Each agent pure total reward vs epoch

for dilemma_value in unique_dilemma:
    dilemma_filtered = subset[subset["dilemma"] == dilemma_value]

    # Crear la figura
    plt.figure(figsize=(10, 6))

    # Filtrar por tasa de aprendizaje
    for lr in unique_lr:
        lr_filtered = dilemma_filtered[dilemma_filtered["lr"] == lr]
        grouped = lr_filtered.groupby("epoch")[["pure_reward_agent_0", "pure_reward_agent_1"]].mean().reset_index()
        label_0 = f"Agent 0, LR {lr}"
        label_1 = f"Agent 1, LR {lr}"
        plt.plot(grouped["epoch"], grouped["pure_reward_agent_0"], label=label_0)
        plt.plot(grouped["epoch"], grouped["pure_reward_agent_1"], label=label_1)

    # Añadir detalles
    plt.title(f"Pure Reward vs Epoch\nDilemma {dilemma_value}")
    plt.xlabel("Epoch")
    #plt.xlim([0,100])
    plt.ylabel("Pure Reward Total")
    plt.legend()
    plt.tight_layout()
    
    sanitized_attitude = attitude.replace('.', 'p')
    filename = f"pure_reward_agents_d{dilemma_value}.png"
    filepath = os.path.join(figures_dir, filename)
    plt.savefig(filepath)
    plt.close()

In [14]:
# Print agent metrics vs epoch

for dilemma_value in unique_dilemma:
    for grid_size in df["grid_size"].unique():
        for lr in unique_lr:
            filtered_df = df[(df["dilemma"] == dilemma_value) & (df["grid_size"] == grid_size) & (df["lr"] == lr)]

            plt.figure(figsize=(12, 6))

            for metric in metrics_0:
                grouped = filtered_df.groupby(["epoch"])[metric].mean().reset_index()
                plt.plot(grouped["epoch"], grouped[metric], label=metric.replace("_", " ").title())

            plt.title(f"Metrics per Epoch - Dilemma {dilemma_value}, LR {lr}")
            plt.xlabel("Epoch")
            plt.ylabel("Mean value")
            plt.legend()
            #plt.xlim([0, 100])
            plt.tight_layout()
            filename_0 = f"metrics_agent0_d{dilemma_value}_lr{str(lr).replace('.', 'p')}.png"
            filepath_0 = os.path.join(figures_dir, filename_0)
            plt.savefig(filepath_0)
            plt.close()

            plt.figure(figsize=(12, 6))

            for metric in metrics_1:
                grouped = filtered_df.groupby(["epoch"])[metric].mean().reset_index()
                plt.plot(grouped["epoch"], grouped[metric], label=metric.replace("_", " ").title())

            plt.title(f"Metrics per Epoch - Dilemma {dilemma_value}, LR {lr}")
            plt.xlabel("Epoch")
            plt.ylabel("Mean value")
            plt.legend()
            #plt.xlim([0, 100])
            plt.tight_layout()
            filename_1 = f"metrics_agent1_d{dilemma_value}_lr{str(lr).replace('.', 'p')}.png"
            filepath_1 = os.path.join(figures_dir, filename_1)
            plt.savefig(filepath_1)
            plt.close()

## Mean values over few epochs

In [19]:
N = 15

smoothed_figures_dir = f"{local}/data/samuel_lozano/coin_game/RLLIB/figures/smoothed_{N}/"
os.makedirs(smoothed_figures_dir, exist_ok=True)

In [20]:
# Print Pure total reward vs epoch
for attitude in unique_attitudes:
    subset = df[df["attitude_key"] == attitude]

    plt.figure(figsize=(10, 6))
    
    for dilemma_value in unique_dilemma:
        dilemma_filtered = subset[subset["dilemma"] == dilemma_value]
    
        # Filtrar por tasa de aprendizaje
        for lr in unique_lr:
            lr_filtered = dilemma_filtered[dilemma_filtered["lr"] == lr]
            lr_filtered["epoch_block"] = (lr_filtered["epoch"] // N)

            # Calcular la media de recompensa por bloque
            block_means = lr_filtered.groupby("epoch_block")["pure_reward_total"].transform("mean")

            # Sustituir cada valor por la media de su bloque
            lr_filtered["smoothed_reward"] = block_means

            label = f"Dilemma {dilemma_value}, LR {lr}"
            plt.plot(lr_filtered["epoch"], lr_filtered["smoothed_reward"], label=label)
    
    # Añadir detalles
    plt.title(f"Pure Reward vs Epoch\nAttitude {attitude}")
    plt.xlabel("Epoch")
    #plt.xlim([0,100])
    plt.ylabel("Pure Reward Total")
    plt.legend()
    plt.tight_layout()
    
    sanitized_attitude = attitude.replace('.', 'p')
    filename = f"pure_reward_attitude_{sanitized_attitude}_smoothed_{N}.png"
    filepath = os.path.join(smoothed_figures_dir, filename)
    plt.savefig(filepath)
    plt.close()

In [21]:
# Print Each agent pure total reward vs epoch

for attitude in unique_attitudes:
    subset = df[df["attitude_key"] == attitude]

    for dilemma_value in unique_dilemma:
        dilemma_filtered = subset[subset["dilemma"] == dilemma_value]
    
        # Crear la figura
        plt.figure(figsize=(10, 6))
    
        # Filtrar por tasa de aprendizaje
        for lr in unique_lr:
            lr_filtered = dilemma_filtered[dilemma_filtered["lr"] == lr]

            lr_filtered["epoch_block"] = (lr_filtered["epoch"] // N)

            # Calcular la media de recompensa por bloque
            block_means = lr_filtered.groupby("epoch_block")[["pure_reward_agent_0", "pure_reward_agent_1"]].transform("mean")

            # Sustituir cada valor por la media de su bloque
            lr_filtered["smoothed_reward_agent_0"] = block_means["pure_reward_agent_0"]
            lr_filtered["smoothed_reward_agent_1"] = block_means["pure_reward_agent_1"]

            label_0 = f"Agent 0, LR {lr}"
            label_1 = f"Agent 1, LR {lr}"
            plt.plot(lr_filtered["epoch"], lr_filtered["smoothed_reward_agent_0"], label=label_0)
            plt.plot(lr_filtered["epoch"], lr_filtered["smoothed_reward_agent_1"], label=label_1)
    
        # Añadir detalles
        plt.title(f"Pure Reward vs Epoch\nAttitude {attitude}, Dilemma {dilemma_value}")
        plt.xlabel("Epoch")
        #plt.xlim([0,100])
        plt.ylabel("Pure Reward Total")
        plt.legend()
        plt.tight_layout()
        
        sanitized_attitude = attitude.replace('.', 'p')
        filename = f"pure_reward_agents_d{dilemma_value}_attitude_{sanitized_attitude}.png"
        filepath = os.path.join(smoothed_figures_dir, filename)
        plt.savefig(filepath)
        plt.close()

In [22]:
# Print agent metrics vs epoch
for attitude in unique_attitudes:
    subset = df[df["attitude_key"] == attitude]

    att_parts = attitude.split('_')
    att0_title = f"{att_parts[0]}_{att_parts[1]}"
    att1_title = f"{att_parts[2]}_{att_parts[3]}"

    for dilemma_value in [0, 1]:
        for grid_size in subset["grid_size"].unique():
            for lr in subset["lr"].unique():
                filtered_subset = subset[(subset["dilemma"] == dilemma_value) & (subset["grid_size"] == grid_size) & (subset["lr"] == lr)]
                filtered_subset["epoch_block"] = (filtered_subset["epoch"] // N)

                plt.figure(figsize=(12, 6))
                for metric in metrics_0:
                    block_means = filtered_subset.groupby("epoch_block")[[metric]].transform("mean")
                    filtered_subset[f"smoothed_{metric}"] = block_means[metric]
                    plt.plot(filtered_subset["epoch"], filtered_subset[f"smoothed_{metric}"], label=metric.replace("_", " ").title())
                plt.title(f"Metrics per Epoch - Dilemma {dilemma_value}, LR {lr}, Attitude {att0_title}")
                plt.xlabel("Epoch")
                plt.ylabel("Mean value")
                plt.legend()
                #plt.xlim([0, 100])
                plt.tight_layout()
                
                sanitized_attitude = attitude.replace('.', 'p')
                filename_0 = f"metrics_agent0_d{dilemma_value}_lr{str(lr).replace('.', 'p')}_attitude_{sanitized_attitude}_smoothed_{N}.png"
                filepath_0 = os.path.join(smoothed_figures_dir, filename_0)
                plt.savefig(filepath_0)
                plt.close()
    
                plt.figure(figsize=(12, 6))
                
                for metric in metrics_1:
                    block_means = filtered_subset.groupby("epoch_block")[[metric]].transform("mean")
                    filtered_subset[f"smoothed_{metric}"] = block_means[metric]
                    plt.plot(filtered_subset["epoch"], filtered_subset[f"smoothed_{metric}"], label=metric.replace("_", " ").title())
                plt.title(f"Metrics per Epoch - Dilemma {dilemma_value}, LR {lr}, Attitude {att1_title}")
                plt.xlabel("Epoch")
                plt.ylabel("Mean value")
                plt.legend()
                #plt.xlim([0, 100])
                plt.tight_layout()

                filename_1 = f"metrics_agent1_d{dilemma_value}_lr{str(lr).replace('.', 'p')}_attitude_{sanitized_attitude}_smoothed_{N}.png"
                filepath_1 = os.path.join(smoothed_figures_dir, filename_1)
                plt.savefig(filepath_1)
                plt.close()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_subset["epoch_block"] = (filtered_subset["epoch"] // N)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_subset[f"smoothed_{metric}"] = block_means[metric]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_subset[f"smoothed_{metric}"] = block_means[metric]
A value is trying to