# Sanity check para FictitiousPlay Agent
Este notebook prueba la implementación del agente FictitiousPlay en los juegos disponibles (RPS, MP, Blotto y Foraging), ejecutando 10 episodios de cada uno.

In [1]:
import traceback

import numpy as np
import matplotlib.pyplot as plt

from games.rps import RPS
from games.mp import MP
from games.blotto import Blotto
from games.foraging import Foraging
from agents.fictitiousplay_t import FictitiousPlay # Import modificado

## Definir juegos y configuraciones
Define los juegos a probar y sus configuraciones.

In [2]:
# Games exercised by this sanity check. Each entry carries a display name,
# the game class, and the keyword arguments used to construct the game.
games_to_test = [
    dict(name="RPS", game=RPS, config={}),
    dict(name="MP", game=MP, config={}),
    dict(name="Blotto", game=Blotto, config=dict(S=3, N=2)),
    dict(
        name="Foraging",
        game=Foraging,
        config=dict(config="Foraging-5x5-2p-1f-v3", seed=1),
    ),
]

def get_game_instance(game_entry):
    """Instantiate the game described by ``game_entry``.

    Args:
        game_entry: Mapping with a ``"game"`` key holding the game class and
            an optional ``"config"`` key holding a dict of keyword arguments
            for its constructor.

    Returns:
        A new instance of ``game_entry["game"]`` built with the configured
        keyword arguments (no arguments when the config is empty or absent).
    """
    # Forward the config uniformly instead of dispatching on the game's name,
    # so a newly added entry with a non-empty config is not silently ignored.
    config = game_entry.get("config") or {}
    return game_entry["game"](**config)

## Inicializar FictitiousPlay Agent para cada juego
Para cada juego, inicializar el FictitiousPlay Agent para todos los agentes en el entorno.

In [3]:
def create_agents(game):
    """Build one FictitiousPlay agent per entry in ``game.agents``.

    Every agent is constructed with the same ``game`` object and ``seed=1``.

    Returns:
        Dict mapping each agent id to its FictitiousPlay instance.
    """
    agents_by_id = {}
    for agent_id in game.agents:
        agents_by_id[agent_id] = FictitiousPlay(game=game, agent=agent_id, seed=1)
    return agents_by_id

## Ejecutar episodios para cada juego
Ejecutar al menos 10 episodios por juego y recolectar las recompensas acumuladas.

In [4]:
def _game_debug_name(game):
    """Return the label used for ``game`` in debug output.

    Preference order: ``game.metadata['name']``, then ``game.env.spec.id``,
    then the class name of ``game``.
    """
    metadata = getattr(game, 'metadata', None)
    if metadata and 'name' in metadata:
        return metadata['name']
    spec = getattr(getattr(game, 'env', None), 'spec', None)
    if spec:
        return spec.id
    return game.__class__.__name__


def play_episodes(game, agents, episodes=10, max_turns_per_episode=200, verbose=None):
    """Play ``episodes`` episodes of ``game`` and return the summed rewards.

    Each turn, every agent that is neither terminated nor truncated picks an
    action, the joint action dict is passed to ``game.step``, the still-active
    agents are updated, and ``game.reward(agent_id)`` is added to each agent's
    running total.

    Args:
        game: Game object exposing ``agents``, ``terminations``,
            ``truncations``, ``rewards``, ``reset()``, ``step(actions)`` and
            ``reward(agent_id)``.
        agents: Dict mapping agent id to an object with ``action()`` and
            ``update()``.
        episodes: Number of episodes to play.
        max_turns_per_episode: Safety cap on turns per episode. The cap is
            checked after each step, so at least one turn is always played.
            When it is hit, every agent is marked truncated and the episode
            ends.
        verbose: ``None`` (default) prints per-turn debug output only when the
            resolved game name contains ``"Foraging"``; ``True``/``False``
            forces it on/off. Messages keep the ``[Foraging Debug - FP]``
            prefix in every case.

    Returns:
        Dict mapping agent id to the float total reward over all episodes.

    Raises:
        Exception: Whatever ``agent.action()`` or ``game.step()`` raises is
            propagated unchanged (after a one-line debug message when debug
            output is enabled).
    """
    total_rewards = {agent_id: 0.0 for agent_id in game.agents}
    game_name = _game_debug_name(game)
    debug = ("Foraging" in game_name) if verbose is None else bool(verbose)

    for ep in range(episodes):
        if debug:
            print(f"[Foraging Debug - FP] Starting Episode {ep+1}/{episodes} for {game_name}")

        game.reset()
        turn = 0

        while not (all(game.terminations.values()) or all(game.truncations.values())):
            if debug:
                print(f"\n[Foraging Debug - FP] Episode {ep+1}, Turn {turn+1}")

            # Collect one action from every agent that is still active.
            current_actions = {}
            for agent_id in game.agents:
                if game.terminations[agent_id] or game.truncations[agent_id]:
                    continue
                try:
                    action = agents[agent_id].action()
                except Exception as e:
                    if debug:
                        print(f"[Foraging Debug - FP] ERROR in agent {agent_id}.action(): {e}")
                    # The re-raised exception carries the full traceback.
                    raise
                current_actions[agent_id] = action
                if debug:
                    print(f"[Foraging Debug - FP] Agent {agent_id} chose action: {action}")

            if not current_actions:
                if debug:
                    print(f"[Foraging Debug - FP] All agents terminated/truncated. Ending episode.")
                break

            try:
                game.step(current_actions)
            except Exception as e:
                if debug:
                    print(f"[Foraging Debug - FP] ERROR in game.step({current_actions}): {e}")
                raise

            # NOTE(review): agents whose termination/truncation flag was set by
            # this very step are skipped below, so they never update on the
            # final transition of an episode -- confirm this is intended.
            for agent_id in game.agents:
                if not game.terminations[agent_id] and not game.truncations[agent_id]:
                    agents[agent_id].update()
                total_rewards[agent_id] += game.reward(agent_id)

            if debug:
                current_rewards = {ag: game.rewards[ag] for ag in game.agents}
                print(f"[Foraging Debug - FP] Rewards after turn {turn+1}: {current_rewards}")
                print(f"[Foraging Debug - FP] Terminations: {game.terminations}")
                print(f"[Foraging Debug - FP] Truncations: {game.truncations}")

            turn += 1
            if turn >= max_turns_per_episode:
                if debug:
                    print(f"[Foraging Debug - FP] Safety break: Exceeded {max_turns_per_episode} turns in episode {ep+1}.")
                # Mark every agent truncated so the game state reflects the cut-off.
                for agent_id in game.agents:
                    game.truncations[agent_id] = True
                break

        if debug:
            print(f"[Foraging Debug - FP] Episode {ep+1} finished. Total turns: {turn}")

    return total_rewards

## Mostrar resultados
Mostrar las recompensas acumuladas para cada agente en cada juego después de 10 episodios.

In [5]:
# Number of episodes played per game; used both for the run and the report.
N_EPISODES = 10

# Maps game name -> per-agent reward dict, or an "Error: ..." string on failure.
resultados = {}
for entry in games_to_test:
    print(f"\nTesting {entry['name']} with FictitiousPlay Agent...")
    try:
        # Construction happens inside the try so that a game or agent that
        # fails to build is recorded as an error instead of aborting the
        # remaining games.
        game_instance = get_game_instance(entry)
        agents = create_agents(game_instance)
        recompensas = play_episodes(game_instance, agents, episodes=N_EPISODES)
    except Exception as e:
        print(f"ERROR testing {entry['name']}: {e}")
        traceback.print_exc()
        resultados[entry['name']] = f"Error: {e}"
        continue

    resultados[entry['name']] = recompensas
    for agent, recompensa in recompensas.items():
        print(f"Agent {agent}: Total reward in {N_EPISODES} episodes: {recompensa}")

print("\n\nSummary of Results:")
for game_name, res in resultados.items():
    print(f"Game: {game_name}")
    if isinstance(res, dict):
        for agent, reward in res.items():
            print(f"  {agent}: {reward}")
    else:
        # Failed runs are stored as an error string rather than a dict.
        print(f"  Result: {res}")


Testing RPS with FictitiousPlay Agent...
Agent agent_0: Total reward in 10 episodes: -1.0
Agent agent_1: Total reward in 10 episodes: 1.0

Testing MP with FictitiousPlay Agent...
Agent agent_0: Total reward in 10 episodes: 0.0
Agent agent_1: Total reward in 10 episodes: 0.0

Testing Blotto with FictitiousPlay Agent...
Agent agent_0: Total reward in 10 episodes: 0.0
Agent agent_1: Total reward in 10 episodes: 0.0

Testing Foraging with FictitiousPlay Agent...
[Foraging Debug - FP] Starting Episode 1/10 for Foraging-5x5-2p-1f-v3

[Foraging Debug - FP] Episode 1, Turn 1
[Foraging Debug - FP] Agent agent_0 chose action: 5
[Foraging Debug - FP] Agent agent_1 chose action: 3
[Foraging Debug - FP] Rewards after turn 1: {'agent_0': 0, 'agent_1': 0}
[Foraging Debug - FP] Terminations: {'agent_0': False, 'agent_1': False}
[Foraging Debug - FP] Truncations: {'agent_0': False, 'agent_1': False}

[Foraging Debug - FP] Episode 1, Turn 2
[Foraging Debug - FP] Agent agent_0 chose action: 4
[Foraging 

  logger.warn(


[Foraging Debug - FP] Agent agent_1 chose action: 0
[Foraging Debug - FP] Rewards after turn 27: {'agent_0': 0, 'agent_1': 0}
[Foraging Debug - FP] Terminations: {'agent_0': False, 'agent_1': False}
[Foraging Debug - FP] Truncations: {'agent_0': False, 'agent_1': False}

[Foraging Debug - FP] Episode 8, Turn 28
[Foraging Debug - FP] Agent agent_0 chose action: 5
[Foraging Debug - FP] Agent agent_1 chose action: 5
[Foraging Debug - FP] Rewards after turn 28: {'agent_0': 0, 'agent_1': 0}
[Foraging Debug - FP] Terminations: {'agent_0': False, 'agent_1': False}
[Foraging Debug - FP] Truncations: {'agent_0': False, 'agent_1': False}

[Foraging Debug - FP] Episode 8, Turn 29
[Foraging Debug - FP] Agent agent_0 chose action: 4
[Foraging Debug - FP] Agent agent_1 chose action: 2
[Foraging Debug - FP] Rewards after turn 29: {'agent_0': 0, 'agent_1': 0}
[Foraging Debug - FP] Terminations: {'agent_0': False, 'agent_1': False}
[Foraging Debug - FP] Truncations: {'agent_0': False, 'agent_1': False}