In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import wandb
import time
import os
from itertools import product
from games.mp import MP
from games.rps import RPS
from games.blotto import Blotto

from agents.fictitiousplay import FictitiousPlay
from agents.regretmatching import RegretMatching
from agents.random_agent import RandomAgent

import os
# Configure Weights & Biases through environment variables: WANDB_SILENT is
# set to "true" and WANDB_START_METHOD to "thread".
os.environ.update({
    'WANDB_SILENT': "true",
    'WANDB_START_METHOD': "thread",
})

In [None]:
# Experiment configurations, one entry per experiment to run.
# Each entry names the game and its constructor parameters, the training
# schedule (episodes per iteration and number of iterations), the number of
# evaluation episodes, and the type and constructor kwargs of every agent.
CONFIGS = [
    dict(
        game='MP',
        game_params=dict(),
        train_config=dict(episodes=1000, iterations=10),
        eval_config=dict(episodes=500),
        agents=dict(
            agent_0=dict(type='fictitious_play', config=dict(initial=None, seed=42)),
            agent_1=dict(type='regret_matching', config=dict(initial=None, seed=42)),
        ),
    ),
    dict(
        game='RPS',
        game_params=dict(),
        train_config=dict(episodes=1000, iterations=10),
        eval_config=dict(episodes=500),
        agents=dict(
            agent_0=dict(type='fictitious_play', config=dict(initial=None, seed=42)),
            agent_1=dict(type='random', config=dict(initial=None, seed=42)),
        ),
    ),
    dict(
        game='Blotto',
        game_params=dict(S=5, N=2),
        train_config=dict(episodes=500, iterations=20),
        eval_config=dict(episodes=200),
        agents=dict(
            agent_0=dict(type='regret_matching', config=dict(initial=None, seed=42)),
            agent_1=dict(type='fictitious_play', config=dict(initial=None, seed=42)),
        ),
    ),
]

In [None]:
def setup_wandb(config):
    """Start a Weights & Biases run for one experiment configuration.

    Calls ``wandb.init`` on the "normal_form_games" project. The run config
    records the game name, the game parameters, the training schedule
    (episodes and iterations) and a mapping ``agent_id -> agent type``.

    Args:
        config: experiment dict with the keys 'game', 'game_params',
            'train_config' (holding 'episodes' and 'iterations') and
            'agents' (mapping agent ids to dicts that hold a 'type').
    """
    run_config = {
        'game': config['game'],
        'game_params': config['game_params'],
    }

    schedule = config['train_config']
    run_config['train_episodes'] = schedule['episodes']
    run_config['train_iterations'] = schedule['iterations']

    agent_types = {}
    for agent_id, spec in config['agents'].items():
        agent_types[agent_id] = spec['type']
    run_config['agent_types'] = agent_types

    wandb.init(project="normal_form_games", config=run_config)


In [None]:
def create_game(game_name, game_params):
    """Game factory: build the game identified by ``game_name``.

    'MP' and 'RPS' are constructed without arguments; 'Blotto' receives
    ``game_params`` as keyword arguments.

    Raises:
        ValueError: if ``game_name`` is none of the supported names.
    """
    # Builders are wrapped in lambdas so that a game class is only looked up
    # and instantiated once its name has matched.
    builders = (
        ('MP', lambda: MP()),
        ('RPS', lambda: RPS()),
        ('Blotto', lambda: Blotto(**game_params)),
    )
    for supported_name, build in builders:
        if game_name == supported_name:
            return build()
    raise ValueError(f"Juego no soportado: {game_name}")

def create_agent(agent_type, game, agent_id, config):
    """Agent factory: build an agent of the requested type.

    The selected class is called as ``cls(game, agent_id, **config)``.
    Supported types are 'fictitious_play', 'regret_matching' and 'random'.

    Raises:
        ValueError: if ``agent_type`` is none of the supported types.
    """
    if agent_type == 'fictitious_play':
        agent_cls = FictitiousPlay
    elif agent_type == 'regret_matching':
        agent_cls = RegretMatching
    elif agent_type == 'random':
        agent_cls = RandomAgent
    else:
        raise ValueError(f"Tipo de agente no soportado: {agent_type}")

    return agent_cls(game, agent_id, **config)

In [None]:


def play_episode(game, agents, verbose=False):
    """Play one episode of the game and return its rewards.

    Resets ``game``, asks the agent of every id in ``game.agents`` for an
    action, and passes the resulting ``{agent_id: action}`` dict to
    ``game.step``. The game is rendered afterwards when ``verbose`` is truthy.

    Returns:
        The second element of the 5-tuple returned by ``game.step``.
    """
    game.reset()

    joint_action = {}
    for agent_id in game.agents:
        joint_action[agent_id] = agents[agent_id].action()

    # Only the rewards (second element) of the step result are used.
    outcome = game.step(joint_action)
    _, rewards, _, _, _ = outcome

    if verbose:
        game.render()
    return rewards

def train(game, agents, train_config):
    """Train the agents on the game and collect per-iteration statistics.

    Plays ``train_config['iterations']`` iterations of
    ``train_config['episodes']`` episodes each through ``play_episode``.
    After every iteration the mean reward of each agent over that iteration
    and the value returned by ``agents[agent_id].policy()`` are recorded.

    When a W&B run is active, the mean rewards of all agents for an iteration
    are sent in a single ``wandb.log`` call, so they share one W&B step.

    Args:
        game: game object exposing ``agents`` (iterable of agent ids).
        agents: mapping ``agent_id -> agent``.
        train_config: dict with the integer keys 'episodes' and 'iterations'.

    Returns:
        dict with two entries, each mapping agent ids to a list with one
        element per iteration: 'rewards' (mean reward) and 'policies'
        (whatever ``policy()`` returned at the end of the iteration).

    Raises:
        ZeroDivisionError: if 'episodes' is 0 while 'iterations' is positive
            and the game has at least one agent.
    """
    n_episodes = train_config['episodes']
    n_iterations = train_config['iterations']

    stats = {
        'rewards': {agent_id: [] for agent_id in game.agents},
        'policies': {agent_id: [] for agent_id in game.agents}
    }

    with tqdm(total=n_episodes * n_iterations, desc="Training") as pbar:
        for _ in range(n_iterations):
            iteration_rewards = {agent_id: 0 for agent_id in game.agents}

            for _ in range(n_episodes):
                rewards = play_episode(game, agents)
                for agent_id in game.agents:
                    iteration_rewards[agent_id] += rewards[agent_id]
                pbar.update(1)

            # Record statistics for this iteration and gather the metrics of
            # every agent into one payload.
            metrics = {}
            for agent_id in game.agents:
                avg_reward = iteration_rewards[agent_id] / n_episodes
                stats['rewards'][agent_id].append(avg_reward)
                stats['policies'][agent_id].append(agents[agent_id].policy())
                metrics[f"train/{agent_id}_avg_reward"] = avg_reward

            # Log once per iteration: every wandb.log call advances the W&B
            # step, so logging per agent would spread the agents of a single
            # iteration over different steps and "episode" values.
            if wandb.run and metrics:
                # NOTE: "episode" carries the W&B step counter, as before.
                metrics["episode"] = wandb.run.step
                wandb.log(metrics)

    return stats

def evaluate(game, agents, eval_config, against_fixed=None):
    """Evaluate the agents against each other or against fixed opponents.

    Plays ``eval_config['episodes']`` episodes through ``play_episode`` and
    averages each agent's rewards with ``np.mean``.

    When a W&B run is active, the mean rewards of all agents are sent in a
    single ``wandb.log`` call, so they share one W&B step.

    Args:
        game: game object exposing ``agents`` (iterable of agent ids).
        agents: mapping ``agent_id -> agent``.
        eval_config: dict with the integer key 'episodes'.
        against_fixed: optional mapping ``agent_id -> agent``. The ids it
            contains play with the given agent instead of the one in
            ``agents``; all other ids keep their agent from ``agents``.

    Returns:
        dict with 'rewards' (agent id -> mean reward over the evaluation
        episodes) and 'policies' (agent id -> ``agents[agent_id].policy()``
        taken before the episodes are played). Policies always come from
        ``agents``, also for ids overridden through ``against_fixed``.
    """
    stats = {
        'rewards': {agent_id: [] for agent_id in game.agents},
        'policies': {agent_id: agents[agent_id].policy() for agent_id in game.agents}
    }

    # Select who actually plays: overrides win over the trained agents.
    if against_fixed:
        eval_agents = {
            agent_id: (against_fixed[agent_id] if agent_id in against_fixed
                       else agents[agent_id])
            for agent_id in game.agents
        }
    else:
        eval_agents = agents

    with tqdm(total=eval_config['episodes'], desc="Evaluation") as pbar:
        for _ in range(eval_config['episodes']):
            rewards = play_episode(game, eval_agents)
            for agent_id in game.agents:
                stats['rewards'][agent_id].append(rewards[agent_id])
            pbar.update(1)

    # Replace each reward list with its mean and gather the metrics of every
    # agent into one payload.
    metrics = {}
    for agent_id in game.agents:
        avg_reward = np.mean(stats['rewards'][agent_id])
        stats['rewards'][agent_id] = avg_reward
        metrics[f"eval/{agent_id}_avg_reward"] = avg_reward

    # Log once: every wandb.log call advances the W&B step, so logging per
    # agent would put the agents on different steps and "episode" values.
    if wandb.run and metrics:
        # NOTE: "episode" carries the W&B step counter, as before.
        metrics["episode"] = wandb.run.step
        wandb.log(metrics)

    return stats

def plot_results(train_stats, eval_stats, agents, game_name):
    """Draw a three-panel summary figure for one experiment.

    Panels, left to right:
      1. training progress: the per-iteration average reward of every agent,
         labelled with the agent id and the class name of its agent;
      2. final policies: the last entry of each agent's recorded policies,
         drawn as overlaid semi-transparent bars;
      3. evaluation rewards: one bar per agent.

    When a W&B run is active the current figure is logged under the key
    "results". The figure is then displayed with ``plt.show()``.

    Args:
        train_stats: dict with 'rewards' and 'policies', each mapping agent
            ids to per-iteration lists.
        eval_stats: dict with 'rewards' mapping agent ids to a scalar.
        agents: mapping ``agent_id -> agent`` (used for the legend labels).
        game_name: name shown in the first panel's title.
    """
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    training_ax, policy_ax, eval_ax = axes

    # Panel 1: training progress
    for agent_id, reward_curve in train_stats['rewards'].items():
        agent_kind = type(agents[agent_id]).__name__
        training_ax.plot(reward_curve, label=f"{agent_id} ({agent_kind})")
    training_ax.set_title(f"Training Progress\n{game_name}")
    training_ax.set_xlabel("Iteration")
    training_ax.set_ylabel("Average Reward")
    training_ax.legend()
    training_ax.grid(True, alpha=0.3)

    # Panel 2: learned policies
    for agent_id, snapshots in train_stats['policies'].items():
        final_policy = snapshots[-1]
        action_indices = range(len(final_policy))
        policy_ax.bar(action_indices, final_policy,
                      alpha=0.6, label=f"{agent_id} Policy")
    policy_ax.set_title("Final Policies")
    policy_ax.set_xlabel("Action")
    policy_ax.set_ylabel("Probability")
    policy_ax.legend()
    policy_ax.grid(True, alpha=0.3)

    # Panel 3: evaluation rewards
    agent_ids = list(eval_stats['rewards'].keys())
    mean_rewards = [eval_stats['rewards'][agent_id] for agent_id in agent_ids]
    eval_ax.bar(agent_ids, mean_rewards, alpha=0.6)
    eval_ax.set_title("Evaluation Rewards")
    eval_ax.set_ylabel("Average Reward")
    eval_ax.grid(True, alpha=0.3)

    fig.tight_layout()

    # Save to W&B
    if wandb.run:
        wandb.log({"results": plt})

    plt.show()

def run_experiment(config):
    """Run one complete experiment: setup, training, evaluation, plotting.

    Starts a W&B run for ``config``, builds the game and one agent per entry
    of ``config['agents']``, trains and evaluates them, and plots the results.
    The W&B run is always finished, also when one of these phases raises, so
    a failed experiment does not leave its run open for the next one.

    Args:
        config: experiment dict with the keys 'game', 'game_params',
            'train_config', 'eval_config' and 'agents'.

    Returns:
        Tuple ``(train_stats, eval_stats)`` as returned by ``train`` and
        ``evaluate``.
    """
    print(f"\n=== Running Experiment: {config['game']} ===")
    print(f"Agents: { {k: v['type'] for k, v in config['agents'].items()} }")

    # Set up W&B
    setup_wandb(config)

    try:
        # Create the game and the agents
        game = create_game(config['game'], config['game_params'])
        agents = {
            agent_id: create_agent(agent_config['type'], game, agent_id, agent_config['config'])
            for agent_id, agent_config in config['agents'].items()
        }

        # Training phase
        train_stats = train(game, agents, config['train_config'])

        # Evaluation phase
        eval_stats = evaluate(game, agents, config['eval_config'])

        # Visualization
        plot_results(train_stats, eval_stats, agents, config['game'])
    finally:
        # Close the W&B run whether or not the experiment succeeded
        wandb.finish()

    return train_stats, eval_stats

# Run all experiments
if __name__ == "__main__":
    # Log in to W&B (only once)
    wandb.login()
    
    # Run every configuration in order.
    # NOTE: train_stats / eval_stats are rebound on each pass, so after the
    # loop they hold the results of the last configuration only.
    for config in CONFIGS:
        train_stats, eval_stats = run_experiment(config)