In [None]:
# Importar los nuevos algoritmos implementados
from agents.mcts_t import MonteCarloTreeSearch
from agents.counterfactualregret_t import CounterFactualRegret
import numpy as np

In [None]:
from games.nocca_nocca.nocca_nocca import NoccaNocca
from agents.agent_random import RandomAgent
from agents.minimax import MiniMax   

In [2]:
# Build the Nocca-Nocca environment used by the MiniMax demo cells.
game = NoccaNocca(
    max_steps=150,
    initial_player=0,
    seed=1,
)

In [3]:
# One depth-1 MiniMax controller per player, keyed by that player's agent id.
agents = {
    player: MiniMax(game=game, agent=player, depth=1)
    for player in (game.agents[0], game.agents[1])
}

In [4]:
# Play a single game between the configured agents. The board is rendered
# before every move and once more after the game has ended.
game.reset()
print(f"Initial Agent: {game.agent_selection}")
while True:
    if game.game_over():
        break
    game.render()
    # Ask the agent whose turn it is for a move, log it, then apply it.
    action = agents[game.agent_selection].action()
    print(f"Turn {game.steps} -- Agent {game.agent_selection} plays action {action}")
    game.step(action=action)

# Final position followed by the outcome summary.
game.render()
if game.truncated():
    print("Game was truncated")
for agent in agents:
    print(f"Reward agent {agent}: {game.reward(agent)}")
print(f"The winner is: {game.check_for_winner()}")

Initial Agent: Black
0: ___ ___ ___ ___ ___ 
1: 0__ 0__ 0__ 0__ 0__ 
2: ___ ___ ___ ___ ___ 
3: ___ ___ ___ ___ ___ 
4: ___ ___ ___ ___ ___ 
5: ___ ___ ___ ___ ___ 
6: 1__ 1__ 1__ 1__ 1__ 
7: ___ ___ ___ ___ ___ 
Turn 0 -- Agent Black plays action 57
0: ___ ___ ___ ___ ___ 
1: 0__ 0__ ___ 0__ 0__ 
2: ___ ___ 0__ ___ ___ 
3: ___ ___ ___ ___ ___ 
4: ___ ___ ___ ___ ___ 
5: ___ ___ ___ ___ ___ 
6: 1__ 1__ 1__ 1__ 1__ 
7: ___ ___ ___ ___ ___ 
Turn 1 -- Agent White plays action 274
0: ___ ___ ___ ___ ___ 
1: 0__ 0__ ___ 0__ 0__ 
2: ___ ___ 0__ ___ ___ 
3: ___ ___ ___ ___ ___ 
4: ___ ___ ___ ___ ___ 
5: ___ ___ ___ ___ ___ 
6: 1__ 1__ 1__ 11_ ___ 
7: ___ ___ ___ ___ ___ 
Turn 2 -- Agent Black plays action 103
0: ___ ___ ___ ___ ___ 
1: 0__ 0__ ___ 00_ 0__ 
2: ___ ___ ___ ___ ___ 
3: ___ ___ ___ ___ ___ 
4: ___ ___ ___ ___ ___ 
5: ___ ___ ___ ___ ___ 
6: 1__ 1__ 1__ 11_ ___ 
7: ___ ___ ___ ___ ___ 
Turn 3 -- Agent White plays action 267
0: ___ ___ ___ ___ ___ 
1: 0__ 0__ ___ 00_ 0__ 
2: ___ _

In [None]:
# Quick smoke test of MCTS on Nocca-Nocca.
print("=== Probando MCTS en Nocca-Nocca ===")
game_mcts = NoccaNocca(max_steps=150, initial_player=0, seed=1)

# Upper bound on the number of moves played here, so the smoke test stays
# short even when the game itself would run longer.
MCTS_TEST_MAX_STEPS = 50

# Dedicated, seeded generator for fallback moves so that a re-run of this cell
# picks the same random actions (the global np.random state is not seeded).
mcts_fallback_rng = np.random.default_rng(1)

# MCTS agents with very few simulations: fast, not strong.
agents_mcts = {
    game_mcts.agents[0]: MonteCarloTreeSearch(game=game_mcts, agent=game_mcts.agents[0], simulations=10),
    game_mcts.agents[1]: MonteCarloTreeSearch(game=game_mcts, agent=game_mcts.agents[1], simulations=10)
}

game_mcts.reset()
print(f"Initial Agent: {game_mcts.agent_selection}")
step_count = 0
while not game_mcts.game_over() and step_count < MCTS_TEST_MAX_STEPS:
    try:
        action = agents_mcts[game_mcts.agent_selection].action()
        print(f"Turn {game_mcts.steps} -- Agent {game_mcts.agent_selection} plays action {action}")
        game_mcts.step(action=action)
        step_count += 1
    except Exception as e:
        print(f"Error: {e}")
        # Best-effort fallback: keep the game moving with a random legal action.
        available = game_mcts.available_actions()
        # Explicit emptiness check: `if available:` would raise for a NumPy array.
        if available is None or len(available) == 0:
            break
        action = mcts_fallback_rng.choice(available)
        game_mcts.step(action)
        step_count += 1

# The loop can end because of the step cap (or because no action was
# available) while the game is still running; say so instead of presenting
# the figures below as a final result.
if not game_mcts.game_over():
    print(f"Stopped after {step_count} steps with the game still in progress; results below are partial")
if game_mcts.truncated():
    print("Game was truncated")
for agent in agents_mcts:
    print(f"Reward agent {agent}: {game_mcts.reward(agent)}")
print(f"The winner is: {game_mcts.check_for_winner()}")
print("MCTS test completed!")

In [None]:
# Comparación de rendimiento entre algoritmos
print("\n=== Comparación de Algoritmos en Nocca-Nocca ===")

def test_algorithm_performance(game_class, agent_class, agent_kwargs, episodes=5):
    """Prueba el rendimiento de un algoritmo"""
    wins = 0
    total_steps = 0
    
    for episode in range(episodes):
        game = game_class(max_steps=100, seed=episode)
        agents = {}
        for agent_id in game.agents:
            agents[agent_id] = agent_class(game=game, agent=agent_id, **agent_kwargs)
        
        game.reset()
        steps = 0
        while not game.game_over() and steps < 100:
            try:
                action = agents[game.agent_selection].action()
                game.step(action)
                steps += 1
            except:
                available = game.available_actions()
                if available:
                    action = np.random.choice(available)
                    game.step(action)
                    steps += 1
                else:
                    break
        
        total_steps += steps
        if hasattr(game, 'check_for_winner'):
            winner = game.check_for_winner()
            if winner:
                wins += 1
    
    avg_steps = total_steps / episodes
    return wins, avg_steps

# Benchmark the different algorithms.
# Single source of truth for the episode count: it is used both for the run
# itself and for the "x/N" denominator in the printed summary.
N_EPISODES = 5

# (label, agent class, agent constructor kwargs) for each algorithm to compare.
algorithms_to_compare = [
    ("MiniMax", MiniMax, {'depth': 1}),
    ("MCTS", MonteCarloTreeSearch, {'simulations': 10}),
    ("Random", RandomAgent, {}),
]

comparison_results = {}
for algo_label, algo_class, algo_kwargs in algorithms_to_compare:
    print(f"\n{algo_label}:")
    algo_wins, algo_avg_steps = test_algorithm_performance(
        NoccaNocca, algo_class, algo_kwargs, episodes=N_EPISODES
    )
    comparison_results[algo_label] = (algo_wins, algo_avg_steps)
    print(f"  Wins: {algo_wins}/{N_EPISODES}, Avg steps: {algo_avg_steps:.1f}")

# Keep the original per-algorithm names bound in case other cells use them.
minimax_wins, minimax_steps = comparison_results["MiniMax"]
mcts_wins, mcts_steps = comparison_results["MCTS"]
random_wins, random_steps = comparison_results["Random"]

print("\n¡Comparación completada!")