In [11]:
from games.tictactoe.tictactoe import TicTacToe
from agents.agent_random import RandomAgent
from agents.minimax import MiniMax
from agents.mcts import MonteCarloTreeSearch, MCTSNode
from agents.counterfactualregret_t import CounterFactualRegret
import numpy as np
from collections import defaultdict

In [12]:
game = TicTacToe(render_mode='')

In [3]:
agents_rd = dict(map(lambda agent: (agent, RandomAgent(game=game, agent=agent)), game.agents))
agents_rd

{'X': <agents.agent_random.RandomAgent at 0x10c5e8b10>,
 'O': <agents.agent_random.RandomAgent at 0x1121598d0>}

In [4]:
game.reset()
while not game.terminated():
    game.render()
    print(game.eval(game.agent_selection))
    action = agents_rd[game.agent_selection].action()
    game.step(action)
game.render()
print(game.eval(game.agent_selection))
print(game.rewards)

Player: X
Board:
 .  .  . 
 .  .  . 
 .  .  . 

0.0
Player: O
Board:
 .  .  . 
 X  .  . 
 .  .  . 

-0.25
Player: X
Board:
 .  O  . 
 X  .  . 
 .  .  . 

0.0
Player: O
Board:
 .  O  . 
 X  .  X 
 .  .  . 

-0.125
Player: X
Board:
 .  O  . 
 X  .  X 
 O  .  . 

-0.25
Player: O
Board:
 X  O  . 
 X  .  X 
 O  .  . 

0.0
Player: X
Board:
 X  O  . 
 X  .  X 
 O  O  . 

0.0
Player: O
Board:
 X  O  X 
 X  .  X 
 O  O  . 

-0.125
Player: X
Board:
 X  O  X 
 X  .  X 
 O  O  O 

-1
{'X': -1, 'O': 1}


In [13]:
players = {}
players[game.agents[0]] = MonteCarloTreeSearch(game=game, agent=game.agents[0], simulations=100)
players[game.agents[1]] = MonteCarloTreeSearch(game=game, agent=game.agents[1], simulations=100)


In [5]:
game.reset()
game.render()
print(game.observe(game.agents[0]))
action, value = players[game.agent_selection].mcts()
print(action)
game.step(action)
game.render()
print(game.observe(game.agents[1]))
action = players[game.agent_selection].action()
game.step(action)
print(action, value)
game.render()


Player: X
Board:
 .  .  . 
 .  .  . 
 .  .  . 

[[0 0 0]
 [0 0 0]
 [0 0 0]]


AttributeError: 'CounterFactualRegret' object has no attribute 'mcts'

In [14]:
values = defaultdict(list)
N = 20
for i in range(N):    
    game.reset()
    while not game.terminated():
        agent = game.agent_selection
        action = players[agent].action()
        game.step(action)
    for agent in game.agents:
        values[agent].append(game.reward(agent))
for agent in game.agents:
    print(f"Agent {agent} average reward: {np.mean(values[agent])} over {N} games")
    print(f"Agent {agent} rewards: {values[agent]}")

Agent X average reward: 0.55 over 20 games
Agent X rewards: [1, -1, -1, 0, 1, 1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Agent O average reward: -0.55 over 20 games
Agent O rewards: [-1, 1, 1, 0, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]


In [15]:
from nbconvert import HTMLExporter
import nbformat, os, time

def save_notebook_to_html(notebook_path=f"TicTacToe.ipynb"):
    """Guarda el notebook como HTML"""
    exporter = HTMLExporter()
    notebook = nbformat.read(notebook_path, as_version=4)
    html, _ = exporter.from_notebook_node(notebook)
    
    os.makedirs("notebook_exports", exist_ok=True)
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    output_path = f"notebook_exports/TicTacToe_MCTSvsMCTS_100both_{timestamp}.html"
    
    with open(output_path, "w") as f:
        f.write(html)
    
    return output_path

In [16]:
save_notebook_to_html()

'notebook_exports/TicTacToe_MCTSvsMCTS_100both_20250630-205518.html'