In [1]:
from games.tictactoe.tictactoe import TicTacToe
from agents.agent_random import RandomAgent
from agents.minimax import MiniMax
from agents.mcts import MonteCarloTreeSearch, MCTSNode
import numpy as np
from collections import defaultdict

In [2]:
game = TicTacToe(render_mode='')

In [3]:
agents_rd = dict(map(lambda agent: (agent, RandomAgent(game=game, agent=agent)), game.agents))
agents_rd

{'X': <agents.agent_random.RandomAgent at 0x25335b0bed0>,
 'O': <agents.agent_random.RandomAgent at 0x25335b18510>}

In [4]:
game.reset()
while not game.terminated():
    game.render()
    print(game.eval(game.agent_selection))
    action = agents_rd[game.agent_selection].action()
    game.step(action)
game.render()
print(game.eval(game.agent_selection))
print(game.rewards)

Player: X
Board:
 .  .  . 
 .  .  . 
 .  .  . 

0.0
Player: O
Board:
 .  .  . 
 X  .  . 
 .  .  . 

-0.25
Player: X
Board:
 .  O  . 
 X  .  . 
 .  .  . 

0.0
Player: O
Board:
 .  O  . 
 X  .  X 
 .  .  . 

-0.125
Player: X
Board:
 .  O  . 
 X  .  X 
 O  .  . 

-0.25
Player: O
Board:
 X  O  . 
 X  .  X 
 O  .  . 

0.0
Player: X
Board:
 X  O  . 
 X  .  X 
 O  O  . 

0.0
Player: O
Board:
 X  O  X 
 X  .  X 
 O  O  . 

-0.125
Player: X
Board:
 X  O  X 
 X  .  X 
 O  O  O 

-1
{'X': -1, 'O': 1}


In [5]:
players = {}
players[game.agents[1]] = RandomAgent(game=game, agent=game.agents[0])
players[game.agents[0]] = MonteCarloTreeSearch(game=game, agent=game.agents[1])


In [9]:
game.reset()
game.render()
print(game.observe(game.agents[0]))
action, value = players[game.agent_selection].mcts()
print(action)
game.step(action)
game.render()
print(game.observe(game.agents[1]))
action = players[game.agent_selection].action()
game.step(action)
print(action, value)
game.render()


Player: X
Board:
 .  .  . 
 .  .  . 
 .  .  . 

[[0 0 0]
 [0 0 0]
 [0 0 0]]
1
Player: O
Board:
 .  X  . 
 .  .  . 
 .  .  . 

[[0 2 0]
 [0 0 0]
 [0 0 0]]
0 -0.99
Player: X
Board:
 O  X  . 
 .  .  . 
 .  .  . 



In [10]:
values = defaultdict(list)
N = 10
for i in range(N):    
    game.reset()
    while not game.terminated():
        agent = game.agent_selection
        action = players[agent].action()
        game.step(action)
    for agent in game.agents:
        values[agent].append(game.reward(agent))
for agent in game.agents:
    print(f"Agent {agent} average reward: {np.mean(values[agent])} over {N} games")
    print(f"Agent {agent} rewards: {values[agent]}")

Agent X average reward: 0.1 over 10 games
Agent X rewards: [0, 1, 1, 0, 0, 1, -1, 0, 0, -1]
Agent O average reward: -0.1 over 10 games
Agent O rewards: [0, -1, -1, 0, 0, -1, 1, 0, 0, 1]
