In [1]:
from games.tictactoe.tictactoe import TicTacToe
from agents.agent_random import RandomAgent
from agents.minimax import MiniMax
import numpy as np
from collections import defaultdict

In [2]:
# Shared game instance used by every cell below. An empty render_mode is
# passed here; game.render() still prints a text board in the later cells.
game = TicTacToe(render_mode="")

In [3]:
# One RandomAgent per player, keyed by the agent id the game exposes.
agents_rd = {
    agent_id: RandomAgent(game=game, agent=agent_id)
    for agent_id in game.agents
}
agents_rd

{'X': <agents.agent_random.RandomAgent at 0x1100e9450>,
 'O': <agents.agent_random.RandomAgent at 0x1100e9a90>}

In [4]:
# Play a single game between the two random agents. Before every move the
# board is rendered and game.eval() for the player to move is printed.
game.reset()
while not game.terminated():
    game.render()
    mover = game.agent_selection
    print(game.eval(mover))
    # Let the random agent registered for the player to move pick the action.
    game.step(agents_rd[mover].action())

# Final position: board, evaluation for the currently selected agent, and the
# per-agent rewards.
game.render()
print(game.eval(game.agent_selection))
print(game.rewards)

Player: X
Board:
 .  .  . 
 .  .  . 
 .  .  . 

0.0
Player: O
Board:
 .  .  . 
 X  .  . 
 .  .  . 

-0.25
Player: X
Board:
 O  .  . 
 X  .  . 
 .  .  . 

-0.125
Player: O
Board:
 O  .  . 
 X  .  . 
 X  .  . 

-0.125
Player: X
Board:
 O  .  O 
 X  .  . 
 X  .  . 

-0.125
Player: O
Board:
 O  .  O 
 X  .  X 
 X  .  . 

0.0
Player: X
Board:
 O  .  O 
 X  .  X 
 X  .  O 

-0.125
Player: O
Board:
 O  .  O 
 X  X  X 
 X  .  O 

-1
{'X': 1, 'O': -1}


In [5]:
# Two MiniMax players that differ only in their depth argument: the first
# agent is built with depth=1, the second with depth=4.
first_agent = game.agents[0]
second_agent = game.agents[1]
players = {
    first_agent: MiniMax(game=game, agent=first_agent, depth=1),
    second_agent: MiniMax(game=game, agent=second_agent, depth=4),
}


In [6]:
# Walk through the first two plies by hand. For each ply:
#   1. print what the agent at that index observes,
#   2. ask the player to move for its minimax (action, value) at the given depth,
#   3. print the pair, apply the action and render the resulting board.
# The depths (1, 4) mirror the depths the two players were constructed with.
# Both plies share one loop so they follow the same print-then-step order
# instead of two copy-pasted, slightly different sequences.
game.reset()
game.render()
for agent_idx, depth in enumerate((1, 4)):
    print(game.observe(game.agents[agent_idx]))
    action, value = players[game.agent_selection].minimax(game, depth=depth)
    print(action, value)
    game.step(action)
    game.render()


Player: X
Board:
 .  .  . 
 .  .  . 
 .  .  . 

[[0 0 0]
 [0 0 0]
 [0 0 0]]
4 0.5
Player: O
Board:
 .  .  . 
 .  X  . 
 .  .  . 

[[0 0 0]
 [0 2 0]
 [0 0 0]]
0 -0.375
Player: X
Board:
 O  .  . 
 .  X  . 
 .  .  . 



In [7]:
# Pit the two MiniMax players against each other for N games and collect the
# reward each agent receives at the end of every game.
N = 10
values = defaultdict(list)
for game_idx in range(N):
    game.reset()
    # Play until the game reports termination, always asking the player
    # registered for the agent to move.
    while not game.terminated():
        mover = game.agent_selection
        game.step(players[mover].action())
    for agent in game.agents:
        values[agent].append(game.reward(agent))

# Per-agent summary: mean reward followed by the raw per-game rewards.
for agent in game.agents:
    agent_rewards = values[agent]
    print(f"Agent {agent} average reward: {np.mean(agent_rewards)} over {N} games")
    print(f"Agent {agent} rewards: {agent_rewards}")

Agent X average reward: -0.5 over 10 games
Agent X rewards: [0, 1, 1, -1, -1, -1, -1, -1, -1, -1]
Agent O average reward: 0.5 over 10 games
Agent O rewards: [0, -1, -1, 1, 1, 1, 1, 1, 1, 1]
