In [4]:
from extended_quarto import ExtendQuarto
from player import RandomPlayer,NaivePlayer,RiskyPlayer,BlockAndRiskyPlayer,BlockPlayer
from quarto_new import Quarto, Player
from tqdm import tqdm
from rl import QLearningPlayer, train
import pickle
import numpy as np
import os


TRAIN = True

In [8]:
def runBenchmark(n_iter:int,player1: Player, player2:Player):
    v = 0
    draw = 0
    board = player1.extended_quarto
    board.quarto.set_players((player1,player2))
    for i in tqdm(range(n_iter)):
        #print(f"match muber: {i}")
        res = board.quarto.run()
        if res == -1:
            draw += 1
        else:
            v += 1 if not res else 0
        board.quarto.reset()
        #print(board.quarto.get_board_status())
        #print(board.available_pieces())
        #print(board.available_positions())
        #break
    print(f"winner ratio: {v/n_iter*100}")
    print(f"draw ratio: {draw/n_iter*100}")
    return v/n_iter*100,draw/n_iter*100

In [6]:
if TRAIN:
    # Create environment
    quarto = Quarto()
    env = ExtendQuarto(quarto)
    player = QLearningPlayer(quarto, True)
    def on_cycle_end(cycle):
        # Show some nice stats
        empty_state, empty_actions = env.reset_game()
        print(f'Q-table size={len(player.q_table)}, epsilon={player.epsilon}')
        open_moves = player._get_action_values(empty_state, empty_actions)
        best_open_moves = np.argsort(open_moves)[-5:]
        print(f'5 best opening moves={list(zip(best_open_moves, open_moves[best_open_moves]))}')
    train(env, player, on_cycle_end=on_cycle_end, cycles=6)

100%|██████████| 10000/10000 [03:29<00:00, 47.71it/s]
100%|██████████| 1000/1000 [00:34<00:00, 29.05it/s]
100%|██████████| 1000/1000 [00:21<00:00, 47.20it/s]


New best player saved:(0.3967, 0.5836)
Cycle 1/6: win rate = 0.3967, draw rate = 0.5836
score against himself: (0.265, 0.72)
score against random: (0.602, 0.378)
Q-table size=30976, epsilon=0.6065230778740716
5 best opening moves=[(78, 1.7185949999999999), (106, 1.75144519), (100, 2.05222257), (151, 2.4923863), (226, 91.28213868771176)]


100%|██████████| 10000/10000 [03:03<00:00, 54.57it/s]
100%|██████████| 1000/1000 [00:30<00:00, 32.40it/s]
100%|██████████| 1000/1000 [00:21<00:00, 46.35it/s]


New best player saved:(0.5081, 0.4809)
Cycle 2/6: win rate = 0.5081, draw rate = 0.4809
score against himself: (0.489, 0.508)
score against random: (0.592, 0.4)
Q-table size=49042, epsilon=0.36787024399384166
5 best opening moves=[(151, 8.138022827926969), (125, 8.45701228332924), (158, 8.482192266610001), (67, 9.248527589442492), (226, 91.87600921823667)]


100%|██████████| 10000/10000 [02:48<00:00, 59.51it/s]
100%|██████████| 1000/1000 [00:37<00:00, 26.83it/s]
100%|██████████| 1000/1000 [00:21<00:00, 47.18it/s]


New best player saved:(0.6199, 0.373)
Cycle 3/6: win rate = 0.6199, draw rate = 0.373
score against himself: (0.705, 0.291)
score against random: (0.567, 0.415)
Q-table size=60134, epsilon=0.22312179264543314
5 best opening moves=[(137, 13.511532318140038), (125, 13.818016764757392), (195, 17.544210917720545), (158, 23.63598033633966), (226, 79.25451071872106)]


100%|██████████| 10000/10000 [02:43<00:00, 61.24it/s]
100%|██████████| 1000/1000 [00:37<00:00, 26.45it/s]
100%|██████████| 1000/1000 [00:22<00:00, 45.11it/s]


New best player saved:(0.7119, 0.2824)
Cycle 4/6: win rate = 0.7119, draw rate = 0.2824
score against himself: (0.691, 0.302)
score against random: (0.549, 0.432)
Q-table size=68407, epsilon=0.1353285164160905
5 best opening moves=[(114, 15.584632026112292), (202, 18.47446876290545), (78, 18.804973956722908), (195, 20.91764062980969), (226, 76.74954610858023)]


100%|██████████| 10000/10000 [02:36<00:00, 63.89it/s]
100%|██████████| 1000/1000 [00:31<00:00, 31.70it/s]
100%|██████████| 1000/1000 [00:22<00:00, 44.44it/s]


New best player saved:(0.756, 0.2379)
Cycle 5/6: win rate = 0.756, draw rate = 0.2379
score against himself: (0.759, 0.237)
score against random: (0.564, 0.425)
Q-table size=74391, epsilon=0.1
5 best opening moves=[(115, 16.839515596077135), (114, 16.86749915794624), (58, 19.59134601082646), (210, 20.263488341201366), (195, 88.0657298557313)]


100%|██████████| 10000/10000 [02:59<00:00, 55.83it/s]
100%|██████████| 1000/1000 [00:31<00:00, 31.72it/s]
100%|██████████| 1000/1000 [00:22<00:00, 45.19it/s]


Cycle 6/6: win rate = 0.711, draw rate = 0.253
score against himself: (0.805, 0.192)
score against random: (0.558, 0.418)
Q-table size=81632, epsilon=0.1
5 best opening moves=[(108, 13.669192015795074), (236, 15.24257066464571), (128, 16.48422659868103), (204, 17.38585424232773), (218, 78.1381064140431)]


In [10]:
quarto = q()
random_player = RandomPlayer(quarto)
naivePlayer = NaivePlayer(quarto)
rl_player = player.get_freezed()
rl_player.set_quarto(quarto)
runBenchmark(1,rl_player,random_player)

  0%|          | 0/1 [00:00<?, ?it/s]


 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------

Selected piece: -1


 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------

Selected piece: 1


 -------------------
| -1 |  1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------

Selected piece: 1


 -------------------
| -1 |  1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------

Selected piece: 0


 -------------------
|  0 |  1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 -------------------
| -1 | -1 | -1 | -1 |
 ------------




AttributeError: 'Quarto' object has no attribute 'reset_game'

### Play a match

In [9]:
SAVED_PLAYER_PATH = "./trained_players/player.bin"
quarto = Quarto()
random_player = RandomPlayer(quarto)
naive_player = NaivePlayer(quarto)
rl_player = QLearningPlayer(quarto, True)

if os.path.isfile(SAVED_PLAYER_PATH):
    with open(SAVED_PLAYER_PATH, 'rb') as f_player_dump:
        rl_player = pickle.load(f_player_dump)
else:
    train(env, rl_player, on_cycle_end=on_cycle_end, cycles=5)
    
rl_player = player.get_freezed()
rl_player.set_quarto(quarto)
runBenchmark(5000,rl_player,random_player)

100%|██████████| 5000/5000 [01:08<00:00, 73.44it/s]

winner ratio: 56.54
draw ratio: 7.539999999999999





(56.54, 7.539999999999999)

In [None]:
# def runBenchmark(n_iter:int,player1: Player, player2:Player):
#     v = 0
#     draw = 0
#     for i in range(n_iter):
#         #print(f"match muber: {i}")
#         quarto = Quarto()
#         quarto.set_players((player1(quarto),player2(quarto)))
#         res = quarto.run()
#         if res == -1:
#             draw += 1
#         else:
#             v += 1 if not res else 0
#     print(f"winner ratio: {v/n_iter*100}")
#     print(f"draw ratio: {draw/n_iter*100}")
#     return v/n_iter*100,draw/n_iter*100

In [None]:
# print(f"Player 1: Naive, player 2: Random")
# runBenchmark(10,NaivePlayer,RandomPlayer)
# print(f"Player 1: Risk, player 2: Random")
# runBenchmark(10,RiskyPlayer,RandomPlayer)
# print(f"Player 1: Block, player 2: Random")
# runBenchmark(10,BlockPlayer,RandomPlayer)
# print(f"Player 1: BlockAndRisk, player 2: Random")
# runBenchmark(10,BlockAndRiskyPlayer,RandomPlayer)

# print(f"Player 1: Risk, player 2: Naive")
# runBenchmark(10,RiskyPlayer,NaivePlayer)
# print(f"Player 1: Block, player 2: Naive")
# runBenchmark(10,BlockPlayer,NaivePlayer)
# print(f"Player 1: BlockAndRisk, player 2: Naive")
# runBenchmark(10,RiskyPlayer,NaivePlayer)

# print(f"Player 1: Risk, player 2: Block")
# runBenchmark(10,RiskyPlayer,BlockPlayer)
# print(f"Player 1: BlockAndRisk, player 2: Block")
# runBenchmark(10,BlockAndRiskyPlayer,BlockPlayer)

# print(f"Player 1: BlockAndRisk, player 2: Risk")
# runBenchmark(10,BlockAndRiskyPlayer,RiskyPlayer)