In [2]:
import numpy as np
import time
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, random_split
import torch.optim as optim
import matplotlib.pyplot as plt
import sys, os

project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(project_root)

from Resources.Model import Model_v4
from Resources.Game import *
from Resources.TS_ModelGuided import *
from Resources.TS_ModelGuided_MCRollout import *


##### Setup Model

In [61]:
# Both players load the same saved weights (file `model_51410_games`).
MODEL_CHECKPOINT = '../Train Value Function/Monte Carlo/Model Saves MC v4/model_51410_games'


def _load_model(checkpoint_path):
    """Create a Model_v4, load the weights stored at `checkpoint_path`, return it in eval mode.

    Parameters
    ----------
    checkpoint_path : str
        Path of a state dict saved with `torch.save`.

    Returns
    -------
    Model_v4
        The model with loaded weights, switched to evaluation mode.
    """
    model = Model_v4()
    # map_location='cpu' lets a checkpoint that was saved from a GPU be loaded
    # on a CPU-only machine; load_state_dict copies into the model's own tensors.
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
    model.eval()
    return model


model_1 = _load_model(MODEL_CHECKPOINT)
model_2 = _load_model(MODEL_CHECKPOINT)

# NOTE(review): an earlier, commented-out variant created a fresh `Model_v4()` in
# eval mode without loading weights -- presumably the "untrained" baseline; to
# reproduce it, assign `model_2 = Model_v4()` followed by `model_2.eval()` instead.

<All keys matched successfully>

##### Settings

In [65]:
# Number of games played in one evaluation run.
n_test_games = 100

# Move-selection strategy per player.
# Available: 'one step greedy', 'TS', 'TS with rollout'
strategy_1, strategy_2 = 'one step greedy', 'TS'

# Tree-search time budget per move in seconds (only read by the 'TS' strategies).
tmax_1, tmax_2 = 1, 10

# Result tallies, updated by the evaluation loop.
wins_1 = wins_2 = draws = 0

In [13]:
# Number of opening plies during which the greedy player samples its move
# instead of taking the arg-max, so that test games do not repeat.
STOCHASTIC_OPENING_PLIES = 11
# Scale applied to the mean-centred values before the softmax used for sampling.
SOFTMAX_SCALE = 10


def _greedy_move(game, model, stochastic):
    """Pick a move by one-step look-ahead with the value model.

    Every possible move is tried on a copy of `game`, so `game` itself is not
    modified. A move that ends the game with a result other than 'draw' is
    returned immediately. Otherwise all resulting boards are scored in one batch.

    Parameters
    ----------
    game : Game
        Current position; only `PossibleMoves()` and `copy()` are called on it.
    model : torch.nn.Module
        Value model, called on the stacked board tensors.
    stochastic : bool
        If True, sample the move from a softmax over the scaled, mean-centred
        values; if False, take the move with the highest value.

    Returns
    -------
    (move, best_value)
        `best_value` is the highest predicted value as a float, or None when a
        winning move was found and the model was therefore never evaluated.
    """
    moves = game.PossibleMoves()
    boards = []

    for move in moves:
        trial = game.copy()
        trial.PlayMove(move)
        # the board is encoded after the move but before the flip
        boards.append(board_to_tensor(trial.pieces))
        trial.FlipBoard()
        if trial.is_over() and trial.get_winner() != 'draw':
            return move, None

    # Inference only: no_grad avoids building (and later retaining) autograd graphs.
    with torch.no_grad():
        # flattened to one value per candidate move (assumes the model emits a
        # single scalar per board, as the sampling over `moves` below requires)
        values = model(torch.stack(boards)).reshape(-1)

    if stochastic:
        move_prob = torch.softmax(SOFTMAX_SCALE * (values - values.mean()), dim=0).numpy()
        chosen_i = np.random.choice(len(moves), p=move_prob)
    else:
        chosen_i = int(torch.argmax(values))

    return moves[chosen_i], float(values.max())


# NOTE: wins_1 / wins_2 / draws are not reset here. They are only ever
# incremented in this cell, so running it again without re-initialising them
# adds the new results on top of the previous ones.
for i_test_game in range(n_test_games): # loop through test games

    game = Game()
    i = 0 # number of moves played so far in this game

    # assign colours at random
    if np.random.choice([True, False]):
        color_1, color_2 = 'white', 'black'
    else:
        color_1, color_2 = 'black', 'white'

    # best predicted value per greedy move of each player (floats)
    value_list_1 = []
    value_list_2 = []

    while not game.is_over(): # loop through moves in current test game

        player_1_to_move = game.turn == color_1
        if player_1_to_move:
            model_curr, strategy_curr, tmax_curr = model_1, strategy_1, tmax_1
        else:
            model_curr, strategy_curr, tmax_curr = model_2, strategy_2, tmax_2

        if strategy_curr == 'one step greedy':

            # play move that achieves highest value given current model
            # except: if a move wins the game, always choose that move
            chosen_move, best_value = _greedy_move(
                game, model_curr, stochastic=i < STOCHASTIC_OPENING_PLIES)

            # Only record a value when the model was actually evaluated for this
            # position; otherwise a stale value from an earlier move would be logged.
            if best_value is not None:
                if player_1_to_move:
                    value_list_1.append(best_value)
                else:
                    value_list_2.append(best_value)

        elif strategy_curr == 'TS':

            chosen_move, root = ModelGuided_TS(game, model_curr, root=None, tmax=tmax_curr, prints=False)

        elif strategy_curr == 'TS with rollout':

            chosen_move, root = MC_TS(game, model_curr, root=None, tmax=tmax_curr, prints=False)

        else:
            # fail loudly instead of silently replaying a stale `chosen_move`
            raise ValueError('unknown strategy: {!r}'.format(strategy_curr))

        game.PlayMove(chosen_move)
        game.FlipBoard()

        i += 1

    winner = game.get_winner()

    if winner == color_1:
        wins_1 += 1
        print('win 1')
    elif winner == color_2:
        wins_2 += 1
        print('win 2')
    elif winner == 'draw':
        draws += 1
        print('draw')

    print(' -- {} -- winner: {}'.format(i_test_game, winner))

print()
print('final statistics:')
print('wins 1: {}, draws: {}, wins 2: {}'.format(wins_1, draws, wins_2))


 -- 0 -- winner: white
 -- 1 -- winner: black
 -- 2 -- winner: black
 -- 3 -- winner: black
 -- 4 -- winner: black
 -- 5 -- winner: draw
 -- 6 -- winner: black
 -- 7 -- winner: white
 -- 8 -- winner: draw
 -- 9 -- winner: black
 -- 10 -- winner: black
 -- 11 -- winner: draw
 -- 12 -- winner: draw
 -- 13 -- winner: black
 -- 14 -- winner: black
 -- 15 -- winner: black
 -- 16 -- winner: white
 -- 17 -- winner: draw
 -- 18 -- winner: white
 -- 19 -- winner: black
 -- 20 -- winner: black
 -- 21 -- winner: draw
 -- 22 -- winner: black
 -- 23 -- winner: draw
 -- 24 -- winner: draw
 -- 25 -- winner: black
 -- 26 -- winner: black
 -- 27 -- winner: white
 -- 28 -- winner: black
 -- 29 -- winner: black
 -- 30 -- winner: draw
 -- 31 -- winner: white
 -- 32 -- winner: black
 -- 33 -- winner: black
 -- 34 -- winner: white
 -- 35 -- winner: black
 -- 36 -- winner: white
 -- 37 -- winner: black
 -- 38 -- winner: white
 -- 39 -- winner: draw
 -- 40 -- winner: draw
 -- 41 -- winner: white
 -- 42 -- win

In [None]:
'''
MC (51410) one step greedy  vs  MC (51410) one step greedy
    wins 1: 312, draws: 420, wins 2: 303

MC (51410) one step greedy  vs  untrained one step greedy
    wins 1: 828, draws: 113, wins 2: 59

MC (51410) one step greedy  vs  MC (51410) TS (10s)
    wins 1: 65, draws: 26, wins 2: 9

'''