In [1]:
#import sys, os; sys.path.insert(1, os.path.join(sys.path[0], '..'))
import os
os.chdir('..')
import torch
import pandas as pd
from itertools import permutations
import numpy as np
from neural_network import NeuralNetwork
from players.uninformed_mcts_player import UninformedMCTSPlayer
from players.deep_mcts_player import DeepMCTSPlayer
from games.vortex import Vortex_5_20, Vortex_6_20, Vortex_7_20, Vortex_8_20, Vortex_9_20
from models.vornet import VorNet

All experiments will use the same model checkpoint for model weights. 

The model was initially trainined for the *Vortex_5_20* game/input shape. 5 is the board side size: a 25 node Vortex board which approximates a 5x5 Hex board in topology. 20 is the number of message passing steps in the construction of the neural network input.

In [2]:
checkpoint = '295'

A match is two games played with the same vortex board topology. On the first game player 0 plays first, on the second player 1 plays first.

Returns: score as list of results:
- player 0 first player 0 wins
- player 0 first player 1 wins
- player 1 first player 0 wins
- player 1 first player 1 wins

In [3]:
def play_match(game, players, verbose=False):

    # permutations to break the dependence on player order in measuring strength.
    matches = list(permutations(np.arange(len(players))))
    
    # Initialize scoreboard
    scores = np.zeros((len(matches), game.get_num_players()))

    # initialise the Vortex board
    vortex_board = game.get_initial_state()

    # Run the matches
    for i, order in enumerate(matches):
        s = vortex_board.copy()

        for p in players:
            p.reset() # Clear player trees to make the next match fair

        game_over = game.check_game_over(s)

        while game_over is None:
            p = order[game.get_player(s)]
            if verbose: print("Player #{}'s turn.".format(p))
            s = players[p].update_state(s)
            game_over = game.check_game_over(s)

        scores[i, list(order)] += game_over

    scores = list(scores.flatten().astype(int))
    return scores

**Evaluate model strength by comparing simulation requirements with an uninformed MCTS agent**

This runs the DeepMCTS model against an uninformed MCTS agent 

- the DeepMCTS agent has a fixed number of simulations 
- the uninformed MCTS agent uses a range of simulation counts

For each combination, *match_n* matches are played.

In [4]:
dmcts_sims = [40]
umcts_sims = [10, 20, 40, 80, 160, 320, 640, 1280, 2560]
match_n = 20

In [5]:
game = Vortex_5_20()
nn = NeuralNetwork(game, VorNet, cuda=True)
nn.load(checkpoint)
results = []

for dmcts_sim in dmcts_sims:
    for umcts_sim in umcts_sims:
        deep_mcts = DeepMCTSPlayer(game, nn, simulations=dmcts_sim)
        uninformed = UninformedMCTSPlayer(game, simulations=umcts_sim)
        players = [deep_mcts, uninformed]

        print("DMCTS: {}, UMCTS: {}".format(dmcts_sim, umcts_sim))

        for i in range(match_n):
            match = [dmcts_sim, umcts_sim, i]
            scores = play_match(game, players, verbose=False)
            print("  match {}, score {}".format(i, scores))
            results.append(match + scores)


DMCTS: 40, UMCTS: 10
  match 0, score [1, 0, 0, 1]
  match 1, score [1, 0, 0, 1]
  match 2, score [1, 0, 0, 1]
  match 3, score [1, 0, 1, 0]
  match 4, score [1, 0, 0, 1]
  match 5, score [1, 0, 0, 1]
  match 6, score [1, 0, 0, 1]
  match 7, score [1, 0, 0, 1]
  match 8, score [1, 0, 1, 0]
  match 9, score [1, 0, 0, 1]
  match 10, score [1, 0, 1, 0]
  match 11, score [0, 1, 0, 1]
  match 12, score [1, 0, 0, 1]
  match 13, score [1, 0, 0, 1]
  match 14, score [1, 0, 0, 1]
  match 15, score [1, 0, 1, 0]
  match 16, score [1, 0, 0, 1]
  match 17, score [1, 0, 1, 0]
  match 18, score [1, 0, 0, 1]
  match 19, score [1, 0, 1, 0]
DMCTS: 40, UMCTS: 20
  match 0, score [1, 0, 0, 1]
  match 1, score [1, 0, 0, 1]
  match 2, score [1, 0, 0, 1]
  match 3, score [1, 0, 1, 0]
  match 4, score [1, 0, 1, 0]
  match 5, score [1, 0, 1, 0]
  match 6, score [1, 0, 1, 0]
  match 7, score [1, 0, 1, 0]
  match 8, score [1, 0, 1, 0]
  match 9, score [1, 0, 1, 0]
  match 10, score [1, 0, 1, 0]
  match 11, score

In [6]:
players = [0,1]
index = pd.MultiIndex.from_product([players, players], names=['first_player', 'score_player'])
#index
df = pd.DataFrame(data=results, columns=['dmcts_sim', 'umcts_sim', 'match', 'p0fp_p0win', 'p0fp_p1win', 'p1fp_p0win', 'p1fp_p1win'])
df = df.set_index(['dmcts_sim', 'umcts_sim', 'match'])
df.columns = index
df.to_csv("./notebooks/results/simulations_compare.csv")
df.groupby(['dmcts_sim', 'umcts_sim']).sum()

Unnamed: 0_level_0,first_player,0,0,1,1
Unnamed: 0_level_1,score_player,0,1,0,1
dmcts_sim,umcts_sim,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
40,10,19,1,6,14
40,20,20,0,13,7
40,40,20,0,11,9
40,80,19,1,12,8
40,160,20,0,14,6
40,320,20,0,13,7
40,640,18,2,9,11
40,1280,17,3,8,12
40,2560,14,6,12,8


**Evaluate performance of DeepMCTS on larger boards than the 25 node board it was trained on**

The DeepMCTS agent is run against the uniformed MCTS agent. For each board size, *match_n* matches are played.

In [7]:
dmcts_sim = 40
umcts_sim = 320
board_sizes = [Vortex_5_20, Vortex_6_20, Vortex_7_20, Vortex_8_20, Vortex_9_20]
match_n = 100
directory = "checkpoints/Vortex_5_20-VorNet"

In [8]:
results = []
for Game in board_sizes:
    board_size = Game.__name__
    game = Game()
    nn = NeuralNetwork(game, VorNet, cuda=True)
    nn.load(checkpoint, directory=directory)

    deep_mcts = DeepMCTSPlayer(game, nn, simulations=dmcts_sim)
    uninformed = UninformedMCTSPlayer(game, simulations=umcts_sim)
    players = [deep_mcts, uninformed]

    print("Board size: {}".format(board_size))

    for i in range(match_n):
        match = [board_size, i]
        scores = play_match(game, players, verbose=False)
        print("  match {}, score {}".format(i, scores))
        results.append(match + scores)

Board size: Vortex_5_20
  match 0, score [1, 0, 0, 1]
  match 1, score [1, 0, 1, 0]
  match 2, score [1, 0, 1, 0]
  match 3, score [1, 0, 1, 0]
  match 4, score [1, 0, 0, 1]
  match 5, score [1, 0, 1, 0]
  match 6, score [1, 0, 0, 1]
  match 7, score [1, 0, 0, 1]
  match 8, score [1, 0, 1, 0]
  match 9, score [1, 0, 1, 0]
  match 10, score [1, 0, 1, 0]
  match 11, score [1, 0, 0, 1]
  match 12, score [1, 0, 1, 0]
  match 13, score [1, 0, 0, 1]
  match 14, score [1, 0, 0, 1]
  match 15, score [1, 0, 1, 0]
  match 16, score [1, 0, 0, 1]
  match 17, score [0, 1, 1, 0]
  match 18, score [1, 0, 1, 0]
  match 19, score [1, 0, 1, 0]
  match 20, score [1, 0, 0, 1]
  match 21, score [1, 0, 1, 0]
  match 22, score [1, 0, 1, 0]
  match 23, score [1, 0, 0, 1]
  match 24, score [1, 0, 0, 1]
  match 25, score [1, 0, 0, 1]
  match 26, score [1, 0, 0, 1]
  match 27, score [1, 0, 1, 0]
  match 28, score [1, 0, 0, 1]
  match 29, score [1, 0, 1, 0]
  match 30, score [1, 0, 0, 1]
  match 31, score [1, 0, 

In [9]:
players = [0,1]
index = pd.MultiIndex.from_product([players, players], names=['first_player', 'score_player'])
df = pd.DataFrame(data=results, columns=['board_size', 'match', 'p0fp_p0win', 'p0fp_p1win', 'p1fp_p0win', 'p1fp_p1win'])
df = df.set_index(['board_size', 'match'])
df.columns = index
df.to_csv("./notebooks/results/board_sizes.csv")
df.groupby(['board_size']).sum()


first_player,0,0,1,1
score_player,0,1,0,1
board_size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Vortex_5_20,97,3,55,45
Vortex_6_20,97,3,48,52
Vortex_7_20,97,3,57,43
Vortex_8_20,99,1,74,26
Vortex_9_20,95,5,73,27


Simulating games to generate training data is time consuming on larger boards because untrained agents act close to randomly and this results in more moves required to reach a win state. 

Do models pretrained to smaller boards reduce this initial game simulation time?

In [11]:
# times for 60 games of 36 node Vortex, sides of 6, 100 simulations per action (tree search)
sim_times = [216, 215, 205, 205, 216, 204, 219, 209, 200, 203]
mean_game_time = sum(sim_times) / (len(sim_times) * 30)
mean_game_time

# side 7
sim_times = [339, 366, 448, 518, 499, 490, 471, 457, 467, 452]
mean_game_time = sum(sim_times) / (len(sim_times) * 30)
mean_game_time

# side 8
sim_times = [588, 583, 543, 527, 529, 523, 554, 537, 570, 564]
mean_game_time = sum(sim_times) / (len(sim_times) * 30)
mean_game_time

7.511666666666667