In [1]:
import random
import math
import numba
from copy import deepcopy

from chainreaction.game import *
from chainreaction.positions import *
from chainreaction.board import *
from chainreaction.mcts import *
from chainreaction.four_players import *

In [2]:
%load_ext line_profiler
%load_ext Cython

In [3]:
def next_viable_player(player, utilities):
    """Return the next player in turn order whose utility is non-zero.

    The search starts at ``player.next_player()`` and keeps advancing with
    ``next_player()`` while the candidate's entry in ``utilities`` equals 0.

    Args:
        player: Object exposing ``next_player()`` and
            ``get_zero_indexed_player_idx()``.
        utilities: Sized sequence (list or 1-D array) indexed by the value of
            ``get_zero_indexed_player_idx()``.

    Returns:
        The first candidate after ``player`` whose utility is non-zero. This
        can be a player equivalent to ``player`` itself when every other
        entry is zero.

    Raises:
        ValueError: If no candidate with a non-zero utility is found. The
            previous implementation looped forever in that situation.
    """
    candidate = player.next_player()
    # One probe per utilities entry is enough to see every seat once,
    # assuming the turn cycle is no longer than ``utilities`` -- TODO confirm.
    for _ in range(len(utilities)):
        if utilities[candidate.get_zero_indexed_player_idx()] != 0:
            return candidate
        candidate = candidate.next_player()

    raise ValueError("no player with a non-zero utility to move next")

def get_scores(state):
    """Normalise the board utilities of ``state`` so that they sum to one.

    The raw utilities come from ``board_utility(state.atom_type)``; each one
    is divided by their total.

    Returns:
        The utilities divided by their sum. An assertion checks that no
        score exceeds 1.0.
    """
    raw_utilities = board_utility(state.atom_type)
    total = np.sum(raw_utilities)
    normalised = raw_utilities / total
    assert np.max(normalised) <= 1.0
    return normalised

In [4]:
def mcts_step(state, player):
    """Search from ``state`` on behalf of ``player`` and pick one move.

    A fresh ``MCTSNormalNode`` is built as the search root, and the action
    that leads to the node returned by ``best_action()`` is reported.

    Returns:
        A ``(None, action)`` tuple; the first slot is always ``None``.
    """
    # Open question kept from the original author: make sure the right type
    # of MCTS node is used here -- maybe handle it in the base MCTS node class.
    search_root = MCTSNormalNode(state, parent=None, parent_action=None, player=player)
    chosen_node = search_root.best_action()
    return None, chosen_node.parent_action

In [5]:
def simulate(state, min_moves=4):
    """Play one game with MCTS choosing every move, printing the progress.

    Starting with ``FourPlayers.P1``, each turn asks ``mcts_step`` for a move
    on a copy of the board, applies it with ``game_step`` and prints the move
    and the resulting board. The loop stops at the first terminal position
    reached after more than ``min_moves`` moves, and the player(s) holding
    the highest utility are announced as winners.

    Args:
        state: Board to start from. It must provide ``get_copy()``; the local
            name is rebound to whatever ``game_step`` returns.
        min_moves: A terminal flag is ignored until more than this many moves
            have been played. Defaults to 4, the value that used to be
            hard-coded -- presumably so each of the four players gets a
            first turn; TODO confirm.

    Returns:
        None. All results are reported through ``print``.
    """
    player = FourPlayers.P1
    moves_count = 0
    while True:
        # Search on copies so the tree search cannot disturb the live game.
        _, move = mcts_step(state.get_copy(), deepcopy(player))
        moves_count += 1
        print(f'{player} placing on {move}')
        state, player, utilities, terminal = game_step(state, player, move)
        print("New State:")
        print(format_board(state))
        if terminal and moves_count > min_moves:
            top_utility = max(utilities)
            # Players are reported 1-based; ties produce several winners.
            winners = [
                str(idx + 1)
                for idx, utility in enumerate(utilities)
                if utility == top_utility
            ]
            if len(winners) == 1:
                print("Game Won by Player " + winners[0])
            else:
                print("Game Won by Players " + ", ".join(winners))
            break
        print('=='*20)

In [8]:
# Build the 3x3 starting board used by the simulation/profiling cells below.
w, h = 3, 3
state = Board(w, h)
# Seed one atom in each corner: P1 and P2 get one corner each, and the
# remaining two corners are placed with the shared FourPlayers.P34 value.
state.place_atom(0, 0, FourPlayers.P1.value)
state.place_atom(0, 2, FourPlayers.P2.value)
state.place_atom(2, 0, FourPlayers.P34.value)
state.place_atom(2, 2, FourPlayers.P34.value)

In [7]:
# Play one full MCTS-driven game from the seeded board; every move and the
# resulting board are printed below.
simulate(state)

FourPlayers.P1 placing on (2, 1)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '----']
 ['1-P34' '1-P1' '1-P34']]
FourPlayers.P2 placing on (1, 2)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '1-P2']
 ['1-P34' '1-P1' '1-P34']]
FourPlayers.P3 placing on (2, 2)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '2-P34']
 ['1-P34' '2-P34' '----']]
FourPlayers.P4 placing on (2, 2)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '2-P34']
 ['1-P34' '2-P34' '1-P4']]
FourPlayers.P1 placing on (1, 1)
New State:
[['1-P1' '----' '1-P2']
 ['----' '1-P1' '2-P34']
 ['1-P34' '2-P34' '1-P4']]
FourPlayers.P2 placing on (1, 0)
New State:
[['1-P1' '----' '1-P2']
 ['1-P2' '1-P1' '2-P34']
 ['1-P34' '2-P34' '1-P4']]
FourPlayers.P3 placing on (2, 1)
New State:
[['1-P1' '----' '1-P2']
 ['1-P2' '1-P1' '2-P34']
 ['1-P34' '2-P3' '1-P4']]
FourPlayers.P4 placing on (2, 2)
New State:
[['1-P1' '1-P4' '----']
 ['2-P4' '3-P4' '2-P4']
 ['----' '2-P4' '----']]
FourPlayers.P1 placing on (2, 2)
New State:
[[

In [9]:
# Line-profile MCTSNormalNode._tree_policy while a full game is simulated
# (requires the line_profiler extension loaded above).
%lprun -f MCTSNormalNode._tree_policy simulate(state)

FourPlayers.P1 placing on (1, 2)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '1-P1']
 ['1-P34' '----' '1-P34']]
FourPlayers.P2 placing on (2, 1)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '1-P1']
 ['1-P34' '1-P2' '1-P34']]
FourPlayers.P3 placing on (2, 2)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '2-P34']
 ['1-P34' '2-P34' '----']]
FourPlayers.P4 placing on (1, 2)
New State:
[['1-P1' '1-P34' '----']
 ['----' '1-P34' '1-P34']
 ['1-P34' '2-P34' '1-P34']]
FourPlayers.P1 placing on (1, 0)
New State:
[['1-P1' '1-P34' '----']
 ['1-P1' '1-P34' '1-P34']
 ['1-P34' '2-P34' '1-P34']]
FourPlayers.P3 placing on (2, 1)
New State:
[['1-P1' '1-P34' '----']
 ['2-P34' '2-P34' '2-P34']
 ['----' '2-P34' '----']]
FourPlayers.P4 placing on (0, 2)
New State:
[['1-P1' '1-P34' '1-P4']
 ['2-P34' '2-P34' '2-P34']
 ['----' '2-P34' '----']]
FourPlayers.P1 placing on (2, 0)
New State:
[['1-P1' '1-P34' '1-P4']
 ['2-P34' '2-P34' '2-P34']
 ['1-P1' '2-P34' '----']]
FourPlayers.P3 placing on (2, 

Timer unit: 1e-07 s

Total time: 37.8251 s
File: c:\Users\rahul\Documents\GameAI530\chainreaction\mcts.py
Function: _tree_policy at line 99

Line #      Hits         Time  Per Hit   % Time  Line Contents
    99                                               def _tree_policy(self):
   100     45000     280435.0      6.2      0.1          current_node = self
   101    315186    1946923.0      6.2      0.5          while not current_node.is_state_terminal:
   102    278751    2443477.0      8.8      0.6              if not current_node.is_fully_expanded():
   103      8565   25419164.0   2967.8      6.7                  return current_node.expand()
   104                                                       else:
   105    270186  348058859.0   1288.2     92.0                  current_node = current_node.best_child()
   106     36435     101967.0      2.8      0.0          return current_node

In [9]:
# Re-run of the same line profile of MCTSNormalNode._tree_policy over a
# full simulated game, for comparison with the earlier report.
%lprun -f MCTSNormalNode._tree_policy simulate(state)

FourPlayers.P1 placing on (2, 1)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '----']
 ['1-P34' '1-P1' '1-P34']]
FourPlayers.P2 placing on (1, 2)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '1-P2']
 ['1-P34' '1-P1' '1-P34']]
FourPlayers.P3 placing on (2, 2)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '2-P34']
 ['1-P34' '2-P34' '----']]
FourPlayers.P4 placing on (2, 2)
New State:
[['1-P1' '----' '1-P2']
 ['----' '----' '2-P34']
 ['1-P34' '2-P34' '1-P4']]
FourPlayers.P1 placing on (1, 1)
New State:
[['1-P1' '----' '1-P2']
 ['----' '1-P1' '2-P34']
 ['1-P34' '2-P34' '1-P4']]
FourPlayers.P2 placing on (1, 0)
New State:
[['1-P1' '----' '1-P2']
 ['1-P2' '1-P1' '2-P34']
 ['1-P34' '2-P34' '1-P4']]
FourPlayers.P3 placing on (2, 1)
New State:
[['1-P1' '1-P34' '----']
 ['2-P34' '3-P34' '1-P34']
 ['----' '2-P34' '1-P34']]
FourPlayers.P4 placing on (2, 2)
New State:
[['2-P4' '2-P4' '1-P4']
 ['----' '2-P4' '1-P4']
 ['2-P4' '2-P4' '----']]
Game Won by Player 4


Timer unit: 1e-07 s

Total time: 5.06943 s
File: c:\Users\rahul\Documents\GameAI530\chainreaction\mcts.py
Function: _tree_policy at line 99

Line #      Hits         Time  Per Hit   % Time  Line Contents
    99                                               def _tree_policy(self):
   100     40000     200289.0      5.0      0.4          current_node = self
   101    308516    1248906.0      4.0      2.5          while not current_node.is_state_terminal:
   102    269173    1934136.0      7.2      3.8              if not current_node.is_fully_expanded():
   103       657    1699686.0   2587.0      3.4                  return current_node.expand()
   104                                                       else:
   105    268516   45509297.0    169.5     89.8                  current_node = current_node.best_child()
   106     39343     101972.0      2.6      0.2          return current_node