In [1]:
import tensorflow as tf
import numpy as np

from board import Board
from agent import Agent
from mcts import mcts, MCTS, TerminalStateException, _ucb_score, _simulate_truncated

In [2]:
CHECKPONT = '../checkpoints/othello_rl_v2_nomcts_train_20191031_111829/ckpt-60' 

agent = Agent(8)
tf.train.Checkpoint(net=agent).restore(CHECKPONT).expect_partial()

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1387c28d0>

In [3]:
board = Board()

s = MCTS(agent, n_iter=100, c=4)
root_node, mcts_p, _, _ = s.search(board, 0)
# root_node, mcts_p, _, _ = mcts(board, agent, 0, n_iter=100, c=4)

In [4]:
root_node.child_values

array([-0.5834996 , -0.23937608, -0.97238535,  0.15635358], dtype=float32)

In [73]:
import copy

def select_leaf_nodes(node, seq=[]):
    leaf_nodes = []
    if len(node.children) == 0:
        leaf_nodes.append((copy.copy(seq), node))
    else:
        for i, child in enumerate(node.children):
            child_seq = copy.copy(seq) + [i]
            leaf_nodes += select_leaf_nodes(child, seq=child_seq)
            
    return leaf_nodes
        
leaf_nodes = select_leaf_nodes(root_node)

In [7]:
def print_node(agent, node, c=4):
    if node is None:
        return
    print('Color: %d' % node.color)
    print(node.board)
    if len(node.children) > 0:
        p, _ = node.get_p_v(agent)
        p = p[node.valid_positions_indices]
        ucb_scores = _ucb_score(agent, node, c)
        for i in range(len(node.children)):
            child = node.children[i]
            print('Child: %d, Value: %.4f, Visits: %d, UCB: %.4f, Pi: %.4f' % (i, child.value / (1 + child.visits), child.visits, ucb_scores[i], p[i]))
            
        print('Max Visits: %d' % np.argmax([child.visits for child in node.children]))
        print('Max UCB: %d' % np.argmax(ucb_scores))
        print('Max PI: %d' % np.argmax(p))
        print()
        
        return node.children[np.argmax(ucb_scores)]

curr_node = root_node
while curr_node is not None:
    curr_node = print_node(agent, curr_node)


Color: 0
	0	1	2	3	4	5	6	7
	-	-	-	-	-	-	-	-
0 |	.	.	.	.	.	.	.	.
1 |	.	.	.	.	.	.	.	.
2 |	.	.	.	.	.	.	.	.
3 |	.	.	.	1	0	.	.	.
4 |	.	.	.	0	1	.	.	.
5 |	.	.	.	.	.	.	.	.
6 |	.	.	.	.	.	.	.	.
7 |	.	.	.	.	.	.	.	.
Player 0: 2, Player 1: 2
Child: 0, Value: 0.0180, Visits: 1, UCB: 0.2504, Pi: 0.0011
Child: 1, Value: 0.0154, Visits: 46, UCB: 0.3620, Pi: 0.0169
Child: 2, Value: 0.0085, Visits: 13, UCB: 0.3617, Pi: 0.0055
Child: 3, Value: 0.0171, Visits: 40, UCB: 0.3565, Pi: 0.0146
Max Visits: 1
Max UCB: 1
Max PI: 1

Color: 1
	0	1	2	3	4	5	6	7
	-	-	-	-	-	-	-	-
0 |	.	.	.	.	.	.	.	.
1 |	.	.	.	.	.	.	.	.
2 |	.	.	.	.	.	.	.	.
3 |	.	.	0	0	0	.	.	.
4 |	.	.	.	0	1	.	.	.
5 |	.	.	.	.	.	.	.	.
6 |	.	.	.	.	.	.	.	.
7 |	.	.	.	.	.	.	.	.
Player 0: 4, Player 1: 1
Child: 0, Value: -0.0298, Visits: 19, UCB: 0.5712, Pi: 0.0250
Child: 1, Value: 0.0086, Visits: 10, UCB: 0.5551, Pi: 0.0131
Child: 2, Value: -0.0125, Visits: 17, UCB: 0.5708, Pi: 0.0224
Max Visits: 0
Max UCB: 0
Max PI: 0

Color: 0
	0	1	2	3	4	5	6	7
	-	-	-	-	-	-	-	-
0

In [6]:
_simulate_truncated(agent, curr_node)

AttributeError: 'NoneType' object has no attribute 'increment_visits'

In [None]:
print_node(agen)

In [39]:
def traverse_best(agent, node, c=4):
    print(node.board)
    if len(node.children) > 0:
        child_values =  _ucb_score(agent, node, c)
        idx = np.argmax(child_values)
        traverse_best(agent, node.children[idx], c=c)
    
traverse_best(agent, root_node)

	0	1	2	3	4	5	6	7
	-	-	-	-	-	-	-	-
0 |	.	.	.	.	.	.	.	.
1 |	.	.	.	.	.	.	.	.
2 |	.	.	.	.	.	.	.	.
3 |	.	.	.	0	1	.	.	.
4 |	.	.	.	1	0	.	.	.
5 |	.	.	.	.	.	.	.	.
6 |	.	.	.	.	.	.	.	.
7 |	.	.	.	.	.	.	.	.
Player 0: 2, Player 1: 2
	0	1	2	3	4	5	6	7
	-	-	-	-	-	-	-	-
0 |	.	.	.	.	.	.	.	.
1 |	.	.	.	.	.	.	.	.
2 |	.	.	.	.	.	.	.	.
3 |	.	.	.	0	1	.	.	.
4 |	.	.	0	0	0	.	.	.
5 |	.	.	.	.	.	.	.	.
6 |	.	.	.	.	.	.	.	.
7 |	.	.	.	.	.	.	.	.
Player 0: 4, Player 1: 1
	0	1	2	3	4	5	6	7
	-	-	-	-	-	-	-	-
0 |	.	.	.	.	.	.	.	.
1 |	.	.	.	.	.	.	.	.
2 |	.	.	.	.	.	.	.	.
3 |	.	.	.	0	1	.	.	.
4 |	.	.	0	1	0	.	.	.
5 |	.	.	1	.	.	.	.	.
6 |	.	.	.	.	.	.	.	.
7 |	.	.	.	.	.	.	.	.
Player 0: 3, Player 1: 3
	0	1	2	3	4	5	6	7
	-	-	-	-	-	-	-	-
0 |	.	.	.	.	.	.	.	.
1 |	.	.	.	.	.	.	.	.
2 |	.	.	.	.	.	.	.	.
3 |	.	.	.	0	1	.	.	.
4 |	.	.	0	0	0	.	.	.
5 |	.	.	1	0	.	.	.	.
6 |	.	.	.	.	.	.	.	.
7 |	.	.	.	.	.	.	.	.
Player 0: 5, Player 1: 2
	0	1	2	3	4	5	6	7
	-	-	-	-	-	-	-	-
0 |	.	.	.	.	.	.	.	.
1 |	.	.	.	.	.	.	.	.
2 |	.	.	.	.	.	.	.	.
3 |	.	.	1	1	1	.	.	.
4 |	.	.	1	