In [1]:
from pprint import pprint
import numpy as np

---

## Test simulator

In [2]:
from alphazero.addons.simulator import Simulator

In [3]:
# 4x4 integer grid used as the fixed test position for the cells below.
board = np.array(
    [
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 4],
        [4, 0, 4, 2],
    ]
)

In [4]:
# Display the starting board to confirm its layout before stepping the simulator.
pprint(board)

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 4],
       [4, 0, 4, 2]])


In [5]:
# Instantiate the project simulator with its default constructor arguments.
simulator = Simulator()

In [6]:
# Step the simulator from `board`. The result is inspected below through
# `output.stochastic_states` and is later handed to `expand_node`.
output = simulator.step(board)

In [7]:
# Show element [0][0] of the step result's stochastic states; the recorded
# output is a list of StochasticState(state, probability) records.
# NOTE(review): the first index is presumably the action — confirm against
# Simulator.step before relying on it.
pprint(output.stochastic_states[0][0])

[StochasticState(state=array([[2, 0, 0, 0],
       [0, 0, 0, 0],
       [4, 0, 0, 0],
       [8, 2, 0, 0]]), probability=0.06923076923076923),
 StochasticState(state=array([[0, 2, 0, 0],
       [0, 0, 0, 0],
       [4, 0, 0, 0],
       [8, 2, 0, 0]]), probability=0.06923076923076923),
 StochasticState(state=array([[0, 0, 2, 0],
       [0, 0, 0, 0],
       [4, 0, 0, 0],
       [8, 2, 0, 0]]), probability=0.06923076923076923),
 StochasticState(state=array([[0, 0, 0, 2],
       [0, 0, 0, 0],
       [4, 0, 0, 0],
       [8, 2, 0, 0]]), probability=0.06923076923076923),
 StochasticState(state=array([[0, 0, 0, 0],
       [2, 0, 0, 0],
       [4, 0, 0, 0],
       [8, 2, 0, 0]]), probability=0.06923076923076923),
 StochasticState(state=array([[0, 0, 0, 0],
       [0, 2, 0, 0],
       [4, 0, 0, 0],
       [8, 2, 0, 0]]), probability=0.06923076923076923),
 StochasticState(state=array([[0, 0, 0, 0],
       [0, 0, 2, 0],
       [4, 0, 0, 0],
       [8, 2, 0, 0]]), probability=0.06923076923076923),

---
## Expand Node

In [8]:
from alphazero.search.mcts import expand_node, select_child, backpropagate
from alphazero.search.node import Node
from alphazero.search.helpers import MinMaxStats

In [9]:
from alphazero.addons.types import NetworkOutput
from alphazero.addons.config import UpperConfidenceBounds

In [10]:
# Build the search root and attach the test board as its state.
# NOTE(review): the positional 0 is presumably the node's prior — confirm
# against Node.__init__.
root = Node(0)
root.state = board

In [11]:
# Hand-written stand-in for a network prediction: a scalar value plus one
# probability per action key (0-3).
network_output = NetworkOutput(
    value=4.0,
    probabilities={0: 0.45, 1: 0.1, 2: 0.45, 3: 0.0},
)

In [12]:
# Before expansion: the recorded output shows an empty children dict.
pprint(root.children)

{}


In [13]:
# Expand the root from the hand-written network output and the simulator
# step result. Any return value of expand_node is ignored here.
expand_node(root, network_output, output)

In [14]:
# After expansion: the recorded output shows four children keyed 0-3.
pprint(root.children)

{0: <alphazero.search.node.Node object at 0x10fb864f0>,
 1: <alphazero.search.node.Node object at 0x10fb86670>,
 2: <alphazero.search.node.Node object at 0x10fb86820>,
 3: <alphazero.search.node.Node object at 0x10fb78ee0>}


In [15]:
# Inspect the child stored under key 0 of the root: its prior, then whether
# it is flagged as a chance node.
for attr_name in ("prior", "is_chance"):
    pprint(getattr(root.children[0], attr_name))

0.45
True


In [16]:
# List the children of the root's key-0 child; the recorded output shows
# integer-keyed Node objects.
pprint(root.children[0].children)

{0: <alphazero.search.node.Node object at 0x10fb867c0>,
 1: <alphazero.search.node.Node object at 0x10fb86fa0>,
 2: <alphazero.search.node.Node object at 0x10fb86f70>,
 3: <alphazero.search.node.Node object at 0x10fb86970>,
 4: <alphazero.search.node.Node object at 0x10fb86400>,
 5: <alphazero.search.node.Node object at 0x10fb86f40>,
 6: <alphazero.search.node.Node object at 0x10fb864c0>,
 7: <alphazero.search.node.Node object at 0x10fb86d90>,
 8: <alphazero.search.node.Node object at 0x10fb86d60>,
 9: <alphazero.search.node.Node object at 0x10fb86d30>,
 10: <alphazero.search.node.Node object at 0x10fb86df0>,
 11: <alphazero.search.node.Node object at 0x10fb86e20>,
 12: <alphazero.search.node.Node object at 0x10fb866d0>,
 13: <alphazero.search.node.Node object at 0x10fb861f0>,
 14: <alphazero.search.node.Node object at 0x10fb86070>,
 15: <alphazero.search.node.Node object at 0x10fb86700>,
 16: <alphazero.search.node.Node object at 0x10fb86a30>,
 17: <alphazero.search.node.Node object a

In [17]:
# Inspect the first grandchild (key 0 under the root's key-0 child), printing
# the fields in this order: prior, chance flag, board state, reward.
for attr_name in ("prior", "is_chance", "state", "reward"):
    pprint(getattr(root.children[0].children[0], attr_name))

0.06923076923076923
False
array([[2, 0, 0, 0],
       [0, 0, 0, 0],
       [4, 0, 0, 0],
       [8, 2, 0, 0]])
8


In [18]:
# Statistics object passed to both select_child and backpropagate below.
# NOTE(review): None presumably means "no known value bounds" — confirm
# against MinMaxStats.
min_max_stats = MinMaxStats(None)

In [19]:
# Search settings handed to select_child; `discount` is also read when
# calling backpropagate.
bounds = UpperConfidenceBounds(
    discount=0.999,
    pb_c_base=19652,
    pb_c_init=1.25,
)

In [20]:
# Walk down from the root: while the current node reports itself as expanded,
# let select_child choose the next node. Every visited node (root included)
# is recorded in search_path, and `node` ends up as the last node reached.
search_path = [root]
while search_path[-1].expanded():
    search_path.append(select_child(bounds, search_path[-1], min_max_stats))
node = search_path[-1]

In [21]:
# Dump the node reached by the selection loop: chance flag, prior, reward,
# board state, and finally its current value estimate.
for attr_name in ("is_chance", "prior", "reward", "state"):
    pprint(getattr(node, attr_name))
pprint(node.value())

False
1.0
-10
array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 4],
       [4, 0, 4, 2]])
0


In [23]:
# Push the network's value estimate back along the selected search_path,
# using the configured discount.
# NOTE(review): min_max_stats is presumably updated as a side effect —
# confirm against backpropagate.
backpropagate(search_path, network_output.value, bounds.discount, min_max_stats)

In [24]:
# Re-inspect the same node after backpropagation; in the recorded output
# only the value estimate differs from the earlier dump.
for attr_name in ("is_chance", "prior", "reward", "state"):
    pprint(getattr(node, attr_name))
pprint(node.value())

False
1.0
-10
array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 4],
       [4, 0, 4, 2]])
4.0


In [25]:
# Second selection pass from the root: keep following select_child while the
# current node reports itself as expanded. search_path is rebuilt from
# scratch and `node` is rebound to the last node reached.
search_path = [root]
while search_path[-1].expanded():
    search_path.append(select_child(bounds, search_path[-1], min_max_stats))
node = search_path[-1]

In [26]:
# Dump the node reached by the second selection pass: chance flag, prior,
# reward, board state, and its current value estimate.
for attr_name in ("is_chance", "prior", "reward", "state"):
    pprint(getattr(node, attr_name))
pprint(node.value())

False
0.06923076923076923
8
array([[0, 0, 0, 0],
       [0, 0, 2, 0],
       [0, 0, 0, 4],
       [0, 0, 8, 2]])
0
