## AlphaZero Implementation
One-step demonstration

Note: this notebook is still a work in progress.

#### Import common library

In [3]:
import time
import shlex
import hashlib
from collections import defaultdict
import numpy as np

#### Import MCTS library

In [38]:
from MCTS.utils import *
from MCTS.Agent import *
from MCTS.MCTS import *

# note: The MCTS library was designed for 'vanilla' MCTS and not specifically
# for Zero's implementation. We shall modify the library as we go.

#### Import game rules

In [39]:
import rules.Othello as Othello
# Short aliases for the two Othello classes used throughout this notebook.
OthelloGame, OthelloHelper = Othello.OthelloGame, Othello.OthelloHelper

#### Define hyperparameters

In [43]:
# MCTS search related
# c: exploration constant; scales the prior-based term added to Q when
#    start_mcts picks which child to descend into.
c = 1.2
# allowed_time: search time budget in seconds (compared against a
#    time.time() difference in exit_cond).
allowed_time = 10

# Game specific
# state_memory_n: number of entries in the initial state list.
state_memory_n = 1
# board_size: shape the policy output P is reshaped to in nnet_pred.
board_size = [8, 8]

#### Import and implement data structure for the game

In [41]:
class OthelloDataNode(ZeroDataNode):
    """Tree node for Othello, built on ZeroDataNode.

    Adds nothing beyond constructor defaults: ``Game`` defaults to
    OthelloGame and ``player`` defaults to 1.
    """
    def __init__(self, name, Game=OthelloGame, player=1):
        # hand every argument to the base class by keyword
        super().__init__(name=name, player=player, Game=Game)
    # end def
# end class

#### Define neural network

In [46]:
class zero_net:
    """Placeholder for the policy/value network (not implemented yet).

    nnet_pred calls ``forward(state)`` on an instance of this class and
    unpacks the result as ``(P, v)``.
    """

    def forward(self, state):
        """Evaluate ``state``; expected to return a ``(P, v)`` pair.

        Raises:
            NotImplementedError: always, until the network is written.
        """
        # TODO: implement the network
        # Fail with an explicit message rather than an AttributeError on
        # a missing method.
        raise NotImplementedError("zero_net.forward is not implemented yet")
    # end def
# end class
net = zero_net()

def nnet_pred(state):
    """Run the network on ``state`` and return ``(P, v)``.

    P is the first output of ``net.forward`` reshaped to ``board_size``;
    v is the second output, passed through unchanged.
    """
    policy, value = net.forward(state)
    return policy.reshape(board_size), value
# end def

#### Define exit conditions

In [18]:
def exit_cond(time0, time_thr):
    """Tell whether the search should stop.

    Args:
        time0: start time, as returned by time.time().
        time_thr: time budget in seconds.

    Returns:
        True once more than ``time_thr`` seconds have elapsed since
        ``time0``, otherwise False.
    """
    # time based
    if time.time() - time0 > time_thr:
        return True
    # end if

    # winning prob based
    # TODO

    # no exit condition met: return an explicit False instead of falling
    # off the end of the function (which returned None)
    return False
# end def

#### Main logic

In [45]:
def start_mcts(node, allowed_time):
    """Run a time-limited tree search starting at a root node.

    Each iteration walks down from the root through already-expanded
    nodes, picking at every step the action that maximizes
    ``Q[a] + c * sqrt(sum(N)) / (1 + N[a]) * P[a]``. The walk stops at a
    finished game or at the first un-expanded node. A finished game is
    scored from its winner; otherwise the node is expanded and scored by
    ``nnet_pred``. In both cases the value is passed to ``node.backprob``.

    Args:
        node: root of the tree; ``node.parent`` must be None.
        allowed_time: time budget in seconds, checked by ``exit_cond``
            before every iteration.

    Returns:
        The root node that was passed in.
    """
    assert node.parent is None
    # Keep our own handle on the root so every iteration (including the
    # terminal-node case) restarts from it, and so it is what we return.
    root = node
    # value backed up for a finished game, keyed by the winner id
    winner_value_dict = {1: 1, -1: 0, 0: 0.5}
    # reference point for the time budget
    time0 = time.time()

    # check exit condition before each iteration
    while not exit_cond(time0, allowed_time):
        # always start the descent at the root
        node = root

        # dynamically expand the tree - search for first un-expanded node
        while node.end_game() is not True and node.expanded is True:
            # 1. score every action: Q plus the prior-weighted bonus
            total_visits = sum(node.N.values())
            actions = list(node.Q.keys())
            # NOTE(review): an expanded node with no actions would make
            # argmax fail on an empty list - confirm that cannot happen.
            scores = [
                node.Q[a] + c*np.sqrt(total_visits) / (1+node.N[a]) * node.P[a]
                for a in actions
            ]

            # 2. choose action that maximizes the score
            action = actions[np.argmax(scores)]
            node = node.child_dict[action]
        # end while

        # if end_game, simply evaluate
        _winner = node.get_winner()
        if _winner is not None:
            node.backprob(winner_value_dict[_winner])
            # next iteration restarts from the root (see top of the loop)
            continue
        # end if
        # if not, expand and evaluate

        # - list all possible moves
        child_nodes = node.grow_branches()
        # append legal child nodes
        node.append_children(child_nodes)

        # - neuralnet evaluation here
        P, v = nnet_pred(node.state)

        # back-propagation
        node.assign_probs(P)
        node.backprob(v)
    # end while

    return root
# end def

#### Initialize the board

In [8]:
# starting board, as produced by the Othello rules helper
new_board = OthelloHelper.new_board()

In [16]:
# Initial node state: a list of state_memory_n stateType entries for player 1.
# NOTE(review): every entry references the same new_board object; harmless
# while state_memory_n == 1 - confirm stateType copies its data if that changes.
init_state = [stateType(data=new_board, player=1) for _ in range(state_memory_n)]

In [17]:
# show the board (optional)
# OthelloHelper.print_board(init_state[0])

#### Initialize a node

In [None]:
# Build the root of the search tree. (`self.Node` cannot work here: there is
# no `self` at notebook top level, so the Othello node class is used directly.)
node = OthelloDataNode("root")
node.state = init_state

In [None]:
# sample the node: run the tree search from the root with a time budget
# of allowed_time seconds
sampled_node = start_mcts(node, allowed_time=allowed_time)

#### Choose a node using the tree search result

In [None]:
# pick one of the searched node's children
# NOTE(review): choose_node is not defined in this notebook (presumably it
# comes from the MCTS library), and start_mcts reaches children through
# node.child_dict - confirm that .children is the right attribute here.
chosen_node = choose_node(sampled_node.children)

#### Print the selected action

In [None]:
# show the action stored on the chosen child node
print(chosen_node.action)