# Play Go against the best model

## Import functions and define functions for playing

In [1]:
import numpy as np
from go import go_board
from MCTS import state_node, MCTS, gpu_worker, rotate_S, reverse_rotate
import torch
from torch.multiprocessing import Process, Queue, Pipe, Value, Lock, Manager, Pool
from training_functions import load_latest_model
from IPython.display import clear_output


def print_board(board_array):
    """Print a Go board as text, with row and column indices.

    Each point is rendered as a single symbol: a hollow circle (U+25EF)
    where the value is -1, "+" where it is 0, and a filled circle (U+25CF)
    for any other value.  The board size is taken from the array shape, so
    boards other than 9x9 are printed correctly as well.

    Args:
        board_array: 2-D array-like of stone values, indexed [row, column].
    """
    board = np.asarray(board_array)
    n_rows, n_cols = board.shape

    def symbol(value):
        # Same mapping order as before: -1 first, then 0, everything else
        # falls through to the filled circle.
        if value == -1:
            return u"\u25EF"
        if value == 0:
            return "+"
        return u"\u25CF"

    # Header row of column indices, indented past the row-index column.
    print("  " + " ".join(str(j) for j in range(n_cols)))
    for i in range(n_rows):
        cells = " ".join(symbol(board[i, j]) for j in range(n_cols))
        print(str(i) + " " + cells)
        
def get_player_move(legal_board):
    """Prompt on stdin until the player enters a legal move or passes.

    Input is accepted as two comma-separated integers (surrounding
    whitespace is ignored) or the word "pass".  Anything that cannot be
    parsed, lies outside the board, or is not marked legal causes the
    prompt to be shown again.

    Args:
        legal_board: 2-D array in which an entry equal to 1 marks a point
            the player is allowed to play.

    Returns:
        The string "pass", or a tuple of two ints that indexes
        ``legal_board`` (first axis, second axis).
    """
    n_rows, n_cols = np.shape(legal_board)
    while True:
        print("Enter move as 'x,y' or 'pass': ")
        text = input().strip()
        if text.lower() == "pass":
            return "pass"

        parts = text.split(",")
        if len(parts) != 2:
            continue
        try:
            move = (int(parts[0]), int(parts[1]))
        except ValueError:
            # Not two integers: ask again.
            continue

        # Explicit range check: a negative index would otherwise wrap
        # around to the opposite edge of the board instead of failing.
        if not (0 <= move[0] < n_rows and 0 <= move[1] < n_cols):
            continue

        # Reject illegal moves.
        if legal_board[move] != 1:
            continue

        return move

def gen_node(gpu_Q, go_game, color, conn_rec, conn_send, game_beginning):
    """Create a fresh search node for the current position of ``go_game``.

    The state seen by ``color`` is transformed with ``rotate_S`` and queued
    on ``gpu_Q`` together with ``conn_send``; the reply ``(P, v)`` is then
    read from ``conn_rec``.  ``P`` is mapped back with ``reverse_rotate``
    and ``v`` is multiplied by +1 for black and -1 for white.

    Unless ``game_beginning`` is true, ``P`` is masked with the legal-move
    board (81 board points plus the always-allowed pass entry at index 81)
    and renormalised, and the node's ``illigal_board`` attribute gets a
    penalty of -1000 on every illegal entry.  At the start of the game no
    masking is applied and the penalty is all zeros.

    Returns:
        Tuple ``(root_node, v)``: the new ``state_node`` and the signed
        value.
    """
    state = go_game.get_state(color)
    state, rotation, reflection = rotate_S(state)

    # Ask the worker for an evaluation and wait for its answer.
    gpu_Q.put([state, conn_send])
    P, v = conn_rec.recv()
    P = reverse_rotate(P, rotation, reflection)

    sign = {"black": 1, "white": -1}[color]
    v = sign * v

    if game_beginning:
        # First node of the game: nothing is masked out.
        penalty = np.zeros(82)
    else:
        # 1 marks a legal entry; the last entry (pass) is always legal.
        mask = np.ones(82, dtype=float)
        mask[:81] = np.ndarray.flatten(go_game.get_legal_board(color))

        # Zero out illegal moves and renormalise the prior.
        masked = np.multiply(P, mask)
        P = masked / np.sum(masked)

        # Large negative penalty so illegal moves are never chosen.
        penalty = (mask - 1) * 1000

    root_node = state_node(go_game, P, color)
    root_node.illigal_board = penalty
    return root_node, v

## Parameters to choose

In [None]:
# Color the human plays: "black" or "white". The game loop starts with
# black to move, so choosing "white" lets the computer open.
player_color = "white"
# Number of MCTS simulations passed to MCTS() on each computer turn.
# We recommend >400.
N_sim = 400

## "Game engine"

In [None]:
# Interactive game loop: the human plays `player_color`, the computer plays
# the other color using MCTS backed by the latest saved model.
board_size = 9
model = load_latest_model()
model.eval()

gpu_Q = Queue()
# One-way pipe: gen_node() sends conn_send along with each request on gpu_Q
# and reads the reply from conn_rec.
conn_rec, conn_send = Pipe(False)
# NOTE(review): temp_switch, eta_par and epsilon are not referenced anywhere
# else in this cell; kept in case other code reads them.
temp_switch = 0  # Number of turns before other temperature measure is used
eta_par = 0.03
epsilon = 0.25
turn_switcher = {"black": "white",
                 "white": "black"}

process_workers = []
#torch.multiprocessing.set_start_method('spawn', force=True)
process_workers.append(Process(target=gpu_worker, args=(gpu_Q, 1, board_size, model)))
# Start the GPU worker process.
for p in process_workers:
    p.start()

# Index of the pass move in the flat move encoding (one past the last point).
pass_index = board_size * board_size

try:
    number_passes = 0
    go_game = go_board()
    current_color = "black"

    # When the human plays black there is no search tree yet; the first node
    # is created after the human's opening move (see the except branch below).
    root_node = None
    if player_color == "white":
        root_node, v = gen_node(gpu_Q, go_game, "black", conn_rec, conn_send, True)

    # The game ends after two consecutive passes.
    while number_passes < 2:
        if player_color == current_color:
            # Player's turn: show the board and ask for a move.
            print_board(go_game.board)
            legal_board = go_game.get_legal_board(player_color)
            move = get_player_move(legal_board)

            if move == "pass":
                number_passes += 1
            else:
                number_passes = 0

            go_game.move(move, current_color)
            clear_output()
            print_board(go_game.board)
        else:
            # Computer's turn: search, then play the most-visited move.
            root_node = MCTS(root_node, gpu_Q, N_sim, go_game, current_color, number_passes)
            move = np.argmax(root_node.N)

            if move == pass_index:
                go_game.move('pass', current_color)
                number_passes += 1
                clear_output()
                print("Computer move: pass")
            else:
                point = np.unravel_index(move, (board_size, board_size))
                go_game.move(point, current_color)
                number_passes = 0
                clear_output()
                # Cast to plain ints so the message reads "(7, 1)" regardless
                # of how the installed NumPy prints its scalar types.
                shown = tuple(int(k) for k in point)
                print("Computer move: " + str((shown, current_color)))

        # Advance the tree to the position after `move`.
        # NOTE(review): the player's move is a (row, col) tuple or "pass"
        # while the computer's move is a flat index; if action_edges is keyed
        # by flat index, this lookup always misses for player moves and a new
        # node is generated every time - confirm against MCTS.
        try:
            # Reuse the subtree if the chosen action has already been explored.
            root_node = root_node.action_edges[move]
        except Exception:
            # No usable child (or no tree yet): evaluate a fresh node.
            # `Exception` rather than a bare except, so Ctrl-C and SystemExit
            # are not swallowed here.
            root_node, v = gen_node(gpu_Q, go_game, turn_switcher[current_color], conn_rec, conn_send, False)

        current_color = turn_switcher[current_color]

    points = go_game.count_points()
    # A positive score means black is the winner; anything else counts for white.
    if points > 0:
        player_won = (player_color == "black")
    else:
        player_won = (player_color == "white")

    if player_won:
        print("You win! Score was: " + str(points))
    else:
        print("You lose! Score was: " + str(points))
finally:
    # Always stop the worker, also when the game is interrupted or crashes,
    # so no orphaned process keeps holding the model.
    for p in process_workers:
        p.terminate()
        p.join()

Computer move: ((7, 1), 'black')
  0 1 2 3 4 5 6 7 8
0 + ◯ ◯ + + + + + +
1 + ● ◯ ● ● + + + +
2 + ◯ ● ◯ ◯ + + + +
3 + ◯ ● ● + + + + +
4 + ◯ ◯ ● ● ● ● + +
5 + ● ● ◯ ● ◯ ● + +
6 + + ● ◯ ◯ ◯ ● ◯ +
7 + ● ◯ ● ◯ + ◯ ● +
8 + + + + + + + + +
Enter move as 'x,y' or 'pass': 


 8,3
