## Monte Carlo Tree Search


In [16]:
import numpy as np
import math  
import random


In [17]:
class Node:
    """A single vertex of the search tree.

    Wraps one game state together with the bookkeeping the search keeps for
    it: a link to the parent node, the children reached so far, and the
    visit / reward accumulators.

    ``state`` must provide an ``is_terminal()`` method; it is called once,
    when the node is created.
    """

    def __init__(self, state, parent):
        terminal = state.is_terminal()

        self.state = state  # game position represented by this node
        self.is_terminal = terminal
        # A terminal node is created already marked as completely expanded.
        self.is_expanded_completly = terminal
        self.parent = parent
        self.children = dict()  # filled in as the node is expanded
        self.N, self.Q = 0, 0  # visit count, summed simulation reward

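$UCT_i = \frac{Q_i}{N_i} + c\sqrt{\frac{\log{N_p}}{N_i}}$

where $Q_i$ and $N_i$ are the total reward and visit count of child $i$, $N_p$ is the visit count of its parent, and $c$ is the exploration constant.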


In [None]:
class monte_carlo_tree_search():
    """Expansion and child-selection steps of Monte Carlo Tree Search.

    Nodes are expected to expose ``state``, ``children`` (a dict keyed by
    action), ``N`` (visit count), ``Q`` (summed reward) and
    ``is_expanded_completly``. States are expected to expose
    ``available_actions()``, ``perform_action(action)`` and ``player``.
    """

    def expand(self, node):
        """Create one child of ``node`` for an action that has no child yet.

        Actions are used as dictionary keys, so they must hash and compare
        by value for already-tried actions to be recognised.

        Returns:
            The newly created child node, or ``None`` when every available
            action already has a child (the node is then marked as
            completely expanded).
        """
        actions = node.state.available_actions()
        for action in actions:
            if action in node.children:
                continue
            new_node = Node(node.state.perform_action(action), node)
            node.children[action] = new_node
            if len(actions) == len(node.children):
                node.is_expanded_completly = True
            # Exactly one child is added per call.
            return new_node

        # Nothing left to try.
        node.is_expanded_completly = True
        return None

    def calculate_values(self, node, c):
        """Return the child of ``node`` with the highest UCT score.

        The score of a child is
        ``player * Q_child / N_child + c * sqrt(log(N_parent) / N_child)``,
        where ``player`` is ``node.state.player`` so that rewards are read
        from the point of view of the player to move. Ties are broken
        uniformly at random.

        Args:
            node: parent node whose children are compared.
            c: exploration constant; ``0`` gives pure exploitation.

        Returns:
            The selected child node, or ``None`` if ``node`` has no children.
        """
        if not node.children:
            return None

        # Guard against log(0) for a parent that was never visited.
        log_parent_visits = math.log(max(node.N, 1))
        player = node.state.player

        def calculate_value(child):
            if child.N == 0:
                # An unvisited child has no estimate yet: try it first.
                return math.inf
            exploitation = player * (child.Q / child.N)
            exploration = c * math.sqrt(log_parent_visits / child.N)
            return exploitation + exploration

        max_value = -math.inf
        good_options = []
        for child in node.children.values():
            uct = calculate_value(child)
            if uct > max_value:
                max_value = uct
                good_options = [child]
            elif uct == max_value:
                good_options.append(child)

        # Choose only after every child has been scored.
        return random.choice(good_options)


In [21]:
class Action():
    """A move: ``player`` places a mark on the cell at row ``x``, column ``y``.

    Actions compare and hash by value, so two separately constructed actions
    describing the same move are the same dictionary key. Do not mutate an
    action while it is stored in a dict or set.
    """

    def __init__(self, player, x, y):
        self.player = player
        self.x = x
        self.y = y

    def _key(self):
        # Single source of truth for equality and hashing.
        return (self.player, self.x, self.y)

    def __eq__(self, other):
        if not isinstance(other, Action):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        return f"Action(player={self.player!r}, x={self.x!r}, y={self.y!r})"

In [67]:
from copy import deepcopy
from functools import reduce


In [68]:
class Tic_Tac_Toe():
    """Tic-tac-toe position on a ``size`` x ``size`` board.

    Cells hold ``1`` (X), ``-1`` (O) or ``0`` (empty). ``player`` is the side
    to move and starts as ``1``, so X moves first. States are treated as
    immutable by the move methods: playing an action returns a new state.
    """

    def __init__(self, size=3):
        self.size = size
        self.board = (np.zeros((size, size))).tolist()
        self.player = 1

    def available_actions(self):
        """Return one ``Action`` for the current player per empty cell."""
        return [
            Action(self.player, x=i, y=j)
            for i, row in enumerate(self.board)
            for j, cell in enumerate(row)
            if cell == 0
        ]

    def play_action(self, action: "Action"):
        """Return a copy of this state with ``action`` applied.

        The mark written is ``action.player``; the side to move in the new
        state is the opposite of this state's ``player``. This state is not
        modified.
        """
        new_state = deepcopy(self)
        new_state.board[action.x][action.y] = action.player
        new_state.player = self.player * -1
        return new_state

    def perform_action(self, action):
        """Alias of ``play_action`` under the name the tree search calls."""
        return self.play_action(action)

    def _lines(self):
        """Yield every row, every column and both diagonals as lists."""
        n = len(self.board)
        yield from self.board
        yield from (list(column) for column in zip(*self.board))
        yield [self.board[i][i] for i in range(n)]
        yield [self.board[i][n - i - 1] for i in range(n)]

    def _winning_total(self):
        """Return the sum of the first completed line, or ``None``."""
        for line in self._lines():
            total = sum(line)
            # Marks are +1/-1, so only a uniform line sums to +/-size.
            if abs(total) == self.size:
                return total
        return None

    # Terminal nodes
    def is_end_state(self):
        """Return ``True`` if a player has a full line or the board is full."""
        if self._winning_total() is not None:
            return True
        return all(cell != 0 for row in self.board for cell in row)

    def is_terminal(self):
        """Alias of ``is_end_state`` under the name ``Node`` calls."""
        return self.is_end_state()

    def print_board(self):
        """Print the board, one row per line, followed by a separator."""
        for i in self.board:
            for j in i:
                if j == -1: print('O', end='\t') # second player
                elif j == 1: print('X', end='\t') # first player
                else: print('-', end='\t')
            print("")
        print('____________________________\n')

    # -1 for O and 1 for X
    def get_reward(self):
        """Return ``1.0`` if X has a line, ``-1.0`` if O has one, else ``False``.

        ``False`` compares equal to ``0``, so it is reported as a draw by
        ``who_wins``.
        """
        total = self._winning_total()
        if total is None:
            return False
        return total / self.size

    def who_wins(self, result):
        """Print the outcome for a ``get_reward``-style ``result`` value."""
        if result == 1:
            print("Result: X wins!!")
        elif result == -1:
            print("Result: O wins!!")
        else:
            print("Result: Draw!!!")


    


In [18]:
# Scratch board for trying out the win-check expressions: a 3x3 nested list of 0.0.
_board = (np.zeros((3, 3))).tolist()

In [63]:
# Put a 1 in the bottom-left cell (row 2, column 0).
_board[2][0] = 1


In [60]:

# Transposing the board turns its columns into rows, so this prints one sum per column.
for colum in np.transpose(_board):
    print(sum(colum))

1.0
1.0
1.0


In [66]:
# Sum of the main diagonal (top-left to bottom-right).
sum(np.diag(_board))

1.0

In [65]:
# After a 90-degree counter-clockwise rotation the main diagonal holds the
# board's anti-diagonal, read from the top-right cell to the bottom-left cell.
np.diag(np.rot90(_board))

array([1., 0., 1.])

In [51]:
# For each row, wrap its middle entry (index 1) in a one-element list and print
# the list and its sum. Note that `column` is a single cell here, not a whole
# board column.
for row in _board:
    column = [row[1]]
    print(column)
    print(sum(column))


[1]
1
[0.0]
0.0
[0.0]
0.0
