In [198]:
from copy import deepcopy
import numpy as np

def equal_arrays(ar1, ar2):
    """Return True when both sequences have equal length and equal items.

    Items are compared position by position with ``!=``; the comparison
    stops at the first mismatch.
    """
    if len(ar1) != len(ar2):
        return False
    return not any(ar1[idx] != ar2[idx] for idx in range(len(ar1)))

class XOX_Game():
    """A tic-tac-toe ("XOX") position on a 3x3 board.

    Each cell holds 0 (empty), 1 (X) or 2 (O). ``player`` is the side to
    move (1 or 2). Moves do not modify a position in place:
    ``play_action`` returns a new ``XOX_Game``.
    """

    # The eight winning lines as (row, col) triples, in the order they are
    # examined: rows top to bottom, columns left to right, main diagonal,
    # anti-diagonal.
    _LINES = (
        ((0, 0), (0, 1), (0, 2)),
        ((1, 0), (1, 1), (1, 2)),
        ((2, 0), (2, 1), (2, 2)),
        ((0, 0), (1, 0), (2, 0)),
        ((0, 1), (1, 1), (2, 1)),
        ((0, 2), (1, 2), (2, 2)),
        ((0, 0), (1, 1), (2, 2)),
        ((0, 2), (1, 1), (2, 0)),
    )

    def __init__(self, board = None, player=1):
        """Create a position.

        Args:
            board: 3x3 nested list of 0/1/2, or None for an empty board.
                A supplied board is stored as-is (not copied).
            player: side to move, 1 (X) or 2 (O).
        """
        if board is None:
            self.board = [[0,0,0],[0,0,0],[0,0,0]]
        else:
            self.board = board
        self.player = player

    def _winner(self):
        """Return 1 or 2 for the first completed line found, or 0 if none."""
        for (r0, c0), (r1, c1), (r2, c2) in self._LINES:
            mark = self.board[r0][c0]
            if mark in (1, 2) and mark == self.board[r1][c1] == self.board[r2][c2]:
                return mark
        return 0

    def is_terminal(self):
        """Return True when the board is full or either side has a line."""
        board_full = all(
            self.board[i][j] != 0 for i in range(3) for j in range(3)
        )
        return board_full or self._winner() != 0

    def get_score(self):
        """Score the position from X's point of view.

        Returns:
            10 if X (1) has a completed line, -10 if O (2) has one, and 0
            otherwise (a draw or an unfinished game).
        """
        x_wins = 10
        o_wins = -10
        draw = 0

        winner = self._winner()
        if winner == 1:
            return x_wins
        if winner == 2:
            return o_wins
        return draw

    def all_actions(self):
        """Return the empty cells as (row, col) tuples in row-major order.

        A terminal position has no actions, so an empty list is returned.
        """
        if self.is_terminal():
            return []
        return [
            (i, j)
            for i in range(3)
            for j in range(3)
            if self.board[i][j] == 0
        ]

    def play_action(self, action):
        """Return the position reached when the side to move plays ``action``.

        Args:
            action: (row, col) of the cell to mark.

        No legality check is performed here: the cell is overwritten with
        the current player's mark whatever it held, so callers should pass
        one of the tuples from ``all_actions``.
        """
        # Copy each row so the new position shares no storage with this one.
        board = [list(row) for row in self.board]
        board[action[0]][action[1]] = self.player
        next_player = 2 if self.player == 1 else 1
        return XOX_Game(board, next_player)

    def all_states(self):
        """Return the successor position for every available action."""
        return [self.play_action(action) for action in self.all_actions()]

    def __repr__(self):
        """Render the side to move followed by three rows of 'X'/'O'/'-'."""
        symbols = {1: 'X', 2: 'O'}
        rows = [
            str([symbols.get(self.board[i][j], '-') for j in range(3)])
            for i in range(3)
        ]
        return f"Player {self.player}:\n" + "\n".join(rows)
            
      
    
        

In [199]:
# Fresh game: empty board with player 1 (X) to move.
g = XOX_Game()


In [200]:
# Show the position using XOX_Game.__repr__ (side to move, then the three rows).
print(g)

Player 1:
['-', '-', '-']
['-', '-', '-']
['-', '-', '-']


In [201]:
# Smoke test of successor generation and scoring. The indices pick, in turn:
# X at (0,0), O at (1,1), X at (0,1), O at (1,0), X at (0,2).
# X completes the top row, so the expected score is 10.
g.all_states()[0].all_states()[3].all_states()[0].all_states()[1].all_states()[0].get_score()

10

In [202]:
# The game logic works as expected: the move sequence above scores 10 (X wins).
# Next: the MCTS node class and the tree-search helper functions.


In [203]:
from math import sqrt
from math import log
from random import choice

class MCTSNode():
    """One node of the search tree: a game state plus visit statistics."""

    def __init__(self, parent, state):
        """Create an unvisited node with no children.

        Args:
            parent: the node this one hangs from, or None for the root.
            state: the game state this node represents.
        """
        self.children = []
        self.score = 0
        self.visitCount = 0
        self.state = state
        self.parent = parent

    def visit(self):
        """Record one more visit to this node."""
        self.visitCount += 1

    def setChildren(self, children):
        """Replace the node's child list with ``children``."""
        self.children = children

    def increment_score(self, score):
        """Add ``score`` to the node's accumulated score."""
        self.score += score

    def __repr__(self):
        """Show visit count and score on one line, then the state."""
        header = "V:{}, S:{}, \n".format(self.visitCount, self.score)
        return header + str(self.state)
        
def backpropagate(leaf, score):
    """Credit ``score`` and one visit to ``leaf`` and each of its ancestors.

    The walk follows ``parent`` links and stops after the node whose
    parent is None. Passing ``leaf=None`` does nothing.
    """
    def lineage(start):
        # Yield start, start.parent, ... up to and including the root.
        current = start
        while current is not None:
            yield current
            current = current.parent

    for ancestor in lineage(leaf):
        ancestor.visit()
        ancestor.increment_score(score)
        

def uct(totalVisit, winScore, nodeVisit, coeff=sqrt(2)):
    """Compute the UCT value used to rank a child during selection.

    Args:
        totalVisit: visit count of the parent node.
        winScore: accumulated score of the child.
        nodeVisit: visit count of the child.
        coeff: weight of the exploration term.

    Returns:
        ``float('inf')`` for a child with no visits; otherwise the child's
        average score plus ``coeff * sqrt(log(totalVisit) / nodeVisit)``.
    """
    never_visited = nodeVisit == 0
    if never_visited:
        return float('inf')
    exploitation = winScore / nodeVisit
    exploration = coeff * sqrt(log(totalVisit) / nodeVisit)
    return exploitation + exploration

def selection(node):
    """Return the child of ``node`` with the highest UCT value.

    Node scores accumulate the game result from X's (player 1) point of
    view (+10 X wins, -10 O wins). When player 2 is to move at ``node`` the
    child scores are therefore negated before ranking, so each side picks
    the move that is best for itself rather than best for X.

    The running best starts at negative infinity so that a child is still
    returned when every UCT value is zero or negative.

    Returns:
        The best child (the first one on ties), or None when ``node`` has
        no children.
    """
    # States without a ``player`` attribute are treated as the maximising side.
    sign = -1 if getattr(node.state, "player", 1) == 2 else 1
    bestScore = float('-inf')
    bestChild = None
    for child in node.children:
        newScore = uct(node.visitCount, sign * child.score, child.visitCount)
        if newScore > bestScore:
            bestScore = newScore
            bestChild = child
    return bestChild

def randomPlay(node):
    """Play uniformly random moves from ``node.state`` until the game ends.

    Args:
        node: a tree node whose ``state`` provides ``is_terminal()``,
            ``all_states()`` and ``get_score()``.

    Returns:
        ``get_score()`` of the terminal state that was reached.

    Raises:
        RuntimeError: if a state reports that it is not terminal yet offers
            no successor states. Continuing would loop forever, so the
            inconsistency is reported instead.
    """
    # No copy is needed: the loop only rebinds this local name to successor
    # states and never modifies ``node.state`` itself.
    state = node.state
    while not state.is_terminal():
        successors = state.all_states()
        if not successors:
            raise RuntimeError(
                "non-terminal state has no successor states:\n" + str(state)
            )
        state = choice(successors)
    return state.get_score()

def expand(node):
    """Attach one new child node to ``node`` per successor of its state.

    The children are created in the order returned by
    ``node.state.all_states()`` and installed with ``node.setChildren``.
    """
    children = []
    for successor in node.state.all_states():
        children.append(MCTSNode(parent = node, state = successor))
    node.setChildren(children)
    
def descend(node):
    """Follow ``selection`` downwards from ``node`` to a childless node.

    Returns ``node`` itself when it has no children.
    """
    current = node
    while True:
        if len(current.children) == 0:
            return current
        current = selection(current)



In [204]:
def MCTS(gamestate, limit = 10000):
    """Run Monte Carlo Tree Search from ``gamestate`` and pick a move.

    Each iteration descends the tree to a childless node. A non-terminal
    node is expanded, one new child is chosen at random, a random playout
    is run from it and the result is backpropagated. A terminal node has
    nothing to expand, so its own score is backpropagated from that node,
    which lets its statistics (and its ancestors') keep updating.

    Args:
        gamestate: the position to search from.
        limit: number of iterations to run (10k by default).

    Returns:
        The child node of the root chosen by ``selection``; its ``state``
        is the position after the recommended move. None when the root has
        no children (``gamestate`` is terminal, or no iteration ran).
    """
    root = MCTSNode(parent = None, state = gamestate)
    if gamestate.is_terminal():
        # A finished game has no move to recommend; skip the search loop.
        return None
    iteration_count = 0
    while iteration_count < limit:
        iteration_count += 1
        leaf = descend(root)
        if leaf.state.is_terminal():
            # Credit the terminal position itself, not the root.
            backpropagate(leaf, leaf.state.get_score())
        else:
            expand(leaf)
            to_explore = choice(leaf.children)
            result = randomPlay(to_explore)
            backpropagate(to_explore, result)
    return selection(root)
            

In [205]:
MCTS(gamestate = XOX_Game())

V:57, S:200, 
Player 2:
['-', 'X', '-']
['-', '-', '-']
['-', '-', '-']

In [208]:
def play():
    """Play an interactive game: the computer is X (moves first), you are O.

    The computer chooses its moves with ``MCTS``. The human is asked for a
    row and a column (0, 1 or 2) and is asked again until the entry is a
    pair of integers naming an empty cell, so bad input can neither crash
    the game nor overwrite an occupied square.
    """
    def ask_move(current):
        # Keep prompting until the human names one of the legal (row, col) cells.
        legal = current.all_actions()
        while True:
            try:
                print("Row: ")
                i = int(input())
                print("Column: ")
                j = int(input())
            except ValueError:
                print("Please enter whole numbers between 0 and 2.")
                continue
            if (i, j) in legal:
                return (i, j)
            print("That cell is not available. Pick an empty cell with row and column between 0 and 2.")

    state = XOX_Game()
    print("Welcome to the XOX game! You know the drill.")
    print("First input the row, then the column of your move.")
    while not state.is_terminal():
        print(state)
        print("Computer plays...")
        state = MCTS(limit = 10000, gamestate = state).state
        print(state)
        if state.is_terminal():
            break
        state = state.play_action(ask_move(state))
    final_score = state.get_score()
    if final_score == 10:
        print("Com wins!")
    elif final_score == 0:
        print("Draw!")
    else:
        print("You... win? What?")
    return

In [209]:
play()

Welcome to the XOX game! You know the drill.
First input the row, then the column of your move.
Player 1:
['-', '-', '-']
['-', '-', '-']
['-', '-', '-']
Computer plays...
Player 2:
['-', '-', '-']
['-', '-', '-']
['-', '-', 'X']
Row: 
1
Column: 
1
Player 1:
['-', '-', '-']
['-', 'O', '-']
['-', '-', 'X']
Computer plays...
Player 2:
['-', '-', '-']
['-', 'O', '-']
['-', 'X', 'X']
Row: 
0
Column: 
0
Player 1:
['O', '-', '-']
['-', 'O', '-']
['-', 'X', 'X']
Computer plays...
Player 2:
['O', '-', '-']
['-', 'O', '-']
['X', 'X', 'X']
Com wins!
