In [2]:
# Same as before, but the way the winning value is computed has changed slightly.
# What we really care about is the "possible win rate", that is, the number of
# wins out of all the games played below a node, so the UCB computation,
# the backpropagation and the score calculation are all updated accordingly.
# See: https://en.wikipedia.org/wiki/Monte_Carlo_tree_search#Principle_of_operation
# I also want to test whether the AI plays to a draw against itself. At least,
# this might be a step in the right direction regarding testing the algorithm.

In [3]:
from copy import deepcopy
import numpy as np

def add_score(prev_score, to_add):
    """Add two score pairs component-wise.

    Both arguments are indexable with at least two entries; only indices 0
    and 1 are read. Returns a new 2-tuple and leaves the inputs untouched.
    """
    first_total = prev_score[0] + to_add[0]
    second_total = prev_score[1] + to_add[1]
    return (first_total, second_total)

def equal_arrays(ar1, ar2):
    """Return True when both sequences have equal length and equal items.

    Items are compared position by position with ``!=``; the first mismatch
    (or a length mismatch) makes the result False.
    """
    if len(ar1) != len(ar2):
        return False
    # any() stops at the first differing position, like an early return.
    return not any(ar1[pos] != ar2[pos] for pos in range(len(ar1)))

class XOX_Game():
    """A tic-tac-toe ("XOX") position.

    The board is a 3x3 grid of ints: 0 is an empty cell, 1 is X and 2 is O.
    ``player`` is the side to move (1 or 2). Playing a move never modifies the
    current position; ``play_action`` returns a new ``XOX_Game`` instead.
    """

    # The eight winning lines as (row, col) triples, in the order they are
    # checked: rows top to bottom, columns left to right, main diagonal,
    # anti-diagonal.
    _LINES = (
        ((0, 0), (0, 1), (0, 2)),
        ((1, 0), (1, 1), (1, 2)),
        ((2, 0), (2, 1), (2, 2)),
        ((0, 0), (1, 0), (2, 0)),
        ((0, 1), (1, 1), (2, 1)),
        ((0, 2), (1, 2), (2, 2)),
        ((0, 0), (1, 1), (2, 2)),
        ((0, 2), (1, 1), (2, 0)),
    )

    def __init__(self, board = None, player=1):
        """Create a position.

        Args:
            board: 3x3 grid of 0/1/2 values, or None for an empty board.
                A supplied board is stored by reference, not copied.
            player: the side to move, 1 (X) or 2 (O).
        """
        if board is None:
            self.board = [[0,0,0],[0,0,0],[0,0,0]]
        else:
            self.board = board
        self.player = player

    def _winner(self):
        """Return 1 or 2 if that player owns a complete line, otherwise 0.

        Single source of truth for win detection, shared by ``is_terminal``
        and ``get_score`` so the two can never disagree.
        """
        for (r0, c0), (r1, c1), (r2, c2) in self._LINES:
            mark = self.board[r0][c0]
            if mark in (1, 2) and mark == self.board[r1][c1] and mark == self.board[r2][c2]:
                return mark
        return 0

    def is_terminal(self):
        """Return True when the game is over: a line is complete or the board is full."""
        if self._winner():
            return True
        return all(self.board[i][j] != 0 for i in range(3) for j in range(3))

    def get_score(self):
        """Return the result as ``(x_wincount, o_wincount)``.

        ``(1, 0)`` when X has a complete line, ``(0, 1)`` when O has one and
        ``(0, 0)`` otherwise (a draw, or a game that is still in progress).
        """
        winner = self._winner()
        if winner == 1:
            return (1, 0)
        if winner == 2:
            return (0, 1)
        return (0, 0)

    def all_actions(self):
        """Return the empty cells as ``(row, col)`` tuples in row-major order.

        A terminal position has no actions, so an empty list is returned.
        """
        if self.is_terminal():
            return []
        return [(i, j) for i in range(3) for j in range(3) if self.board[i][j] == 0]

    def play_action(self, action):
        """Return the position reached when the side to move marks ``action``.

        Args:
            action: ``(row, col)`` of the cell to mark.

        The board is deep-copied first, so ``self`` is left unchanged. No
        validation is done: an occupied cell is silently overwritten, so pass
        only actions obtained from ``all_actions``.
        """
        board = deepcopy(self.board)
        board[action[0]][action[1]] = self.player
        # Hand the turn to the other side: 1 -> 2, anything else -> 1.
        next_player = 2 if self.player == 1 else 1
        return XOX_Game(board, next_player)

    def all_states(self):
        """Return the successor position for every legal action, in ``all_actions`` order."""
        return [self.play_action(action) for action in self.all_actions()]

    def __repr__(self):
        """Render the side to move followed by the three board rows ('X', 'O' or '-')."""
        symbols = {1: 'X', 2: 'O'}
        rows = [
            str([symbols.get(self.board[i][j], '-') for j in range(3)])
            for i in range(3)
        ]
        return f"Player {self.player}:\n" + "\n".join(rows)
            
      

In [4]:
# Build the default game (empty board, player 1 to move) and print it;
# print() uses XOX_Game.__repr__ because the class defines no __str__.
g = XOX_Game()
print(g)

Player 1:
['-', '-', '-']
['-', '-', '-']
['-', '-', '-']


In [5]:
# Following the indexed children plays X(0,0), O(1,1), X(0,1), O(1,0), X(0,2):
# X completes the top row, so get_score() reports an X win.
g.all_states()[0].all_states()[3].all_states()[0].all_states()[1].all_states()[0].get_score()

(1, 0)