In [1]:
import copy

class TicTacToe:
    def __init__(self, board=None, player='X'):
        self.board = [
            [' ',' ',' '],
            [' ',' ',' '],
            [' ',' ',' ']
        ] if board == None else copy.deepcopy(board)
        self.player = player
    
    def __str__(self):
        return '\n-----\n'.join('|'.join(row) for row in self.board)
    
    def can_move(self, row, col):
        return self.board[row][col] == ' '
    
    def move(self, row, col):
        if not self.can_move(row, col):
            return
        self.board[row][col] = self.player
        next_state = TicTacToe(board=self.board, player=('O' if self.player == 'X' else 'X'))            
        self.board[row][col] = ' '
        return next_state
    
    def board_full(self):
        return all(self.board[r][c] != ' ' for r in range(3) for c in range(3))

    def available_moves(self):
        return [(row,col) for row in range(3) for col in range(3) if self.can_move(row,col)]
    
    def check_win(self):
        lines = [
            [(0,0),(0,1),(0,2)],
            [(1,0),(1,1),(1,2)],
            [(2,0),(2,1),(2,2)],
            [(0,0),(1,0),(2,0)],
            [(0,1),(1,1),(2,1)],
            [(0,2),(1,2),(2,2)],
            [(0,0),(1,1),(2,2)],
            [(2,0),(1,1),(0,2)]
        ]
        
        for line in lines:
            all_same = True
            cmp_r,cmp_c = line[0]
            for r,c in line:
                if self.board[r][c] != self.board[cmp_r][cmp_c]:
                    all_same = False
            if all_same and self.board[cmp_r][cmp_c] != ' ':
                return True, self.board[cmp_r][cmp_c]

        return False, ("tie" if self.board_full() else None)
    
    def is_terminal(self):
        return self.check_win()[0] or self.board_full()
    
    def reward(self, player):
        is_win, winner = self.check_win()
        if is_win:
            return 1.0 if winner == player else -1.0
        else:
            return 0


ttt = TicTacToe()
print(ttt)
print(ttt.check_win())
print(ttt.can_move(0,0))
print(ttt.available_moves())
next_ttt = ttt.move(1,1)
print(ttt)
print(next_ttt)
print(next_ttt.player)
print(TicTacToe([
    ['X','X','O'],
    ['X','0','X'],
    ['X','X','X']
]).reward('X'))

 | | 
-----
 | | 
-----
 | | 
(False, None)
True
[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]
 | | 
-----
 | | 
-----
 | | 
 | | 
-----
 |X| 
-----
 | | 
O
1.0


In [2]:
import random
import numpy as np
np.seterr(divide='ignore', invalid='ignore')

class Tree:
    def __init__(self, state, value: np.float64 = 0.0, visits: np.float64 = 0.0, children=[], parent=None):
        self.state = copy.deepcopy(state)
        self.value = value
        self.visits = visits
        self.children = []
        self.parent = parent
    
    def __str__(self):
        return f"{str(self.state)}\nvalue = {self.value}\nvisits = {self.visits}\nchildren = {self.children}\nparent = \n{self.parent}\n"
    
    def is_leaf(self):
        return self.children == []

def UCT(parent, c=2):
#     print([(child.value + c*np.sqrt(np.divide(np.log(parent.visits),child.visits))) for child in parent.children])
    return parent.children[np.argmax([(child.value + c*np.sqrt(np.divide(np.log(parent.visits),child.visits))) for child in parent.children])]

class MCTS:
    def __init__(self, tree):
        self.tree = tree
        self.expand(self.tree)
    
    def __str__(self):
        return str(self.tree)
    
    def select(self):
        current = self.tree
        while not current.is_leaf():
            child = UCT(current,c=2) # UCT
            current = child
        return current
            
    def expand(self, leaf):
        if leaf.state.is_terminal():
            return None
        for pos in leaf.state.available_moves():
            leaf.children.append(Tree(leaf.state.move(*pos),children=[],parent=leaf))
        return leaf.children[0]

    def simulate(self, leaf):
        state = leaf.state
        while True:
            if state.is_terminal():
                print(state.reward('X'))
                return state.reward('X')
            
            action = random.choice(state.available_moves())
            print("choosing action:", action)
            state = state.move(*action)
            print("next state:\n"+str(state))

    def backup(self, node, reward):
        while node != None:
            node.value += reward
            node.visits += 1
            node = node.parent

    def SESB(self, num_rollout=500):
        leaf = self.select()
        
        if leaf.visits > 0:
            leaf = self.expand(leaf)
            if leaf == None:
                return
        
        reward = 0
        for _ in range(num_rollout):
            reward += self.simulate(leaf)
        
        self.backup(leaf, reward)

    def next(self):
        return self.tree.children[np.argmax([child.value for child in self.tree.children])]
        
    def runMCTS(self, num_iter=1000):
        for _ in range(num_iter):
            self.SESB(1)
#         tree = self.next()
#         self.tree = tree
#         print(tree)




In [None]:
'''
ttt_mcts = MCTS(Tree(TicTacToe()))
ttt_mcts.runMCTS()
print(ttt_mcts)
ttt = ttt_mcts.tree.state
print(ttt)
'''
initb = TicTacToe()
ttt_mcts.next(initb)
while not ttt.is_terminal():
    next_ttt = ttt.move(int(input()),int(input()))
    print(next_ttt)
    ttt_mcts = MCTS(Tree(next_ttt))
    print(ttt_mcts.tree)
    ttt_mcts.runMCTS()
    print(ttt_mcts)
    ttt = ttt_mcts.tree.state
    print(ttt)

choosing action: (1, 1)
next state:
X| | 
-----
 |O| 
-----
 | | 
choosing action: (2, 0)
next state:
X| | 
-----
 |O| 
-----
X| | 
choosing action: (0, 1)
next state:
X|O| 
-----
 |O| 
-----
X| | 
choosing action: (1, 2)
next state:
X|O| 
-----
 |O|X
-----
X| | 
choosing action: (0, 2)
next state:
X|O|O
-----
 |O|X
-----
X| | 
choosing action: (2, 1)
next state:
X|O|O
-----
 |O|X
-----
X|X| 
choosing action: (1, 0)
next state:
X|O|O
-----
O|O|X
-----
X|X| 
choosing action: (2, 2)
next state:
X|O|O
-----
O|O|X
-----
X|X|X
1.0
choosing action: (2, 0)
next state:
 |X| 
-----
 | | 
-----
O| | 
choosing action: (2, 2)
next state:
 |X| 
-----
 | | 
-----
O| |X
choosing action: (1, 1)
next state:
 |X| 
-----
 |O| 
-----
O| |X
choosing action: (0, 2)
next state:
 |X|X
-----
 |O| 
-----
O| |X
choosing action: (1, 2)
next state:
 |X|X
-----
 |O|O
-----
O| |X
choosing action: (1, 0)
next state:
 |X|X
-----
X|O|O
-----
O| |X
choosing action: (0, 0)
next state:
O|X|X
-----
X|O|O
-----
O| |X
choosi