## Bài 14: Monte Carlo Tree Search

### Cài đặt MCTS cho trò chơi XO

In [1]:
import math
import random
import numpy as np

In [None]:
class TicTacToe:
    """A 3x3 tic-tac-toe position.

    ``board`` is a 3x3 integer array whose cells hold 0 (empty), 1 or -1.
    ``current_player`` is the mark that the next ``make_move`` call will
    place; it starts at 1 and flips sign after every move.
    """

    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1

    def clone(self):
        """Return an independent copy (own board array, same side to move)."""
        duplicate = TicTacToe()
        duplicate.board = self.board.copy()
        duplicate.current_player = self.current_player
        return duplicate

    def available_moves(self):
        """Return the ``(row, col)`` tuples of all empty cells, row by row."""
        empty_cells = []
        for row in range(3):
            for col in range(3):
                if self.board[row, col] == 0:
                    empty_cells.append((row, col))
        return empty_cells

    def make_move(self, move):
        """Place the current player's mark at ``move`` and pass the turn.

        ``move`` is a ``(row, col)`` pair. The cell is written
        unconditionally; no check is made that it was empty.
        """
        row, col = move
        self.board[row, col] = self.current_player
        self.current_player = -self.current_player

    def check_winner(self):
        """Report the state of the game.

        Returns the sign (1 or -1) of the first line whose cells sum to
        +/-3, 0 when no line is complete and no empty cell remains, and
        ``None`` while the game is still open.
        """
        # Lines are inspected in a fixed order: row i then column i for
        # i = 0..2, followed by the main diagonal and the anti-diagonal.
        lines = []
        for k in range(3):
            lines.append(self.board[k, :])
            lines.append(self.board[:, k])
        lines.append(self.board.diagonal())
        lines.append(np.fliplr(self.board).diagonal())

        for line in lines:
            total = sum(line)
            if abs(total) == 3:
                return np.sign(total)

        if self.available_moves():
            return None
        return 0
    
class Node:
    """A node of the search tree.

    Holds the game state reached at this node (``game``), the node it was
    expanded from (``parent``), the move that led here (``move``), the list
    of expanded ``children``, and the ``visits`` / ``wins`` counters that
    the search updates.
    """

    def __init__(self, game, parent=None, move=None):
        self.game = game
        self.parent = parent
        self.move = move
        self.children = []
        self.visits = 0
        self.wins = 0

    def is_fully_expanded(self):
        """Return True when there is one child per available move of ``game``."""
        legal_count = len(self.game.available_moves())
        return len(self.children) == legal_count

    def best_child(self, c_param=1.4):
        """Return the child with the highest UCT score.

        The score is ``wins / visits`` plus ``c_param`` times
        ``sqrt(log(parent_visits + 1) / visits)``. A tiny epsilon is added
        to the child's visit count so an unvisited child does not divide by
        zero. On equal scores the earliest child in ``children`` is kept.
        """
        def uct_score(child):
            n = child.visits + 1e-6
            exploitation = child.wins / n
            exploration = c_param * math.sqrt(math.log(self.visits + 1) / n)
            return exploitation + exploration

        return max(self.children, key=uct_score)
    
def mcts(root_game, iterations=1000):
    """Pick a move for the side to move in ``root_game`` using MCTS.

    Each iteration runs the four classic phases: selection by UCT down the
    fully expanded part of the tree, expansion of one untried move, a
    uniformly random playout to the end of the game, and backpropagation of
    the result along the visited path.

    Args:
        root_game: Position to search from. It is cloned for every
            iteration and never modified.
        iterations: Number of search iterations to run.

    Returns:
        The move of the root child that received the most visits.

    Raises:
        ValueError: If the search produced no child of the root, i.e.
            ``root_game`` is already finished or ``iterations`` is not
            positive.
    """
    root = Node(root_game)
    for _ in range(iterations):
        node = root
        game = root_game.clone()

        # Selection: follow the best UCT child while every move from the
        # current node has already been expanded.
        while node.children and node.is_fully_expanded():
            node = node.best_child()
            game.make_move(node.move)

        # Expansion: add one child for a move not tried from this node yet.
        result = game.check_winner()
        if result is None:
            tried = {child.move for child in node.children}
            untried = [m for m in game.available_moves() if m not in tried]
            if untried:
                move = random.choice(untried)
                game.make_move(move)
                child_node = Node(game.clone(), parent=node, move=move)
                node.children.append(child_node)
                node = child_node
                result = game.check_winner()

        # Simulation: play random moves until the game is decided.
        while result is None:
            game.make_move(random.choice(game.available_moves()))
            result = game.check_winner()

        # Backpropagation: a node is credited from the point of view of the
        # player who moved INTO it, which is the opposite of the player to
        # move in node.game. That is the player whose choice the parent's
        # UCT selection represents.
        while node is not None:
            node.visits += 1
            if result == -node.game.current_player:
                node.wins += 1
            elif result == 0:
                # A draw counts as half a win so that it ranks above a loss.
                node.wins += 0.5
            node = node.parent

    if not root.children:
        raise ValueError(
            "mcts() found no move: the game is already over or "
            "iterations is not positive"
        )
    return max(root.children, key=lambda c: c.visits).move

In [9]:
# Play one full game: player 1 (X) chooses its moves with MCTS, player -1 (O)
# plays uniformly at random. The board is printed after every move.
game = TicTacToe()
while game.check_winner() is None:
    x_to_move = game.current_player == 1
    move = (
        mcts(game, iterations=500)
        if x_to_move
        else random.choice(game.available_moves())
    )
    game.make_move(move)
    print(game.board)
    print("------")

# Announce the result; anything other than a win for 1 or -1 is a draw.
winner = game.check_winner()
outcome_text = {1: "X thắng!", -1: "O thắng!"}
print(outcome_text.get(winner, "Hòa!"))

[[0 0 0]
 [0 1 0]
 [0 0 0]]
------
[[ 0  0 -1]
 [ 0  1  0]
 [ 0  0  0]]
------
[[ 0  1 -1]
 [ 0  1  0]
 [ 0  0  0]]
------
[[-1  1 -1]
 [ 0  1  0]
 [ 0  0  0]]
------
[[-1  1 -1]
 [ 0  1  0]
 [ 0  1  0]]
------
X thắng!
