In [1]:
import random
import numpy as np
import math

In [2]:
class TicTacToe:
    def __init__(self):
        self.board = [[0 for _ in range(3)] for _ in range(3)]
        self.current_player = 1

    def reset(self):
        self.board = [[0 for _ in range(3)] for _ in range(3)]
        self.current_player = 1

    def render(self):
        players = {1: 'X', -1: 'O', 0: ' '}
        for i, row in enumerate(self.board):
            print(" " + " | ".join([players[cell] for cell in row]))
            if i < 2:
                print("---+---+---")

    def available_moves(self):
        return [(r, c) for r in range(3) for c in range(3) if self.board[r][c] == 0]

    def make_move(self, row, col):
        if self.board[row][col] == 0:
            self.board[row][col] = self.current_player
            self.current_player *= -1
            return True
        return False

    def check_winner(self):
        board = np.array(self.board)
        row_sum = np.sum(board, axis=1)
        col_sum = np.sum(board, axis=0)
        diag1_sum = np.trace(board)
        diag2_sum = np.trace(np.fliplr(board))
        all_sums = np.concatenate((row_sum, col_sum, [diag1_sum], [diag2_sum]))

        if 3 in all_sums:
            return 1
        if -3 in all_sums:
            return -1
        if not (board == 0).any():
            return 0
        return None

    def clone(self):
        cloned_game = TicTacToe()
        cloned_game.board = [row[:] for row in self.board]
        cloned_game.current_player = self.current_player
        return cloned_game

    def random_play(self):
        while self.check_winner() is None:
            moves = self.available_moves()
            move = random.choice(moves)
            self.make_move(*move)


In [3]:
class MCTSNode:
    def __init__(self, state, parent=None):
        self.state = state
        self.parent = parent
        self.children = []
        self.wins = 0
        self.visits = 0
        self.untried_moves = state.available_moves()

    def is_fully_expanded(self):
        return len(self.untried_moves) == 0

    def best_child(self, exploration_weight=1.414):
        return max(self.children, key=lambda child: child.ucb1(exploration_weight))

    def ucb1(self, exploration_weight):
        if self.visits == 0:
            return float('inf')
        return (self.wins / self.visits) + exploration_weight * math.sqrt(math.log(self.parent.visits) / self.visits)

    def expand(self):
        move = self.untried_moves.pop()
        next_state = self.state.clone()
        next_state.make_move(*move)
        child_node = MCTSNode(next_state, self)
        self.children.append(child_node)
        return child_node

    def update(self, result):
        self.visits += 1
        if self.state.current_player == -1:
            self.wins += -result
        else:
            self.wins += result


def MCTS(root, time_limit=1.0):
    import time
    start_time = time.time()

    while time.time() - start_time < time_limit:
        node = root
        # 1. Selection
        while node.is_fully_expanded() and node.children:
            node = node.best_child()

        # 2. Expansion
        if not node.is_fully_expanded():
            node = node.expand()

        # 3. Simulation
        result = simulate_random_game(node.state)

        # 4. Backpropagation
        while node is not None:
            node.update(result)
            node = node.parent

    return max(root.children, key=lambda child: child.visits).state

def simulate_random_game(state):
    while state.check_winner() is None:
        moves = state.available_moves()
        move = random.choice(moves)
        state.make_move(*move)
    return state.check_winner()


In [4]:
class MonteCarloPrediction:
    def __init__(self, game, time_limit=3.0):
        self.game = game
        self.time_limit = time_limit

    def simulate(self):
        root_node = MCTSNode(self.game.clone())
        best_move_state = MCTS(root_node, time_limit=self.time_limit)
        for move in self.game.available_moves():
            test_game = self.game.clone()
            test_game.make_move(*move)
            if test_game.board == best_move_state.board:
                return move
        return random.choice(self.game.available_moves())

def play_game():
    game = TicTacToe()

    player_X = MonteCarloPrediction(game)
    player_O = MonteCarloPrediction(game)

    while game.check_winner() is None:
        if game.current_player == 1:
            row, col = player_X.simulate()
        else:
            row, col = player_O.simulate()

        game.make_move(row, col)

    return game.check_winner()

def measure_win_rate(num_games=100):
    x_wins = 0
    o_wins = 0
    draws = 0

    for _ in range(num_games):
        result = play_game()
        if result == 1:
            x_wins += 1
        elif result == -1:
            o_wins += 1
        else:
            draws += 1

    print(f"Total games: {num_games}")
    print(f"Player X (MCTS) wins: {x_wins} ({x_wins / num_games * 100:.2f}%)")
    print(f"Player O (MCTS) wins: {o_wins} ({o_wins / num_games * 100:.2f}%)")
    print(f"Draws: {draws} ({draws / num_games * 100:.2f}%)")

measure_win_rate(10)


Total games: 10
Player X (MCTS) wins: 5 (50.00%)
Player O (MCTS) wins: 3 (30.00%)
Draws: 2 (20.00%)
