In [4]:
import random
import numpy as np

In [7]:
class TicTacToe:
    """Minimal 3x3 tic-tac-toe engine.

    Each cell of ``board`` holds 1 (drawn as 'X'), -1 (drawn as 'O') or
    0 (empty). ``current_player`` is the mark the next accepted move will
    place; it starts at 1 and changes sign after every accepted move.
    """

    def __init__(self):
        self.board = [[0] * 3 for _ in range(3)]
        self.current_player = 1

    def reset(self):
        """Clear the board and give the first move back to player 1."""
        self.current_player = 1
        self.board = [[0] * 3 for _ in range(3)]

    def render(self):
        """Print the board as three text rows separated by ``---+---+---``."""
        glyphs = {1: 'X', -1: 'O', 0: ' '}
        for index, cells in enumerate(self.board):
            print(" " + " | ".join(glyphs[value] for value in cells))
            # Separator goes between rows only, not after the last one.
            if index < 2:
                print("---+---+---")

    def available_moves(self):
        """Return the (row, col) pairs of all empty cells in row-major order."""
        free_cells = []
        for r in range(3):
            for c in range(3):
                if self.board[r][c] == 0:
                    free_cells.append((r, c))
        return free_cells

    def make_move(self, row, col):
        """Place the current player's mark at (row, col).

        Returns True and hands the turn to the other player when the cell
        was empty; returns False and changes nothing when it was occupied.
        """
        if self.board[row][col] != 0:
            return False
        self.board[row][col] = self.current_player
        self.current_player = -self.current_player
        return True

    def check_winner(self):
        """Report the state of the game.

        Returns 1 or -1 when that mark fills a row, column or diagonal,
        0 when the board is full without such a line, and None while
        empty cells remain and nobody has completed a line.
        """
        grid = np.array(self.board)
        # Eight line totals: 3 rows, 3 columns, main and anti diagonal.
        line_totals = np.concatenate((
            grid.sum(axis=1),
            grid.sum(axis=0),
            [grid.trace()],
            [np.fliplr(grid).trace()],
        ))

        # A total of +3 / -3 means one mark owns the entire line.
        for mark in (1, -1):
            if 3 * mark in line_totals:
                return mark
        if (grid == 0).any():
            return None
        return 0

    def clone(self):
        """Return an independent copy of this game (rows are copied)."""
        twin = TicTacToe()
        twin.board = [list(row) for row in self.board]
        twin.current_player = self.current_player
        return twin

    def random_play(self):
        """Play uniformly random legal moves in place until the game ends."""
        while True:
            if self.check_winner() is not None:
                return
            self.make_move(*random.choice(self.available_moves()))

class MonteCarloPrediction:
    """Flat Monte Carlo move chooser based on random playouts.

    For every legal move of the side to move, the position is cloned, the
    move is played, and the rest of the game is finished with random moves.
    A move's score is (wins - losses) / playouts from the mover's point of
    view; draws count as zero.

    NOTE(review): the next notebook cell defines this class again with a
    default of 100 simulations; in a top-to-bottom run that later
    definition is the one in effect.

    Args:
        game: Object exposing ``available_moves()``, ``clone()``,
            ``make_move(row, col)``, ``random_play()``, ``check_winner()``
            and a ``current_player`` attribute.
        simulations_per_move: Number of random playouts per candidate move.
        verbose: When True (the default), ``simulate`` prints the score
            table before returning.

    Raises:
        ValueError: If ``simulations_per_move`` is smaller than 1.
    """

    def __init__(self, game, simulations_per_move=1000, verbose=True):
        # Zero playouts would otherwise surface later as a ZeroDivisionError.
        if simulations_per_move < 1:
            raise ValueError("simulations_per_move must be at least 1")
        self.game = game
        self.simulations_per_move = simulations_per_move
        self.verbose = verbose

    def simulate(self):
        """Score every legal move by random playouts and return the best one.

        Returns:
            The move with the highest score; on ties, the first such move in
            the order returned by ``game.available_moves()``.

        Raises:
            ValueError: If the game has no legal move left.
        """
        moves = self.game.available_moves()
        if not moves:
            # Fail with a clear message instead of max() on an empty dict.
            raise ValueError("simulate() requires at least one available move")

        current_player = self.game.current_player
        # Net result per move: +1 for each playout won, -1 for each one lost.
        net_wins = {move: 0 for move in moves}

        for move in moves:
            for _ in range(self.simulations_per_move):
                clone_game = self.game.clone()
                clone_game.make_move(*move)
                clone_game.random_play()
                winner = clone_game.check_winner()

                if winner == current_player:
                    net_wins[move] += 1
                elif winner == -current_player:
                    net_wins[move] -= 1

        # Score = own win probability minus the opponent's win probability.
        win_rates = {
            move: net / self.simulations_per_move
            for move, net in net_wins.items()
        }

        if self.verbose:
            print(win_rates)
        # Select the move with the best win/defense balance.
        return max(win_rates, key=win_rates.get)



In [8]:
class MonteCarloPrediction:
    """Flat Monte Carlo move chooser based on random playouts.

    For every legal move of the side to move, the position is cloned, the
    move is played, and the rest of the game is finished with random moves.
    A move's score is (wins - losses) / playouts from the mover's point of
    view; draws count as zero.

    NOTE(review): an earlier notebook cell defines a class with the same
    name and a default of 1000 simulations; this definition replaces it
    when the cells are run in order.

    Args:
        game: Object exposing ``available_moves()``, ``clone()``,
            ``make_move(row, col)``, ``random_play()``, ``check_winner()``
            and a ``current_player`` attribute.
        simulations_per_move: Number of random playouts per candidate move.
        verbose: When True (the default), ``simulate`` prints the score
            table before returning.

    Raises:
        ValueError: If ``simulations_per_move`` is smaller than 1.
    """

    def __init__(self, game, simulations_per_move=100, verbose=True):
        # Zero playouts would otherwise surface later as a ZeroDivisionError.
        if simulations_per_move < 1:
            raise ValueError("simulations_per_move must be at least 1")
        self.game = game
        self.simulations_per_move = simulations_per_move
        self.verbose = verbose

    def simulate(self):
        """Score every legal move by random playouts and return the best one.

        Returns:
            The move with the highest score; on ties, the first such move in
            the order returned by ``game.available_moves()``.

        Raises:
            ValueError: If the game has no legal move left.
        """
        moves = self.game.available_moves()
        if not moves:
            # Fail with a clear message instead of max() on an empty dict.
            raise ValueError("simulate() requires at least one available move")

        current_player = self.game.current_player
        # Net result per move: +1 for each playout won, -1 for each one lost.
        net_wins = {move: 0 for move in moves}

        for move in moves:
            for _ in range(self.simulations_per_move):
                clone_game = self.game.clone()
                clone_game.make_move(*move)
                clone_game.random_play()
                winner = clone_game.check_winner()

                if winner == current_player:
                    net_wins[move] += 1
                elif winner == -current_player:
                    net_wins[move] -= 1

        # Score = own win probability minus the opponent's win probability.
        win_rates = {
            move: net / self.simulations_per_move
            for move, net in net_wins.items()
        }

        if self.verbose:
            print(win_rates)
        # Select the move with the best win/defense balance.
        return max(win_rates, key=win_rates.get)

In [10]:
# Self-play demo: two Monte Carlo agents share one board and alternate moves.
game = TicTacToe()
player_X = MonteCarloPrediction(game)
player_O = MonteCarloPrediction(game)

# Keep playing until check_winner() reports a result (win or draw).
while game.check_winner() is None:
    game.render()
    # Player 1 is X; any other value of current_player is treated as O.
    print("Player X's turn" if game.current_player == 1 else "Player O's turn")
    row, col = (player_X if game.current_player == 1 else player_O).simulate()

    game.make_move(row, col)

# Show the final position and announce the outcome.
game.render()
winner = game.check_winner()
print({1: "Player X wins!", -1: "Player O wins!"}.get(winner, "It's a draw!"))

   |   |  
---+---+---
   |   |  
---+---+---
   |   |  
Player X's turn
{(0, 0): 0.17, (0, 1): 0.27, (0, 2): 0.53, (1, 0): 0.12, (1, 1): 0.49, (1, 2): 0.18, (2, 0): 0.29, (2, 1): 0.3, (2, 2): 0.22}
   |   | X
---+---+---
   |   |  
---+---+---
   |   |  
Player O's turn
{(0, 0): -0.26, (0, 1): -0.34, (1, 0): -0.43, (1, 1): -0.13, (1, 2): -0.46, (2, 0): -0.26, (2, 1): -0.4, (2, 2): -0.41}
   |   | X
---+---+---
   | O |  
---+---+---
   |   |  
Player X's turn
{(0, 0): 0.23, (0, 1): 0.33, (1, 0): 0.16, (1, 2): 0.17, (2, 0): -0.13, (2, 1): 0.17, (2, 2): 0.28}
   | X | X
---+---+---
   | O |  
---+---+---
   |   |  
Player O's turn
{(0, 0): 0.34, (1, 0): -0.16, (1, 2): -0.15, (2, 0): -0.42, (2, 1): -0.44, (2, 2): -0.12}
 O | X | X
---+---+---
   | O |  
---+---+---
   |   |  
Player X's turn
{(1, 0): -0.28, (1, 2): -0.2, (2, 0): -0.65, (2, 1): -0.93, (2, 2): 0.26}
 O | X | X
---+---+---
   | O |  
---+---+---
   |   | X
Player O's turn
{(1, 0): 0.1, (1, 2): 0.42, (2, 0): -0.16, (2, 1): -