In [136]:
from src.heuristics import TicTacToeHeuristic
from src import TicTacToe
import time 



game_cls = TicTacToe()
heuristic = TicTacToeHeuristic()

def min_max(game: TicTacToe, is_max_player, cloned_game, depth):
    start_time = time.time()
    if game.is_terminated() and depth <= 3:
        winner = game.get_winner()
        if winner == cloned_game.player:
            return 10
        elif winner == 0:
            return 0
        else:
            return -10
    if depth > 3:
        return heuristic(game.get_state(), cloned_game.player)

    best_score = -float('inf') if is_max_player else float('inf')
    for action in game.get_actions():
        new_game = game.clone()
        new_game.move(action)
        score = min_max(new_game, not is_max_player, cloned_game, depth+1)
        if (is_max_player and score > best_score) or (not is_max_player and score < best_score):
            best_score = score

    end_time = time.time()
    print("min_max 运行时间：" + str(end_time - start_time))       
    return best_score
    

def get_all_Qs(state: tuple[int, ...], player: int, action_space: set[int]) -> dict[int, float]:
    function_start_time = time.time()  # 记录函数开始时间
    q_values = {}
    for action in action_space:
        loop_start_time = time.time()  # 记录循环开始时间
    
        # 创建游戏状态
        state_creation_start = time.time()
        new_game = game_cls.from_state(state, player)
        cloned_game = new_game.clone()
        state_creation_end = time.time()
    
        # 执行动作
        action_execution_start = time.time()
        new_game.move(action)
        action_execution_end = time.time()
    
        # 计算分数
        score_calculation_start = time.time()
        score = min_max(new_game, False, cloned_game, depth=0)
        score_calculation_end = time.time()
    
        loop_end_time = time.time()  # 记录循环结束时间
        q_values[action] = score
    
        # 打印每部分的运行时间
        print(f"Action {action}:")
        print(f"  - 状态创建时间: {state_creation_end - state_creation_start}秒")
        print(f"  - 动作执行时间: {action_execution_end - action_execution_start}秒")
        print(f"  - 分数计算时间: {score_calculation_end - score_calculation_start}秒")
        print(f"  - 总循环时间: {loop_end_time - loop_start_time}秒")
    
    function_end_time = time.time()  # 记录函数结束时间
    print("get_all_Qs 总运行时间：" + str(function_end_time - function_start_time))


def update_Q(state: tuple[int, ...], player: int, action: int, Q: float) -> None:
    raise NotImplementedError


start_time = time.time()
get_all_Qs(game_cls.get_state(), game_cls.player, game_cls.get_actions())
print(get_all_Qs(game_cls.get_state(), game_cls.player, game_cls.get_actions()))
end_time = time.time()
print("主程序总运行时间：" + str(end_time - start_time))

min_max 运行时间：0.0070531368255615234
min_max 运行时间：0.0007729530334472656
min_max 运行时间：0.0005209445953369141
min_max 运行时间：0.0010728836059570312
min_max 运行时间：0.0008089542388916016
min_max 运行时间：0.0006480216979980469
min_max 运行时间：0.011262893676757812
min_max 运行时间：0.0006320476531982422
min_max 运行时间：0.00055694580078125
min_max 运行时间：0.0005438327789306641
min_max 运行时间：0.0005481243133544922
min_max 运行时间：0.0005638599395751953
min_max 运行时间：0.0005640983581542969
min_max 运行时间：0.003492116928100586
min_max 运行时间：0.0005440711975097656
min_max 运行时间：0.0005259513854980469
min_max 运行时间：0.0005321502685546875
min_max 运行时间：0.0005228519439697266
min_max 运行时间：0.0004990100860595703
min_max 运行时间：0.0005841255187988281
min_max 运行时间：0.003283977508544922
min_max 运行时间：0.0004961490631103516
min_max 运行时间：0.0004088878631591797
min_max 运行时间：0.00041294097900390625
min_max 运行时间：0.00039124488830566406
min_max 运行时间：0.00040602684020996094
min_max 运行时间：0.00041294097900390625
min_max 运行时间：0.0025920867919921875
min_max 运行时间：0.000398

In [137]:
import time
class AdversarialSearchAgent:
    def __init__(self, initial_state):
        self.initial_state = initial_state

    def __max_utility_actions(self, state, player, depth=None):
        if state.terminal_test():
            return state.utility(player), set()
        if depth is not None and depth == 0:
            return state.eval(player), set()
        utility_map = dict()
        best_set = set()
        max_utility = None
        for action in state.actions():
            result = state.result(action)
            next_depth = None if depth is None else depth - 1
            min_utility, _ = self.__min_utility_actions(result, player, next_depth)
            utility_map[action] = min_utility
            print(utility_map)
            if max_utility is None:
                max_utility = min_utility
            elif max_utility < min_utility:
                max_utility = min_utility
        for action, utility in utility_map.items():
            if utility == max_utility:
                best_set.add(action)
        return max_utility, best_set

    def __min_utility_actions(self, state, player, depth=None):
        if state.terminal_test():
            return state.utility(player), set()
        if depth is not None and depth == 0:
            return state.eval(), set()
        utility_map = dict()
        best_set = set()
        min_utility = None
        for action in state.actions():
            result = state.result(action)
            next_depth = None if depth is None else depth - 1
            max_utility, _ = self.__max_utility_actions(result, player, next_depth)
            utility_map[action] = max_utility
            if min_utility is None:
                min_utility = max_utility
            elif min_utility > max_utility:
                min_utility = max_utility
        for action, utility in utility_map.items():
            if utility == min_utility:
                best_set.add(action)
        return min_utility, best_set

    def minimax_search(self):
        return self.__max_utility_actions(self.initial_state,
                                          self.initial_state.player())

    def minimax_values(self):
        value_distribution = dict()
        for action in self.initial_state.actions():
            sub_initial_state = self.initial_state.result(action).copy
            value_distribution[action] = self.__min_utility_actions(
                sub_initial_state,
                self.initial_state.player()
            )[0]
        return value_distribution

    def h_minimax_search(self):
        return self.__max_utility_actions(self.initial_state,
                                          self.initial_state.player, 4)

    def h_minimax_values(self):
        value_distribution = dict()
        start_time = time.time()
        for action in self.initial_state.actions():
            sub_initial_state = self.initial_state.result(action).copy
            value_distribution[action] = self.__min_utility_actions(
                sub_initial_state,
                self.initial_state.player, 3
            )[0]
            end_time = time.time()
            print(action, end_time - start_time)
        return value_distribution

class TicTacToe:
    all_actions = [*range(1, 10)]
    def __init__(self):
        self.player = 1
        self.__list = [0] * 9
        self.history = []

    def __str__(self):
        format_str = '\n+---+---+---+\n|{:^3}|{:^3}|{:^3}|' * 3 + '\n+---+---+---+\n'
        return format_str.format(*self.tuple)

    def __eq__(self, other):
        return self.__list == other.__list

    # iterate through all possible lines
    def __iter__(self):
        for i in (0, 1, 2):
            yield self.__list[i], self.__list[i + 3], self.__list[i + 6]
        for i in (0, 3, 6):
            yield self.__list[i], self.__list[i + 1], self.__list[i + 2]
        for i in (0,):
            yield self.__list[i], self.__list[i + 4], self.__list[i + 8]
        for i in (2,):
            yield self.__list[i], self.__list[i + 2], self.__list[i + 4]

    # return a tuple representation of current state
    @property
    def tuple(self):
        temp_list = [*range(1, 10)]
        if self.__list == [0] * 9:
            return tuple(temp_list)
        for i, player in enumerate(self.__list):
            if player == 0:
                temp_list[i] = ' '
            if player == 1:
                temp_list[i] = 'X'
            if player == 2:
                temp_list[i] = 'O'
        return tuple(temp_list)

    # return a deep copy
    @property
    def copy(self):
        copy = TicTacToe()
        copy.player = self.player
        copy.__list = [*self.__list]
        copy.history = [*self.history]
        return copy

    # return all available actions under current state
    @property
    def available_actions(self):
        actions = set()
        for action in TicTacToe.all_actions:
            if self.winner is None and self.can_choose(action):
                actions.add(action)
        return actions

    # return winner, 0 means tie. If there is no winner yet, return None.
    @property
    def winner(self):
        for first, second, third in self:
            if first == second == third != 0:
                return first
        for i in self.__list:
            if i == 0:
                return None
        return 0

    # check if a position is choosable
    def can_choose(self, position):
        if not 1 <= position <= 9:
            return False
        return self.__list[position - 1] == 0

    # choose a position
    def choose(self, position):
        if self.can_choose(position):
            self.__list[position - 1] = self.player
            self.history += [(self.player, position)]
            self.player = 3 - self.player

    def actions(self):
        return self.available_actions
    
    def result(self, action):
        copy = self.copy
        copy.choose(action)
        return copy
    
    def terminal_test(self):
        return self.winner is not None
    
    def utility(self, player):
        return self.winner if self.winner == 0 or self.winner is None else 10 if self.winner == player else -10
    
    def eval(self, player):
        p1, p2, a1, a2 = [0] * 4
        for line in self:
            p_count = line.count(player)
            a_count = line.count(3 - player)
            if p_count == 2 and a_count == 0:
                p2 += 1
            if p_count == 1 and a_count == 0:
                p1 += 1
            if p_count == 0 and a_count == 2:
                a2 += 1
            if p_count == 0 and a_count == 1:
                a1 += 1
        return 3 * p2 + p1 - (3 * a2 + a1)


class TicTacToeGame:

    def __init__(self):
        self.player1 = AI()
        self.player2 = AI()
        self.player1.name = 'Computer 1'
        self.player2.name = 'Computer 2'
        self.state = TicTacToe()

    def turn(self, player):
        player.choose(self.state)
        print(self.state)
        return self.terminal_test()

    def terminal_test(self):
        if self.state.winner == 0:
            print('Tie!')
            return True
        if self.state.winner == 1:
            print('{} win!'.format(self.player1.name))
            return True
        if self.state.winner == 2:
            print('{} win!'.format(self.player2.name))
            return True
        return False

    def start(self):
        print('Game started!')
        print(self.state)
        while True:
            if self.turn(self.player1):
                break
            if self.turn(self.player2):
                break


class AI:
    def __init__(self):
        self.evaluated = False
        
    def algorithm(self, agent):
        return agent.h_minimax_search()[1], agent.h_minimax_values()

    def choose(self, state):
        print("{}'s turn: ".format(self.name))
        agent = AdversarialSearchAgent(state.copy)
        result = self.algorithm(agent)
        choices, evaluations = result[0], result[1]
        if not self.evaluated:
            temp = [''] * 9
            for key in evaluations:
                temp[key-1] = evaluations[key]
            format_str = '\n+---+---+---+\n|{:^3}|{:^3}|{:^3}|' * 3 + '\n+---+---+---+\n'
            print("\nMinimax evaluations:")
            print(format_str.format(*temp))
            self.evaluated = True
        choice = min(choices)
        state.choose(choice)


In [53]:
game = TicTacToeGame()

In [54]:
game.start()

Game started!

+---+---+---+
| 1 | 2 | 3 |
+---+---+---+
| 4 | 5 | 6 |
+---+---+---+
| 7 | 8 | 9 |
+---+---+---+

Computer 1's turn: 
{3: -2}
{3: -2, 4: -1}
{3: -2, 4: -1, 5: 1}
{3: -2, 4: -1, 5: 1, 6: -2}
{3: -2, 4: -1, 5: 1, 6: -2, 7: 0}
{3: -2, 4: -1, 5: 1, 6: -2, 7: 0, 8: 0}
{3: -2, 4: -1, 5: 1, 6: -2, 7: 0, 8: 0, 9: -2}
{2: -4}
{2: -4, 4: -3}
{2: -4, 4: -3, 5: -1}
{2: -4, 4: -3, 5: -1, 6: -3}
{2: -4, 4: -3, 5: -1, 6: -3, 7: 0}
{2: -4, 4: -3, 5: -1, 6: -3, 7: 0, 8: -3}
{2: -4, 4: -3, 5: -1, 6: -3, 7: 0, 8: -3, 9: -3}
{2: -1}
{2: -1, 3: 0}
{2: -1, 3: 0, 5: 1}
{2: -1, 3: 0, 5: 1, 6: 0}
{2: -1, 3: 0, 5: 1, 6: 0, 7: -2}
{2: -1, 3: 0, 5: 1, 6: 0, 7: -2, 8: -2}
{2: -1, 3: 0, 5: 1, 6: 0, 7: -2, 8: -2, 9: -2}
{2: -4}
{2: -4, 3: -2}
{2: -4, 3: -2, 4: -4}
{2: -4, 3: -2, 4: -4, 6: -3}
{2: -4, 3: -2, 4: -4, 6: -3, 7: -2}
{2: -4, 3: -2, 4: -4, 6: -3, 7: -2, 8: -3}
{2: -4, 3: -2, 4: -4, 6: -3, 7: -2, 8: -3, 9: -3}
{2: -2}
{2: -2, 3: 0}
{2: -2, 3: 0, 4: -1}
{2: -2, 3: 0, 4: -1, 5: 0}
{2: -2, 3: 0

In [71]:
import numpy as np
from copy import deepcopy
from typing import Callable, Optional


class TicTacToe():

    @classmethod
    def from_state(cls, state: tuple[int, ...], player: int):
        obj = TicTacToe()
        obj.board = np.array(state)
        obj.player = player
        return obj

    @classmethod
    def eval(cls, state, player):
        pass

    def __init__(self, start_player=1, default_state_formatter: Callable[[tuple[int, ...]], str] = str):
        self.board = np.zeros(9)
        self.player = start_player
        self.default_state_formatter = default_state_formatter
        self.action_history = []

    def move(self, action):
        if self.board[action] != 0:
            raise Exception("Invalid move")
        self.board[action] = self.player
        self.player = -self.player
        self.action_history.append(action)

    def agent_move(self, policy):
        best_action = policy(self.get_state(), self.player, self.get_actions())
        self.move(best_action)
        return best_action

    def reset(self, start_player=1):
        self.board = np.zeros(9)
        self.player = start_player

    def render(self, state_formatter: Optional[Callable[[tuple[int, ...]], str]] = None):
        formatter = state_formatter or self.default_state_formatter
        print(formatter(self.get_state()), flush=True)

    def get_state(self) -> tuple[int, ...]:
        return tuple(self.board.astype(int).tolist())

    def get_actions(self):
        return set(np.where(self.board == 0)[0].tolist())

    def get_winner(self):
        board = self.board.reshape([3,3])

        for i in range(3):
            if abs(board[i].sum()) == 3:
                return board[i][0]
            if abs(board[:, i].sum()) == 3:
                return board[0][i]

        for j in range(3):
            if abs(board.diagonal().sum()) == 3:
                return board[0, 0]

            if abs(np.fliplr(board).diagonal().sum()) == 3:
                return int(board[0, 2])
        return 0

    def is_terminated(self):
        return not self.get_actions() or self.get_winner()

    def update_state(self, state, player):
        self.board = np.array(state, dtype=int)
        self.player = player

    def clone(self):
        return deepcopy(self)

    def last_player(self):
        return -self.player

    def apply_action(self, action):
        if self.board[action] != 0:
            raise Exception("Invalid move")
        self.board[action] = self.player
        self.player = -self.player
        self.action_history.append(action)

    def undo_action(self):
        if not self.action_history:
            raise Exception("No actions to undo")
        last_action = self.action_history.pop()
        self.board[last_action] = 0
        self.player = -self.player
        

In [127]:
from src.heuristics import TicTacToeHeuristic
import time

game_cls = TicTacToe()
heuristic = TicTacToeHeuristic()

def min_max(game: TicTacToe, is_max_player, cloned_game, depth):
    if game.is_terminated():
        winner = game.get_winner()
        if winner == cloned_game.player:
            return 10
        elif winner == 0:
            return 0
        else:
            return -10
    if depth == 0:
        return heuristic(game.get_state(), cloned_game.player)

    best_score = -float('inf') if is_max_player else float('inf')
    for action in game.get_actions():
        game.apply_action(action)
        score = min_max(game, not is_max_player, cloned_game, depth - 1)
        game.undo_action()
        if (is_max_player and score > best_score) or (not is_max_player and score < best_score):
            best_score = score
    return best_score


def get_all_Qs(state: tuple[int, ...], player: int, action_space: set[int]) -> dict[int, float]:
    q_values = {}
    for action in action_space:
        new_game = game_cls.from_state(state, player)
        cloned_game = new_game.clone()
        new_game.move(action)
        score = min_max(new_game, False, cloned_game, depth=3)
        q_values[action] = score
    return q_values


In [128]:
game_cls.reset()
game_cls.move(4)
game_cls.render()

(0, 0, 0, 0, 1, 0, 0, 0, 0)


In [129]:
result = get_all_Qs(game_cls.get_state(), game_cls.player, game_cls.get_actions())
print(result)

{0: -5, 1: -7, 2: -5, 3: -7, 5: -7, 6: -5, 7: -7, 8: -5}


In [130]:
game_cls.move(0)
game_cls.render()

(-1, 0, 0, 0, 1, 0, 0, 0, 0)


In [131]:
game_cls.move(1)
game_cls.render()

(-1, 1, 0, 0, 1, 0, 0, 0, 0)


In [132]:
result = get_all_Qs(game_cls.get_state(), game_cls.player, game_cls.get_actions())
print(result)

{2: -10, 3: -10, 5: -10, 6: -10, 7: -4, 8: -10}


In [133]:
game_cls.move(7)
game_cls.render()

(-1, 1, 0, 0, 1, 0, 0, -1, 0)


In [134]:
game_cls.move(2)
game_cls.render()

(-1, 1, 1, 0, 1, 0, 0, -1, 0)


In [135]:
result = get_all_Qs(game_cls.get_state(), game_cls.player, game_cls.get_actions())
print(result)

{8: -10, 3: -10, 5: -10, 6: 10}
