In [None]:
import random
from typing import List
from game import Game
from other.context import Context
from other.move import Move
from main.collections import FastArrayList
from utils import AIUtils

class HybridMinimaxMCTS:
    """Iterative-deepening alpha-beta search with random-playout leaf scoring.

    The root search in ``select_action`` repeatedly runs a depth-limited
    minimax (with alpha-beta pruning), increasing the depth by one each
    iteration until the time budget is predicted to be exhausted.  Nodes at
    the depth limit that are not terminal are scored by averaging the results
    of random playouts.

    ``init_ai`` must be called before ``select_action``: the recursive search
    reads ``self.game`` and ``self.player``, which are only set there.
    """

    # Score range used by the search: a win for ``self.player`` is MAX, a
    # loss is MIN, and a result with no winners is NEUTRAL (the midpoint).
    MAX = 1
    MIN = -MAX
    NEUTRAL = (MIN + MAX) / 2

    def __init__(self):
        self.player = -1  # placeholder until init_ai() assigns the real id
        self.game = None  # set by init_ai()
        self.friendly_name = "Hybrid Minimax-MCTS"
        self.evaluation_playouts = 25  # playouts averaged per leaf evaluation
        self.max_playout_depth = 50  # maximum number of moves per playout
        self.reached_depths = []  # one entry per select_action() call
        self.spent_times = []  # one entry per select_action() call

    def set_evaluation_playouts(self, evaluation_playouts: int) -> None:
        """Set how many random playouts are averaged per leaf evaluation."""
        self.evaluation_playouts = evaluation_playouts

    def set_max_playout_depth(self, max_playout_depth: int) -> None:
        """Set the maximum number of moves played in a single playout."""
        self.max_playout_depth = max_playout_depth

    def get_first_reached_depth(self) -> int:
        """Return the depth recorded by the first search, or 0 if none ran."""
        if not self.reached_depths:
            return 0
        return self.reached_depths[0]

    def get_mean_reached_depth(self) -> int:
        """Return the floor of the mean recorded depth, or 0 if none ran."""
        if not self.reached_depths:
            return 0
        return sum(self.reached_depths) // len(self.reached_depths)

    def get_mean_spent_time_seconds(self) -> float:
        """Return the mean recorded search time in seconds, or 0.0 if none ran.

        The recorded values are differences of ``AIUtils.current_time_millis()``
        readings, hence the division by 1000.
        """
        if not self.spent_times:
            return 0.0
        return sum(self.spent_times) / len(self.spent_times) / 1000.0

    def select_action(self, game: Game, context: Context, max_seconds: float, max_iterations: int, max_depth: int) -> Move:
        """Pick a move for the current position by iterative deepening.

        Args:
            game: Game used to generate the root moves.
            context: Current position; it is copied, never modified.
            max_seconds: Time budget for the whole search, in seconds.
            max_iterations: Accepted but not used by this implementation.
            max_depth: Accepted but not used by this implementation.

        Returns:
            The best root move of the last iteration that was run, or the
            first legal move if the time budget allowed no iteration at all.

        Raises:
            IndexError: If there are no legal moves for the mover.
        """
        # NOTE(review): max_iterations and max_depth are ignored; only the
        # time budget bounds the search.  Confirm callers do not rely on them.
        legal_moves = game.moves(context).moves()

        if not game.is_alternating_move_game():
            legal_moves = AIUtils.extract_moves_for_mover(legal_moves, self.player)

        # Fallback result when not even one iteration fits into the budget.
        best_move = legal_moves[0]

        search_time_millis = int(max_seconds * 1000)
        start_time = AIUtils.current_time_millis()
        time_spent = 0
        iteration_time_spent = 0
        last_iteration_time_spent = 0
        depth = 0

        # Start another iteration only if it is predicted to fit: the next
        # iteration is estimated to cost the last iteration's time plus the
        # growth observed between the two most recent iterations.  The budget
        # is not re-checked inside an iteration, so a running iteration is
        # always completed.
        while (time_spent + iteration_time_spent
               + (iteration_time_spent - last_iteration_time_spent)
               <= search_time_millis):
            last_iteration_time_spent = iteration_time_spent
            iteration_start_time = AIUtils.current_time_millis()

            # Each iteration restarts from scratch, so the final answer comes
            # from the deepest iteration only.
            best_move = legal_moves[0]
            best_score = self.MIN

            for move in legal_moves:
                new_context = Context(context)
                new_context.game().apply(new_context, move)
                # After our root move it is the opponent's (minimizing) turn.
                score = self.minimax(new_context, depth, self.MIN, self.MAX, False)

                # Strict comparison: ties keep the earliest move in the list.
                if score > best_score:
                    best_move = move
                    best_score = score

            depth += 1
            iteration_time_spent = AIUtils.current_time_millis() - iteration_start_time
            time_spent = AIUtils.current_time_millis() - start_time

        # ``depth`` now equals the number of completed iterations.
        self.reached_depths.append(depth)
        self.spent_times.append(time_spent)
        return best_move

    def minimax(self, context: Context, depth: int, alpha: float, beta: float, is_maximizing: bool) -> float:
        """Depth-limited minimax with alpha-beta pruning.

        Args:
            context: Position to search from; only copies of it are advanced.
            depth: Remaining plies before the position is scored by
                ``evaluate``.
            alpha: Best score the maximizing side is already assured of.
            beta: Best score the minimizing side is already assured of.
            is_maximizing: True when the ply belongs to ``self.player``.

        Returns:
            A score between ``MIN`` and ``MAX`` from ``self.player``'s view.
            If a non-terminal node has no legal moves, the maximizing branch
            returns ``MIN`` and the minimizing branch returns ``MAX``.
        """
        if depth == 0 or context.trial().over():
            return self.evaluate(context, is_maximizing)

        legal_moves = self.game.moves(context).moves()

        if not self.game.is_alternating_move_game():
            # NOTE(review): moves are filtered for self.player on minimizing
            # plies as well; confirm this is intended for non-alternating
            # games.
            legal_moves = AIUtils.extract_moves_for_mover(legal_moves, self.player)

        if is_maximizing:
            max_value = self.MIN

            for move in legal_moves:
                new_context = Context(context)
                new_context.game().apply(new_context, move)
                new_value = self.minimax(new_context, depth - 1, alpha, beta, False)
                max_value = max(max_value, new_value)

                # Beta cut-off: the minimizing parent already has an option
                # at least this good for it, so it will not choose this node.
                if max_value >= beta:
                    break

                alpha = max(alpha, max_value)

            return max_value
        else:
            min_value = self.MAX

            for move in legal_moves:
                new_context = Context(context)
                new_context.game().apply(new_context, move)
                new_value = self.minimax(new_context, depth - 1, alpha, beta, True)
                min_value = min(min_value, new_value)

                # Alpha cut-off: the maximizing parent already has an option
                # at least this good, so it will not choose this node.
                if min_value <= alpha:
                    break

                beta = min(beta, min_value)

            return min_value

    def evaluate(self, context: Context, is_maximizing: bool) -> float:
        """Score a search leaf: exactly if the game is over, else by playouts.

        Args:
            context: Position to score.
            is_maximizing: True when ``self.player`` would move next; decides
                which player starts the playouts.

        Returns:
            A score between ``MIN`` and ``MAX`` from ``self.player``'s view.
        """
        if context.trial().over():
            return self.evaluate_terminal_state(context)

        # NOTE(review): ``1 - player`` yields the other player only when the
        # two player ids are 0 and 1; confirm against the ids given to
        # init_ai().
        starting_player = self.player if is_maximizing else 1 - self.player
        return self.evaluate_with_playouts(context, starting_player)

    def evaluate_terminal_state(self, context: Context) -> float:
        """Map the winners of ``context`` to a score for ``self.player``.

        Returns:
            ``MAX`` if ``self.player`` is among the winners, ``NEUTRAL`` if
            there are no winners, and ``MIN`` otherwise.
        """
        if self.player in context.winners():
            return self.MAX
        elif not context.winners():
            return self.NEUTRAL
        else:
            return self.MIN

    def evaluate_with_playouts(self, context: Context, starting_player: int) -> float:
        """Average the outcome of ``evaluation_playouts`` random playouts.

        Args:
            context: Position the playouts start from; it is not modified.
            starting_player: Player who makes the first playout move.

        Returns:
            The mean playout score, or ``NEUTRAL`` when the configured number
            of playouts is not positive (no playout is run in that case).
        """
        playouts = self.evaluation_playouts

        # Without playouts there is nothing to average; report "unknown"
        # instead of dividing by zero.
        if playouts <= 0:
            return self.NEUTRAL

        evaluation = 0.0

        for _ in range(playouts):
            evaluation += self.make_playout(context, starting_player)

        return evaluation / playouts

    def make_playout(self, context: Context, starting_player: int) -> float:
        """Play random moves on a copy of ``context`` and score the result.

        The playout stops when the trial is over or after
        ``max_playout_depth`` moves, whichever comes first.

        Args:
            context: Starting position; a copy is advanced, not the original.
            starting_player: Player who makes the first move.

        Returns:
            The score ``evaluate_terminal_state`` gives the final position.
        """
        new_context = Context(context)
        current_player = starting_player
        depth = 0

        while not new_context.trial().over() and depth < self.max_playout_depth:
            move = self.get_random_move(new_context, current_player)
            new_context.game().apply(new_context, move)
            # NOTE(review): toggles between two players assuming ids 0 and 1.
            current_player = 1 - current_player
            depth += 1

        # A playout cut off by the depth limit is scored the same way;
        # presumably it then has no winners and scores NEUTRAL - this depends
        # on what Context.winners() returns for an unfinished trial.
        return self.evaluate_terminal_state(new_context)

    def get_random_move(self, context: Context, player: int) -> Move:
        """Return a uniformly random legal move in ``context``.

        Args:
            context: Position to pick a move for.
            player: Player whose moves are kept when the game is not an
                alternating-move game; unused otherwise.

        Raises:
            IndexError: If there is no legal move to choose from.
        """
        legal_moves = self.game.moves(context).moves()

        if not self.game.is_alternating_move_game():
            legal_moves = AIUtils.extract_moves_for_mover(legal_moves, player)

        return random.choice(legal_moves)

    def init_ai(self, game: Game, player_id: int):
        """Bind the agent to a game and player id and reset its statistics.

        Args:
            game: Game the agent will search.
            player_id: Id of the player this agent controls.
        """
        self.game = game
        self.player = player_id
        self.reached_depths.clear()
        self.spent_times.clear()


: 