# Othello Probcut

### Initiale Konfiguration

Importieren von Abhängigkeiten und Konfiguration

In [None]:
%run othello_game.ipynb

In [None]:
import math
import copy
import time

# STRATEGIES
# One-letter codes identifying which controller plays a colour; next_move()
# compares the configured code against these constants.
HUMAN = 'H'
TRIVIAL = 'T'
MINIMAX = 'M'
ALPHABETA = 'A'
ALPHABETAID = 'I'
PROBCUT = 'P'

# PLAYERS
# Strategy code used for each colour.
BLACK_PLAYER = ALPHABETAID
WHITE_PLAYER = ALPHABETAID

# Evaluation function per colour; read by alphabeta() and alphabeta_id().
# NOTE(review): mobility_heuristic and heuristic are defined in later cells of
# this file. Unless the %run'd notebook also provides them, executing the cells
# top to bottom raises NameError here -- confirm the intended cell order.
BLACK_PLAYER_HEURISTIC = mobility_heuristic
WHITE_PLAYER_HEURISTIC = heuristic

### Hilfsfunktionen
Diese Hilfsfunktionen werden von mehreren KIs verwendet.

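Die Funktion <tt>terminal_utility(state)</tt> gibt für einen Spielzustand einen Wert zurück, der repräsentiert, ob und welcher Spieler gewonnen hat. Gibt es keinen Gewinner, wird der Wert 0 zurückgegeben; ansonsten 1, wenn der Spieler gewonnen hat, der laut <tt>state.turn</tt> am Zug ist, und -1, wenn sein Gegner gewonnen hat. Die Suchfunktionen multiplizieren dieses Ergebnis anschließend mit <tt>state.turn</tt>.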

In [None]:
def terminal_utility(state):
    """Score a game result relative to the player whose turn it is.

    Returns 0 when get_winner() reports no winner, 1 when the winner equals
    state.turn, and -1 otherwise.
    """
    winner = get_winner(state)
    if winner is None:
        return 0
    if winner == state.turn:
        return 1
    return -1

Die Funktion <tt>trivial_heuristic(state)</tt> berechnet eine Heuristik für einen Spielzustand zwischen -1 und 1: die Differenz zwischen der Anzahl weißer und schwarzer Steine, geteilt durch 64.

In [None]:
def trivial_heuristic(state):
    """Return the disc balance of the board, scaled by 64.

    Every cell equal to WHITE counts +1 and every cell equal to BLACK counts
    -1; all other cells are ignored. The sum is divided by 64.
    """
    balance = 0
    for row in range(BOARD_SIZE):
        for col in range(BOARD_SIZE):
            cell = state.board[row, col]
            if cell == WHITE:
                balance += 1
            elif cell == BLACK:
                balance -= 1
    return balance / 64

In [None]:
def mobility_heuristic(state):
    """Evaluate a state purely by mobility (see get_mobility_heuristic)."""
    mobility = get_mobility_heuristic(state)
    return mobility

In [None]:
def heuristic(state):
    """Return the mean of the mobility and the corner-location heuristics."""
    combined = get_mobility_heuristic(state) + get_location_heuristic(state)
    return combined / 2

In [None]:
def get_mobility_heuristic(state):
    """Return the difference in available moves, scaled by 64.

    Counts the moves get_possible_moves() reports for state.turn, subtracts
    the count it reports for -state.turn, and divides the result by 64.
    """
    own_moves = get_possible_moves(state, state.turn)
    other_moves = get_possible_moves(state, -state.turn)
    return (len(own_moves) - len(other_moves)) / 64

In [None]:
def get_location_heuristic(state):
    """Evaluate corner control from the raw board cell values.

    For each of the four corners (indices hard-coded for an 8x8 board): when
    the corner cell equals NONE, the value of the diagonally adjacent cell is
    subtracted; otherwise the corner's own cell value is added. The total is
    divided by 4.
    """
    # (corner, diagonally adjacent cell) pairs.
    corner_pairs = (
        ((0, 0), (1, 1)),
        ((0, 7), (1, 6)),
        ((7, 0), (6, 1)),
        ((7, 7), (6, 6)),
    )
    total = 0
    for corner, neighbour in corner_pairs:
        if state.board[corner] != NONE:
            total += state.board[corner]
        else:
            total -= state.board[neighbour]
    return total / 4

### Triviale KI
Diese KI iteriert reihenweise über das Spielfeld und spielt den ersten gültigen Zug

In [None]:
def trivial_ai_make_move(state):
    """Play the first valid move found when scanning the board row by row.

    Does nothing if is_move_valid() rejects every cell.
    """
    cells = ((row, col) for row in range(BOARD_SIZE) for col in range(BOARD_SIZE))
    for row, col in cells:
        if not is_move_valid(state, row, col):
            continue
        make_move(state, row, col)
        return

### Minimax KI
Diese KI verwendet den Minimax Algorithmus

In [None]:
# Number of plies minimax() searches before falling back to the heuristic.
MINIMAX_DEPTH_LIMIT = 4

#TODO: What if one player has to play twice in a row?
def minimax(state, depth):
    """Search the game tree in negamax form and return the best utility found.

    Args:
        state: position to search; it is not modified (children are deep copies).
        depth: current ply, starting at 0 for the root call.

    Returns:
        The maximum over all moves of the negated child value, or -math.inf
        when get_possible_moves() yields no move.

    Side effects:
        At depth 0 the best move so far is stored in the global
        minimax_best_move.

    NOTE(review): heuristic_utility is not defined in the visible part of this
    file -- presumably it comes from the %run'd notebook; confirm.
    """
    global minimax_best_move
    if is_game_over(state):
        return terminal_utility(state) * state.turn
    if depth >= MINIMAX_DEPTH_LIMIT:
        return heuristic_utility(state) * state.turn

    best = -math.inf
    for move in get_possible_moves(state, state.turn):
        # Play the move on a copy so the caller's state stays untouched.
        child = copy.deepcopy(state)
        make_move(child, move[0], move[1])
        score = -minimax(child, depth + 1)
        if score > best:
            best = score
            if depth == 0:
                minimax_best_move = move
    return best

def minimax_ai_make_move(state):
    """Run minimax() from the root and play the move it recorded."""
    minimax(state, 0)
    chosen = minimax_best_move  # set by minimax() at depth 0
    make_move(state, chosen[0], chosen[1])

### Alpha-Beta KI
Diese KI verwendet den Minimax-Algorithmus mit Alpha-Beta-Pruning.

In [None]:
# Root search depth for alphabeta(); also the deepest iteration of
# alphabeta_id_make_move().
ALPHABETA_DEPTH_LIMIT = 4

#TODO: What if one player has to play twice in a row?
def alphabeta(state, depth, alpha, beta):
    """Negamax search with alpha-beta pruning.

    Args:
        state: position to search; it is not modified (children are deep copies).
        depth: remaining plies; 0 means evaluate with the configured heuristic.
        alpha: lower bound of the search window.
        beta: upper bound of the search window.

    Returns:
        The best value found, or -math.inf when get_possible_moves() yields
        no move. The loop stops early once the value reaches beta.

    Side effects:
        When depth equals ALPHABETA_DEPTH_LIMIT (the root call), the best move
        so far is stored in the global alphabeta_best_move.

    NOTE(review): only the BLACK branch multiplies the heuristic by
    state.turn, while alphabeta_id() does not multiply at all -- confirm which
    sign convention is intended.
    """
    global alphabeta_best_move
    if is_game_over(state):
        return terminal_utility(state) * state.turn
    if depth == 0:
        if state.turn == WHITE:
            return WHITE_PLAYER_HEURISTIC(state)
        return BLACK_PLAYER_HEURISTIC(state) * state.turn

    best = -math.inf
    for move in get_possible_moves(state, state.turn):
        child = copy.deepcopy(state)
        make_move(child, move[0], move[1])
        # The window is negated and swapped for the opponent's point of view.
        score = -alphabeta(child, depth - 1, -beta, -alpha)
        if score > best:
            best = score
            if depth == ALPHABETA_DEPTH_LIMIT:
                alphabeta_best_move = move
        if best >= beta:
            break  # beta cutoff
        alpha = max(alpha, best)
    return best

def alphabeta_ai_make_move(state):
    """Search with a full window at ALPHABETA_DEPTH_LIMIT and play the result."""
    alphabeta(state, ALPHABETA_DEPTH_LIMIT, -math.inf, math.inf)
    chosen = alphabeta_best_move  # set by alphabeta() at the root depth
    make_move(state, chosen[0], chosen[1])

In [None]:
# Maps (board bytes, side to move) of a searched child position to the value
# the parent obtained for it. Used for move ordering only.
transposition_table = {}

#TODO: What if one player has to play twice in a row?
def alphabeta_id(state, depth, alpha, beta):
    """Negamax alpha-beta search with move ordering from transposition_table.

    Args:
        state: position to search; it is not modified (children are deep copies).
        depth: remaining plies; 0 means evaluate with the configured heuristic.
        alpha: lower bound of the search window.
        beta: upper bound of the search window.

    Returns:
        The best value found, or -math.inf when get_possible_moves() yields
        no move. The loop returns early once the value reaches beta.

    Side effects:
        Stores the negated search value of every visited child in
        transposition_table. When depth equals ALPHABETA_DEPTH_LIMIT, the best
        move so far is stored in the global alphabeta_best_move_id.
    """
    global alphabeta_best_move_id
    if is_game_over(state):
        return terminal_utility(state) * state.turn
    if depth == 0:
        if state.turn == WHITE:
            return WHITE_PLAYER_HEURISTIC(state)
        return BLACK_PLAYER_HEURISTIC(state)

    moves = get_possible_moves(state, state.turn)
    # The return value of make_move() is used as the child position, so
    # make_move() must return the state it was given.
    child_states = [make_move(copy.deepcopy(state), move[0], move[1]) for move in moves]

    # Ordering estimate per child: the value stored by an earlier search if
    # there is one, otherwise a heuristic fallback. The fallback depends only
    # on the parent state, so it is computed lazily and at most once per node
    # instead of once per child.
    # NOTE(review): evaluating `state` gives every unseen child the same
    # estimate; `child_state` was probably intended -- confirm the sign
    # convention of the heuristics before changing it.
    fallback = None
    fallback_known = False
    ordered_moves = []
    for move, child_state in zip(moves, child_states):
        key = (child_state.board.tobytes(), child_state.turn)
        if key in transposition_table:
            estimate = transposition_table[key]
        else:
            if not fallback_known:
                if state.turn == WHITE:
                    fallback = WHITE_PLAYER_HEURISTIC(state)
                else:
                    fallback = BLACK_PLAYER_HEURISTIC(state)
                fallback_known = True
            estimate = fallback
        ordered_moves.append((estimate, move, child_state))
    # Highest estimate first; ties fall through to comparing the moves.
    ordered_moves.sort(reverse=True)

    utility = -math.inf
    for (_, move, tmp_state) in ordered_moves:
        tmp_utility = -alphabeta_id(tmp_state, depth - 1, -beta, -alpha)
        transposition_table[(tmp_state.board.tobytes(), tmp_state.turn)] = tmp_utility
        if tmp_utility > utility:
            utility = tmp_utility
            if depth == ALPHABETA_DEPTH_LIMIT:
                alphabeta_best_move_id = move
        if utility >= beta:
            return utility
        alpha = max(alpha, utility)
    return utility

def alphabeta_id_make_move(state):
    """Search with iterative deepening and play the move of the last iteration.

    alphabeta_id() is called with a full window for depth 1 up to and
    including ALPHABETA_DEPTH_LIMIT. Only the deepest iteration records
    alphabeta_best_move_id; the shallower ones fill transposition_table.
    """
    for depth in range(1, ALPHABETA_DEPTH_LIMIT + 1):
        alphabeta_id(state, depth, -math.inf, math.inf)
    chosen = alphabeta_best_move_id
    make_move(state, chosen[0], chosen[1])

### ProbCut KI
An dieser Stelle beginnt die Implementierung der Künstlichen Intelligenz mittels des Minimax Algorithmus und ProbCut

In [None]:
def probcut_ai_make_move(state):
    """Placeholder for the ProbCut AI: play the first valid move found.

    No ProbCut search is implemented yet. The board is scanned row by row and
    the first cell accepted by is_move_valid() is played, using the same
    module-level game API (BOARD_SIZE, is_move_valid, make_move) as
    trivial_ai_make_move(). Does nothing if no cell is a valid move.
    """
    for row in range(BOARD_SIZE):
        for col in range(BOARD_SIZE):
            if is_move_valid(state, row, col):
                make_move(state, row, col)
                return

### Applikation Starten

Führen Sie zum Starten der Applikation den folgenden Code aus.

In [None]:
# Start from a new GameState and pass it to display_board().
state = GameState()
display_board(state)

def next_move(state):
    """Let the configured AIs move until a human is to move or the game ends.

    Each round sleeps for 0.5 seconds, looks up the strategy of the player
    named by state.turn, lets that strategy make its move and calls
    update_output(). The function returns without moving when the strategy is
    HUMAN, and returns after the update once is_game_over() is true.

    Raises:
        ValueError: if the strategy code of the player to move has no handler
            here. PROBCUT is not dispatched yet, so it is rejected as well.
            Without this check no move would be made and the function would
            spin on the same position forever.
    """
    # A loop instead of one recursive call per move keeps the stack flat.
    while True:
        time.sleep(0.5)
        # Check if/which AI is playing
        strat = BLACK_PLAYER if state.turn == BLACK else WHITE_PLAYER
        if strat == HUMAN:
            return
        # Execute AI strategy for current player
        if strat == TRIVIAL:
            trivial_ai_make_move(state)
        elif strat == MINIMAX:
            minimax_ai_make_move(state)
        elif strat == ALPHABETA:
            alphabeta_ai_make_move(state)
        elif strat == ALPHABETAID:
            alphabeta_id_make_move(state)
        else:
            raise ValueError(f'Invalid Strategy: {strat!r}')
        update_output(state)
        if is_game_over(state):
            return

# Start playing from the initial position.
next_move(state)

### Testing code

In [None]:
# Separate game state used by the test cells below.
test_board = GameState()

In [None]:
# Let the iterative-deepening AI play one move, then show the resulting board.
alphabeta_id_make_move(test_board)
test_board.board

In [None]:
import cProfile

# Profile one iterative-deepening move, then show the resulting board.
cProfile.run('alphabeta_id_make_move(test_board)')
test_board.board

In [None]:
# Look up the value stored for the current test position; the key has the same
# (board bytes, side to move) form that alphabeta_id() writes. A plain dict
# lookup raises KeyError if the position was never stored.
#transposition_table[(test_board.board.tobytes(), test_board.turn)] = 1
transposition_table[(test_board.board.tobytes(), test_board.turn)]

# Legacy code

In [None]:
def alphabeta_max(state, alpha, beta, depth):
    """Maximizing half of the legacy two-function alpha-beta search.

    Uses the method-based state API (state.get_possible_moves(), state.move())
    rather than the module-level functions used by the current AIs.

    Args:
        state: position to search; children are deep copies.
        alpha: best value the maximizer is already assured of.
        beta: best value the minimizer is already assured of.
        depth: current ply, starting at 0 for the root call.

    Returns:
        The largest child value found, or -math.inf if there are no moves.
        The loop stops early once the value reaches beta.

    Side effects:
        At depth 0 the best move so far is stored in the global
        alphabeta_best_move.

    NOTE(review): heuristic_utility is not defined in the visible part of this
    file -- confirm where it comes from.
    """
    global alphabeta_best_move
    if is_game_over(state):
        return terminal_utility(state)
    if depth >= ALPHABETA_DEPTH_LIMIT:
        return heuristic_utility(state)

    best = -math.inf
    for move in state.get_possible_moves():
        successor = copy.deepcopy(state)
        successor.move(move[0], move[1])
        value = alphabeta_min(successor, alpha, beta, depth + 1)
        if value > best:
            best = value
            if depth == 0:
                alphabeta_best_move = move
        if best >= beta:
            break  # beta cutoff
        alpha = max(alpha, best)
    return best

def alphabeta_min(state, alpha, beta, depth):
    """Minimizing half of the legacy two-function alpha-beta search.

    Uses the method-based state API (state.get_possible_moves(), state.move())
    rather than the module-level functions used by the current AIs.

    Args:
        state: position to search; children are deep copies.
        alpha: best value the maximizer is already assured of.
        beta: best value the minimizer is already assured of.
        depth: current ply, starting at 0 for the root call.

    Returns:
        The smallest child value found, or math.inf if there are no moves.
        Terminal and depth-limit evaluations are negated before being
        returned. The loop stops early once the value drops to alpha.

    Side effects:
        At depth 0 the best move so far is stored in the global
        alphabeta_best_move.

    NOTE(review): heuristic_utility is not defined in the visible part of this
    file -- confirm where it comes from.
    """
    global alphabeta_best_move
    if is_game_over(state):
        return -terminal_utility(state)
    if depth >= ALPHABETA_DEPTH_LIMIT:
        return -heuristic_utility(state)

    worst = math.inf
    for move in state.get_possible_moves():
        successor = copy.deepcopy(state)
        successor.move(move[0], move[1])
        value = alphabeta_max(successor, alpha, beta, depth + 1)
        if value < worst:
            worst = value
            if depth == 0:
                alphabeta_best_move = move
        if worst <= alpha:
            break  # alpha cutoff
        beta = min(beta, worst)
    return worst