# Othello Probcut

### Initiale Konfiguration

Importieren von Abhängigkeiten und Konfiguration

In [1]:
%run othello_game.ipynb

In [2]:
import math
import copy
import time
import numpy

# STRATEGIES
# One-character identifiers for the available move-selection strategies.
HUMAN = 'H'
TRIVIAL = 'T'
MINIMAX = 'M'
ALPHABETA = 'A'
ALPHABETAID = 'I'
PROBCUT = 'P'

# PLAYERS
# Strategy assigned to each colour; next_move() reads these to pick the AI to run.
BLACK_PLAYER = TRIVIAL
WHITE_PLAYER = ALPHABETA

### Hilfsfunktionen
Diese Hilfsfunktionen werden von mehreren KIs verwendet.

Die Funktion <tt>terminal_utility(state)</tt> gibt für einen Endzustand die Utility für den Spieler zurück, der am Zug wäre: +1, falls dieser gewinnt, 0 bei Unentschieden und -1, falls dieser verliert.

In [3]:
def terminal_utility(state):
    """Return the utility of a finished game for the player whose turn it is.

    Computed as ``1 - |get_winner(state) - state.turn|``. Assuming players are
    encoded as +1/-1 and a draw as 0 (TODO confirm against ``get_winner``),
    this is +1 for a win, 0 for a draw and -1 for a loss of the side to move.
    """
    winner_offset = abs(get_winner(state) - state.turn)
    return 1 - winner_offset

### Heuristiken

In [4]:
def disc_count_heuristic(state):
    """Score a position by disc difference from the side to move's view.

    Returns (discs of ``state.turn`` minus discs of the opponent) divided by
    64 (presumably the number of board squares, which keeps the value in
    [-1, 1]).
    """
    own_discs = count_disks(state, state.turn)
    opponent_discs = count_disks(state, -state.turn)
    return (own_discs - opponent_discs) / 64

In [5]:
def mobility_heuristic(state):
    """Score a position by mobility difference from the side to move's view.

    Returns (number of moves available to ``state.turn`` minus number of moves
    available to the opponent) divided by 64.
    """
    own_moves = get_possible_moves(state, state.turn)
    opponent_moves = get_possible_moves(state, -state.turn)
    return (len(own_moves) - len(opponent_moves)) / 64

In [6]:
def heuristic(state):
    """Combined evaluation of a position for the side to move.

    Once ``state.num_pieces`` reaches 50 the plain disc difference is used.
    Before that the result is the average of the mobility score and the
    positional (cowthello) weight score.
    """
    late_game = state.num_pieces >= 50
    if late_game:
        return disc_count_heuristic(state)
    mobility_score = mobility_heuristic(state)
    positional_score = cowthello_heuristic(state)
    return (mobility_score + positional_score) / 2

In [7]:
def gen_weight_matrix(default=0, corner=0, adj_corner=0, sup_corner=0, edge=0, dia_corner=0, support=0, sup_edge=0):
    """Build a normalised 8x8 matrix of positional weights.

    Each argument is the raw weight of one class of squares; the layout below
    is symmetric under reflection and 180 degree rotation. The raw matrix is
    divided by the sum of the absolute values of all entries, so the absolute
    values of the result sum to 1.

    Returns:
        An 8x8 float ``numpy.ndarray``. If every raw weight is zero (as in the
        all-default call) a zero matrix is returned instead of dividing by
        zero, which would otherwise produce NaN entries.
    """
    raw_matrix = numpy.array([
        [corner,      adj_corner,  sup_corner,  edge,      edge,      sup_corner,  adj_corner,  corner],
        [adj_corner,  dia_corner,  default,     default,   default,   default,     dia_corner,  adj_corner],
        [sup_corner,  default,     support,     sup_edge,  sup_edge,  support,     default,     sup_corner],
        [edge,        default,     sup_edge,    default,   default,   sup_edge,    default,     edge],
        [edge,        default,     sup_edge,    default,   default,   sup_edge,    default,     edge],
        [sup_corner,  default,     support,     sup_edge,  sup_edge,  support,     default,     sup_corner],
        [adj_corner,  dia_corner,  default,     default,   default,   default,     dia_corner,  adj_corner],
        [corner,      adj_corner,  sup_corner,  edge,      edge,      sup_corner,  adj_corner,  corner]
    ])
    max_possible = numpy.sum(numpy.absolute(raw_matrix))
    if max_possible == 0:
        # Nothing to normalise: avoid 0/0, which yields NaN and a RuntimeWarning.
        return numpy.zeros(raw_matrix.shape, dtype=float)
    return numpy.true_divide(raw_matrix, max_possible)

In [8]:
def weight_heuristic(state, weights):
    """Positional score of ``state.board`` under ``weights`` for the side to move.

    Multiplies the board element-wise with ``weights``, sums the products and
    multiplies the total by ``state.turn``.
    """
    weighted_board = numpy.multiply(state.board, weights)
    board_total = numpy.sum(weighted_board)
    return board_total * state.turn

In [9]:
# Normalised positional weight matrix consumed by cowthello_heuristic:
# corners and "support" squares are rewarded, squares adjacent/diagonal to a corner are penalised.
cowthello_weights = gen_weight_matrix(default=1, corner=100, adj_corner=-25, sup_corner=25, edge=10, dia_corner=-50, support=50, sup_edge=5)

In [10]:
def cowthello_heuristic(state):
    """Positional score of ``state`` using the module-level ``cowthello_weights``."""
    return weight_heuristic(state=state, weights=cowthello_weights)

### Triviale KI
Diese KI iteriert reihenweise über das Spielfeld und spielt den ersten gültigen Zug

In [11]:
def trivial_ai_make_move(state):
    """Play the first valid move found when scanning the board row by row.

    Does nothing if no square is a valid move for ``state.turn``.
    """
    board_cells = ((row, col) for row in range(BOARD_SIZE) for col in range(BOARD_SIZE))
    for row, col in board_cells:
        if not is_move_valid(state, row, col, state.turn):
            continue
        make_move(state, row, col)
        break

### Minimax KI
Diese KI verwendet den Minimax Algorithmus

In [12]:
MINIMAX_DEPTH_LIMIT = 4

def minimax(state, depth):
    """Negamax search returning the value of ``state`` for the player to move.

    Args:
        state: position to evaluate; it is not modified (children are deep copies).
        depth: number of plies already searched; the root is called with 0.

    Returns:
        The best achievable utility from the point of view of ``state.turn``.
        ``-math.inf`` is returned if the game is not over but no move exists.

    Side effects:
        At ``depth == 0`` the best root move is stored in the global
        ``minimax_best_move``.
    """
    global minimax_best_move
    if(state.game_over):
        # terminal_utility is already relative to the player to move, which is
        # exactly what negamax needs; multiplying by state.turn would flip the
        # sign for one of the two colours.
        return terminal_utility(state)
    if(depth >= MINIMAX_DEPTH_LIMIT):
        # NOTE(review): heuristic_utility is not defined in this notebook; this
        # expression is only correct if it scores from the +1 player's point of
        # view. If it is relative to the side to move (like `heuristic`), drop
        # the `* state.turn` - confirm.
        return heuristic_utility(state) * state.turn
    utility = -math.inf
    for move in get_possible_moves(state, state.turn):
        tmp_state = copy.deepcopy(state)
        make_move(tmp_state, move[0], move[1])
        tmp_utility = minimax(tmp_state, depth + 1)
        # Only flip the sign when the side to move actually changed; if the
        # opponent had to pass, the child value is already from our view.
        if(tmp_state.turn != state.turn):
            tmp_utility = -tmp_utility
        if(tmp_utility > utility):
            utility = tmp_utility
            if(depth == 0):
                minimax_best_move = move
    return utility

def minimax_ai_make_move(state):
    """Run a minimax search from ``state`` and play the move it selected.

    Relies on ``minimax`` storing its choice in the global ``minimax_best_move``.
    """
    minimax(state, 0)
    row, col = minimax_best_move[0], minimax_best_move[1]
    make_move(state, row, col)

### Alpha-Beta KI
Diese KI verwendet den Minimax-Algorithmus mit Alpha-Beta-Pruning.

In [13]:
# Maps (board bytes, side to move) of a child position to the last utility
# found for it (from the parent's point of view); used only for move ordering.
transposition_table_neg = {}

def alphabeta_id(state, depth, alpha, beta):
    """Negamax alpha-beta search with transposition-table move ordering.

    Args:
        state: position to search; it is not modified (children are deep copies).
        depth: remaining search depth in plies; 0 evaluates the heuristic.
        alpha: lower bound of the search window for the player to move.
        beta: upper bound of the search window for the player to move.

    Returns:
        The (fail-soft) value of ``state`` from the point of view of
        ``state.turn``.

    Side effects:
        Updates ``transposition_table_neg``; when ``depth`` equals
        ``ALPHABETA_DEPTH_LIMIT`` the best move is stored in the global
        ``alphabeta_best_move_id``.
    """
    global alphabeta_best_move_id
    if(state.game_over):
        # terminal_utility is already relative to the player to move; an extra
        # `* state.turn` would invert the result for one colour.
        return terminal_utility(state)
    if(depth == 0):
        if(state.turn == WHITE):
            return WHITE_PLAYER_HEURISTIC(state)
        else:
            return BLACK_PLAYER_HEURISTIC(state)
    moves = get_possible_moves(state, state.turn)
    child_states = [make_move(copy.deepcopy(state), move[0], move[1]) for move in moves]

    # Ordering estimate per child: cached value if known, otherwise the
    # heuristic of the parent position (identical for all uncached children).
    estimated_utilities = []
    for child_state in child_states:
        key = (child_state.board.tobytes(), child_state.turn)
        if key in transposition_table_neg:
            estimated_utilities.append(transposition_table_neg[key])
        elif state.turn == WHITE:
            estimated_utilities.append(WHITE_PLAYER_HEURISTIC(state))
        else:
            estimated_utilities.append(BLACK_PLAYER_HEURISTIC(state))
    ordered_moves = [(estimated_utilities[i], moves[i], child_states[i]) for i in range(len(moves))]
    ordered_moves.sort(reverse=True)

    utility = -math.inf
    for (_, move, tmp_state) in ordered_moves:
        if(tmp_state.turn == state.turn):
            # Opponent had to pass: the same player moves again, so neither the
            # value nor the window may be negated.
            tmp_utility = alphabeta_id(tmp_state, depth - 1, alpha, beta)
        else:
            tmp_utility = -alphabeta_id(tmp_state, depth - 1, -beta, -alpha)
        transposition_table_neg[(tmp_state.board.tobytes(), tmp_state.turn)] = tmp_utility
        if(tmp_utility > utility):
            utility = tmp_utility
            if(depth == ALPHABETA_DEPTH_LIMIT):
                alphabeta_best_move_id = move
        if(utility >= beta):
            return utility
        alpha = max(alpha, utility)
    return utility

def alphabeta_id_make_move(state):
    """Iteratively deepen ``alphabeta_id`` up to the depth limit, then move.

    Searches with depths 1..ALPHABETA_DEPTH_LIMIT (full window each time) and
    plays the move left in the global ``alphabeta_best_move_id``.
    """
    global alphabeta_best_move_id
    for depth in range(1, ALPHABETA_DEPTH_LIMIT + 1):
        alphabeta_id(state, depth, -math.inf, math.inf)
    row, col = alphabeta_best_move_id[0], alphabeta_best_move_id[1]
    make_move(state, row, col)

In [52]:
# TODO: With an odd depth limit the heuristic of the wrong player is used
# Maximum search depth (in plies) for the alpha-beta searches.
ALPHABETA_DEPTH_LIMIT = 4

# Maps (board bytes, side to move) of a searched child position to the last
# utility found for it; alphabeta_max/alphabeta_min use it for move ordering.
transposition_table = {}

def alphabeta_max(state, depth, alpha, beta):
    """Maximising half of the alpha-beta search.

    ``state.turn`` is expected to be the maximising (root) player.

    Args:
        state: position to search; it is not modified (children are deep copies).
        depth: remaining search depth in plies; 0 evaluates the heuristic.
        alpha: best value the maximiser is already guaranteed.
        beta: best value the minimiser is already guaranteed.

    Returns:
        The (fail-soft) value of ``state`` from the maximising player's view.

    Side effects:
        Updates ``transposition_table``; when ``depth`` equals
        ``ALPHABETA_DEPTH_LIMIT`` the best move is stored in the global
        ``alphabeta_best_move``.
    """
    global alphabeta_best_move
    if(state.game_over):
        return terminal_utility(state)
    if(depth == 0):
        if(state.turn == WHITE):
            return WHITE_PLAYER_HEURISTIC(state)
        else:
            return BLACK_PLAYER_HEURISTIC(state)

    moves = get_possible_moves(state, state.turn)
    child_states = [make_move(copy.deepcopy(state), move[0], move[1]) for move in moves]

    # Ordering estimate per child: cached value if known, otherwise the
    # heuristic of the parent position (identical for all uncached children).
    estimated_utilities = []
    for child_state in child_states:
        key = (child_state.board.tobytes(), child_state.turn)
        if key in transposition_table:
            estimated_utilities.append(transposition_table[key])
        elif state.turn == WHITE:
            estimated_utilities.append(WHITE_PLAYER_HEURISTIC(state))
        else:
            estimated_utilities.append(BLACK_PLAYER_HEURISTIC(state))
    ordered_moves = [(estimated_utilities[i], moves[i], child_states[i]) for i in range(len(moves))]
    ordered_moves.sort(reverse=True)

    max_utility = -math.inf
    for (_, move, tmp_state) in ordered_moves:
        if(tmp_state.turn == state.turn):
            # Opponent had to pass: the maximiser moves again, so the child is
            # another max node. Treating it as a min node would flip its sign.
            tmp_utility = alphabeta_max(tmp_state, depth - 1, alpha, beta)
        else:
            tmp_utility = alphabeta_min(tmp_state, depth - 1, alpha, beta)
        transposition_table[(tmp_state.board.tobytes(), tmp_state.turn)] = tmp_utility
        if(tmp_utility > max_utility):
            max_utility = tmp_utility
            if(depth == ALPHABETA_DEPTH_LIMIT):
                alphabeta_best_move = move
        if(max_utility >= beta):
            return max_utility
        alpha = max(alpha, max_utility)
    return max_utility

def alphabeta_min(state, depth, alpha, beta):
    """Minimising half of the alpha-beta search.

    ``state.turn`` is expected to be the minimising player, so utilities that
    are relative to the side to move are negated to express them from the
    maximising player's view.

    Args:
        state: position to search; it is not modified (children are deep copies).
        depth: remaining search depth in plies; 0 evaluates the heuristic.
        alpha: best value the maximiser is already guaranteed.
        beta: best value the minimiser is already guaranteed.

    Returns:
        The (fail-soft) value of ``state`` from the maximising player's view.

    Side effects:
        Updates ``transposition_table``.
    """
    if(state.game_over):
        return -terminal_utility(state)
    if(depth == 0):
        if(state.turn == WHITE):
            return -WHITE_PLAYER_HEURISTIC(state)
        else:
            return -BLACK_PLAYER_HEURISTIC(state)

    moves = get_possible_moves(state, state.turn)
    child_states = [make_move(copy.deepcopy(state), move[0], move[1]) for move in moves]

    # Ordering estimate per child: cached value if known, otherwise the
    # heuristic of the parent position (identical for all uncached children).
    estimated_utilities = []
    for child_state in child_states:
        key = (child_state.board.tobytes(), child_state.turn)
        if key in transposition_table:
            estimated_utilities.append(transposition_table[key])
        elif state.turn == WHITE:
            estimated_utilities.append(WHITE_PLAYER_HEURISTIC(state))
        else:
            estimated_utilities.append(BLACK_PLAYER_HEURISTIC(state))
    ordered_moves = [(estimated_utilities[i], moves[i], child_states[i]) for i in range(len(moves))]
    ordered_moves.sort(reverse=True)

    min_utility = math.inf
    for (_, move, tmp_state) in ordered_moves:
        if(tmp_state.turn == state.turn):
            # Maximiser had to pass: the minimiser moves again, so the child is
            # another min node. Treating it as a max node would flip its sign.
            tmp_utility = alphabeta_min(tmp_state, depth - 1, alpha, beta)
        else:
            tmp_utility = alphabeta_max(tmp_state, depth - 1, alpha, beta)
        transposition_table[(tmp_state.board.tobytes(), tmp_state.turn)] = tmp_utility
        if(tmp_utility < min_utility):
            min_utility = tmp_utility
        if(min_utility <= alpha):
            return min_utility
        beta = min(beta, min_utility)
    return min_utility

def alphabeta_ai_make_move(state):
    """Iteratively deepen ``alphabeta_max`` in steps of two plies, then move.

    Searches with depths 2, 4, ... up to ALPHABETA_DEPTH_LIMIT (full window
    each time) and plays the move left in the global ``alphabeta_best_move``.
    """
    global alphabeta_best_move
    for depth in range(2, ALPHABETA_DEPTH_LIMIT + 1, 2):
        alphabeta_max(state, depth, -math.inf, math.inf)
    row, col = alphabeta_best_move[0], alphabeta_best_move[1]
    make_move(state, row, col)

### ProbCut KI
An dieser Stelle beginnt die Implementierung der Künstlichen Intelligenz mittels des Minimax Algorithmus und ProbCut

In [15]:
def probcut_ai_make_move(state):
    """Placeholder ProbCut AI: play the first square that ``state.move`` accepts.

    Squares are tried row by row; ``InvalidMoveException`` marks a rejected
    square. Does nothing if every square is rejected.

    NOTE(review): this uses ``state.rows``/``state.cols``/``state.move`` while
    the other AIs in this notebook use ``BOARD_SIZE``/``make_move(state, ...)``
    - confirm these attributes still exist on the state object.
    """
    squares = ((row, col) for row in range(state.rows) for col in range(state.cols))
    for row, col in squares:
        try:
            state.move(row, col)
        except InvalidMoveException:
            continue
        else:
            return

### Applikation Starten

Führen Sie zum Starten der Applikation den folgenden Code aus.

In [18]:
# Evaluation function used by the alpha-beta searches when the given colour is to move at a leaf.
BLACK_PLAYER_HEURISTIC = mobility_heuristic
WHITE_PLAYER_HEURISTIC = heuristic

In [None]:
state = GameState()
display_board(state)

def next_move(state):
    """Let the AI players move until a human is to move or the game is over.

    Before each move the function sleeps for 0.5 seconds, looks up the
    strategy configured for the player to move (``BLACK_PLAYER`` /
    ``WHITE_PLAYER``), runs the matching AI and refreshes the output. It
    returns without moving as soon as the player to move is ``HUMAN``.

    Raises:
        ValueError: if the configured strategy has no AI wired in here (this
            includes ``PROBCUT``). Without this check such a strategy would
            never move and the loop would never terminate.
    """
    ai_strategies = {
        TRIVIAL: trivial_ai_make_move,
        MINIMAX: minimax_ai_make_move,
        ALPHABETA: alphabeta_ai_make_move,
        ALPHABETAID: alphabeta_id_make_move,
    }
    # A loop instead of one recursive call per move keeps the stack flat for
    # AI-vs-AI games.
    while True:
        time.sleep(0.5)
        # Check if/which AI is playing
        strat = BLACK_PLAYER if state.turn == BLACK else WHITE_PLAYER
        if strat == HUMAN:
            return
        if strat not in ai_strategies:
            raise ValueError('Invalid Strategy: {!r}'.format(strat))
        # Execute AI strategy for current player
        ai_strategies[strat](state)
        update_output(state)
        if state.game_over:
            return

next_move(state)

### Testing code

In [53]:
# Fresh game state used by the test cells below.
test_board = GameState()

In [None]:
# Let the iterative-deepening negamax AI play one move, then display the board array.
alphabeta_id_make_move(test_board)
test_board.board

In [54]:
import cProfile

# Profile a single move of the alpha-beta AI, then display the resulting board array.
cProfile.run('alphabeta_ai_make_move(test_board)')
test_board.board

         123605 function calls (110207 primitive calls) in 0.445 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      119    0.000    0.000    0.001    0.000 <__array_function__ internals>:2(sum)
      167    0.000    0.000    0.085    0.001 <ipython-input-1-493ae9bb8fc6>:1(can_move)
      829    0.005    0.000    0.400    0.000 <ipython-input-1-81599aadcfb2>:1(get_possible_moves)
      167    0.001    0.000    0.183    0.001 <ipython-input-1-89c6ac4c885e>:1(make_move)
      167    0.004    0.000    0.004    0.000 <ipython-input-1-aa39cc9373e8>:1(adjacent_opposite_color_directions)
      167    0.004    0.000    0.004    0.000 <ipython-input-1-b3836a12d32b>:1(update_frontier)
    13244    0.120    0.000    0.120    0.000 <ipython-input-1-b61068ea6955>:1(is_valid_directional_move)
      181    0.001    0.000    0.003    0.000 <ipython-input-1-b76d4155a80d>:1(convert_adjacent_cells_in_direction)
    15116    0.279    0.000  

array([[ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  1, -1,  0,  0,  0],
       [ 0,  0,  0, -1, -1,  0,  0,  0],
       [ 0,  0,  0,  0, -1,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0]], dtype=int8)

# Legacy code

def alphabeta_max(state, alpha, beta, depth):
    """Legacy maximising alpha-beta step using the method-style state API.

    ``depth`` counts plies from the root (0) up to ``ALPHABETA_DEPTH_LIMIT``,
    where ``heuristic_utility`` is evaluated. At the root the best move is
    stored in the global ``alphabeta_best_move``.
    """
    global alphabeta_best_move
    if state.game_over:
        return terminal_utility(state)
    if depth >= ALPHABETA_DEPTH_LIMIT:
        return heuristic_utility(state)
    best = -math.inf
    for candidate in state.get_possible_moves():
        successor = copy.deepcopy(state)
        successor.move(candidate[0], candidate[1])
        value = alphabeta_min(successor, alpha, beta, depth + 1)
        if value > best:
            best = value
            if depth == 0:
                alphabeta_best_move = candidate
        if best >= beta:
            # Beta cutoff: the minimiser will never allow this line.
            break
        alpha = max(alpha, best)
    return best

def alphabeta_min(state, alpha, beta, depth):
    """Legacy minimising alpha-beta step using the method-style state API.

    Terminal and heuristic utilities are negated before being returned.
    ``depth`` counts plies from the root (0) up to ``ALPHABETA_DEPTH_LIMIT``.
    If called at depth 0 the chosen move is stored in the global
    ``alphabeta_best_move``.
    """
    global alphabeta_best_move
    if state.game_over:
        return -terminal_utility(state)
    if depth >= ALPHABETA_DEPTH_LIMIT:
        return -heuristic_utility(state)
    worst = math.inf
    for candidate in state.get_possible_moves():
        successor = copy.deepcopy(state)
        successor.move(candidate[0], candidate[1])
        value = alphabeta_max(successor, alpha, beta, depth + 1)
        if value < worst:
            worst = value
            if depth == 0:
                alphabeta_best_move = candidate
        if worst <= alpha:
            # Alpha cutoff: the maximiser already has a better alternative.
            break
        beta = min(beta, worst)
    return worst

ALPHABETA_DEPTH_LIMIT = 4

def alphabeta_negamax(state, depth, alpha, beta):
    """Negamax alpha-beta search returning the value for the player to move.

    Args:
        state: position to search; it is not modified (children are deep copies).
        depth: remaining search depth in plies; 0 evaluates the heuristic.
        alpha: lower bound of the search window for the player to move.
        beta: upper bound of the search window for the player to move.

    Returns:
        The (fail-soft) value of ``state`` from the point of view of
        ``state.turn``.

    Side effects:
        When ``depth`` equals ``ALPHABETA_DEPTH_LIMIT`` the best move is stored
        in the global ``alphabeta_best_move``.
    """
    global alphabeta_best_move
    if(state.game_over):
        # terminal_utility is already relative to the player to move; an extra
        # `* state.turn` would invert the result for one colour.
        return terminal_utility(state)
    if(depth == 0):
        # Leaf values must be relative to the side to move for both colours, so
        # neither heuristic is multiplied by state.turn (assumes the configured
        # heuristics score from the mover's view, as the ones defined in this
        # notebook do).
        if(state.turn == WHITE):
            return WHITE_PLAYER_HEURISTIC(state)
        else:
            return BLACK_PLAYER_HEURISTIC(state)
    utility = -math.inf
    for move in get_possible_moves(state, state.turn):
        tmp_state = copy.deepcopy(state)
        make_move(tmp_state, move[0], move[1])
        if(tmp_state.turn == state.turn):
            # Opponent had to pass: the same player moves again, so neither the
            # value nor the window may be negated.
            tmp_utility = alphabeta_negamax(tmp_state, depth - 1, alpha, beta)
        else:
            tmp_utility = -alphabeta_negamax(tmp_state, depth - 1, -beta, -alpha)
        if(tmp_utility > utility):
            utility = tmp_utility
            if(depth == ALPHABETA_DEPTH_LIMIT):
                alphabeta_best_move = move
        if(utility >= beta):
            return utility
        alpha = max(alpha, utility)
    return utility

def alphabeta_ai_make_move(state):
    """Legacy entry point: run one full-depth negamax search and play its move.

    Relies on ``alphabeta_negamax`` storing its choice in the global
    ``alphabeta_best_move``.
    """
    alphabeta_negamax(state, ALPHABETA_DEPTH_LIMIT, -math.inf, math.inf)
    row, col = alphabeta_best_move[0], alphabeta_best_move[1]
    make_move(state, row, col)