# Othello Probcut

### Initiale Konfiguration

Importieren von Abhängigkeiten und Konfiguration

In [None]:
%run othello_game.ipynb

In [None]:
import math
import copy
import time
import numpy
import random

### Hilfsfunktionen
Diese Hilfsfunktionen werden von mehreren KIs verwendet.

### Heuristiken

In [None]:
def disc_count_heuristic(state):
    """Score a position by disc difference.

    Returns the number of WHITE discs minus the number of BLACK discs,
    divided by 64, so positive values favour WHITE.
    """
    white_discs = count_disks(state, WHITE)
    black_discs = count_disks(state, BLACK)
    return (white_discs - black_discs) / 64

In [None]:
def mobility_heuristic(state):
    """Score a position by mobility difference.

    Returns the number of moves available to WHITE minus the number of
    moves available to BLACK, divided by 64, so positive values favour WHITE.
    """
    white_options = len(get_possible_moves(state, WHITE))
    black_options = len(get_possible_moves(state, BLACK))
    return (white_options - black_options) / 64

In [None]:
def gen_weight_matrix(default=0, corner=0, adj_corner=0, sup_corner=0, edge=0, dia_corner=0, support=0, sup_edge=0):
    """Build a normalized 8x8 matrix of positional weights.

    Each argument is the raw weight of one class of squares; the layout is
    symmetric in both axes:

    * ``corner``     -- the four corner squares.
    * ``adj_corner`` -- border squares directly next to a corner.
    * ``sup_corner`` -- border squares two steps away from a corner.
    * ``edge``       -- the two middle squares of each border.
    * ``dia_corner`` -- squares diagonally adjacent to a corner.
    * ``support``    -- squares (2, 2), (2, 5), (5, 2) and (5, 5).
    * ``sup_edge``   -- squares between the ``support`` squares on rows and
      columns 2 and 5.
    * ``default``    -- every remaining square.

    Returns:
        A float ``numpy`` array of shape (8, 8) whose absolute values sum
        to 1. If every raw weight is zero, an all-zero matrix is returned.
    """
    raw_matrix = numpy.array([
        [corner,      adj_corner,  sup_corner,  edge,      edge,      sup_corner,  adj_corner,  corner],
        [adj_corner,  dia_corner,  default,     default,   default,   default,     dia_corner,  adj_corner],
        [sup_corner,  default,     support,     sup_edge,  sup_edge,  support,     default,     sup_corner],
        [edge,        default,     sup_edge,    default,   default,   sup_edge,    default,     edge],
        [edge,        default,     sup_edge,    default,   default,   sup_edge,    default,     edge],
        [sup_corner,  default,     support,     sup_edge,  sup_edge,  support,     default,     sup_corner],
        [adj_corner,  dia_corner,  default,     default,   default,   default,     dia_corner,  adj_corner],
        [corner,      adj_corner,  sup_corner,  edge,      edge,      sup_corner,  adj_corner,  corner],
    ])
    max_possible = numpy.sum(numpy.absolute(raw_matrix))
    if max_possible == 0:
        # All weights are zero: dividing by the zero total would fill the
        # matrix with NaN, which would then poison every heuristic value.
        return numpy.zeros(raw_matrix.shape, dtype=float)
    return numpy.true_divide(raw_matrix, max_possible)

In [None]:
def weight_heuristic(state, weights):
    """Return the sum of ``state.board`` multiplied element-wise by ``weights``."""
    weighted_board = numpy.multiply(state.board, weights)
    return weighted_board.sum()

In [None]:
# Normalized positional weight matrix consumed by cowthello_heuristic.
# gen_weight_matrix decides which squares each named weight is applied to.
cowthello_weights = gen_weight_matrix(default=1, corner=100, adj_corner=-25, sup_corner=25, edge=10, dia_corner=-50, support=50, sup_edge=5)

In [None]:
def cowthello_heuristic(state):
    """Evaluate ``state`` with the positional weights in ``cowthello_weights``."""
    return weight_heuristic(state=state, weights=cowthello_weights)

In [None]:
def combined_heuristic(state):
    """Evaluate ``state`` with a heuristic chosen by the number of pieces.

    Once ``state.num_pieces`` reaches 50 the plain disc count is used;
    before that the result is the mean of the mobility score and the
    positional (cowthello) score.
    """
    many_pieces = state.num_pieces >= 50
    if not many_pieces:
        mobility_score = mobility_heuristic(state)
        positional_score = cowthello_heuristic(state)
        return (mobility_score + positional_score) / 2
    return disc_count_heuristic(state)

### Zufällige KI
Diese KI wählt aus der Menge der möglichen Züge einen zufälligen aus und spielt diesen.

In [None]:
def random_ai_make_move(state, heuristic):
    """Play a randomly chosen move for the side whose turn it is.

    The move is drawn with ``random.choice`` from the moves reported by
    ``get_possible_moves`` and applied to ``state`` via ``make_move``.
    ``heuristic`` is not used; the parameter exists so this function can be
    called the same way as the other AIs.
    """
    chosen = random.choice(get_possible_moves(state, state.turn))
    make_move(state, chosen[0], chosen[1])

### Minimax KI
Diese KI verwendet den Minimax-Algorithmus.

In [None]:
# Number of moves minimax_ai_make_move looks ahead (the root move counts as one).
MINIMAX_DEPTH_LIMIT = 3
# Debug counter: incremented once for every child state expanded in minimax().
debug_mm_count= 0

def minimax(state, depth, heuristic):
    """Return the minimax utility of ``state`` searched ``depth`` moves deep.

    Finished games are scored with ``get_winner``; when ``depth`` is used up
    the position is scored with ``heuristic(state)``. WHITE is the maximizing
    side, any other side to move minimizes. Every expanded child increments
    the global ``debug_mm_count``.
    """
    global debug_mm_count
    if state.game_over:
        return get_winner(state)
    if depth <= 0:
        return heuristic(state)

    # The state itself is never mutated below (children are deep copies),
    # so the side to move can be determined once.
    maximizing = state.turn == WHITE
    pick_better = max if maximizing else min
    utility = -math.inf if maximizing else math.inf

    for move in get_possible_moves(state, state.turn):
        debug_mm_count += 1
        child = copy.deepcopy(state)
        make_move(child, move[0], move[1])
        utility = pick_better(utility, minimax(child, depth - 1, heuristic))
    return utility

def minimax_ai_make_move(state, heuristic):
    """Pick a move with plain minimax and play it on ``state``.

    Every move in ``state.possible_moves`` is scored by running ``minimax``
    on a copy of the state with that move applied. WHITE takes the highest
    score, any other side the lowest; ties between equally scored moves are
    broken at random. Does nothing if the game is already over.
    """
    if state.game_over:
        return

    scored_moves = []
    for move in state.possible_moves:
        successor = make_move(copy.deepcopy(state), move[0], move[1])
        score = minimax(successor, MINIMAX_DEPTH_LIMIT - 1, heuristic)
        scored_moves.append((score, move))

    select = max if state.turn == WHITE else min
    best_score = select(scored_moves)[0]
    candidates = [entry for entry in scored_moves if entry[0] == best_score]
    best_move = random.choice(candidates)[1]
    make_move(state, best_move[0], best_move[1])
    

### Alpha-Beta KI
Diese KI verwendet den Minimax-Algorithmus mit Alpha-Beta-Pruning.

In [None]:
# Cache shared by all alphabeta() calls. Keys are
# (board bytes, side to move, heuristic function); values are the utility most
# recently computed for that position. alphabeta() reads it only to order moves.
transposition_table = {}

In [None]:
# Number of moves the alpha-beta AIs look ahead (the root move counts as one).
ALPHABETA_DEPTH_LIMIT = 3
# Debug counter: incremented once for every child state searched in alphabeta().
debug_ab_count= 0

def alphabeta(state, depth, alpha, beta, heuristic):
    """Return the minimax utility of ``state`` using alpha-beta pruning.

    Args:
        state: Position to evaluate; it is not modified (children are built
            from deep copies).
        depth: Remaining number of moves to look ahead.
        alpha: Best utility the maximizing side (WHITE) can already secure.
        beta: Best utility the minimizing side can already secure.
        heuristic: Callable scoring a state once ``depth`` is used up.

    Returns:
        ``get_winner(state)`` for finished games, ``heuristic(state)`` when
        ``depth <= 0``, otherwise the best child utility for the side to move.

    Side effects:
        Increments the global ``debug_ab_count`` per searched child and stores
        each child's utility in ``transposition_table``. Because of pruning
        those stored values may be bounds rather than exact scores, which is
        fine since they are only used to order moves.
    """
    global debug_ab_count
    if state.game_over:
        return get_winner(state)
    if depth <= 0:
        return heuristic(state)

    maximizing = state.turn == WHITE

    # Move ordering: search the most promising children first so that cutoffs
    # happen early. A cached utility is preferred; otherwise the child itself
    # is scored statically. (Scoring the parent instead would give every child
    # the same estimate and make the ordering meaningless.)
    candidates = []
    for move in get_possible_moves(state, state.turn):
        child_state = make_move(copy.deepcopy(state), move[0], move[1])
        key = (child_state.board.tobytes(), child_state.turn, heuristic)
        if key in transposition_table:
            estimate = transposition_table[key]
        else:
            estimate = heuristic(child_state)
        candidates.append((estimate, child_state, key))
    # Sort on the estimate only, so ties never fall through to comparing
    # moves or state objects.
    candidates.sort(key=lambda candidate: candidate[0], reverse=maximizing)

    utility = -math.inf if maximizing else math.inf
    for _, child_state, key in candidates:
        debug_ab_count += 1
        child_utility = alphabeta(child_state, depth - 1, alpha, beta, heuristic)
        transposition_table[key] = child_utility

        if maximizing:
            utility = max(utility, child_utility)
            alpha = max(alpha, utility)
        else:
            utility = min(utility, child_utility)
            beta = min(beta, utility)
        if alpha >= beta:
            break  # the opponent will never allow this line: prune the rest
    return utility

def alphabeta_ai_make_move(state, heuristic):
    """Pick a move with alpha-beta search and play it on ``state``.

    Every move in ``state.possible_moves`` is scored by running ``alphabeta``
    with a full (-inf, +inf) window on a copy of the state with that move
    applied. WHITE takes the highest score, any other side the lowest; ties
    between equally scored moves are broken at random. Does nothing if the
    game is already over.
    """
    if state.game_over:
        return

    scored_moves = []
    for move in state.possible_moves:
        successor = make_move(copy.deepcopy(state), move[0], move[1])
        score = alphabeta(successor, ALPHABETA_DEPTH_LIMIT - 1, -math.inf, math.inf, heuristic)
        scored_moves.append((score, move))

    select = max if state.turn == WHITE else min
    best_score = select(scored_moves)[0]
    candidates = [entry for entry in scored_moves if entry[0] == best_score]
    best_move = random.choice(candidates)[1]
    make_move(state, best_move[0], best_move[1])
    
def alphabeta_id_make_move(state, heuristic):
    """Pick a move with iteratively deepened alpha-beta and play it.

    The search is repeated for depth 1, 2, ... up to
    ``ALPHABETA_DEPTH_LIMIT``. Shallower iterations fill
    ``transposition_table``, which ``alphabeta`` uses to order moves in the
    deeper ones. The move played is the best one from the deepest iteration,
    with ties broken at random. Does nothing if the game is already over.
    """
    if state.game_over:
        # Same guard as the other AIs: without it a finished game (no moves
        # to score) would raise instead of returning quietly.
        return

    select = max if state.turn == WHITE else min
    best_move = None
    depth = 1
    while depth <= ALPHABETA_DEPTH_LIMIT:
        scored_moves = [
            (alphabeta(make_move(copy.deepcopy(state), move[0], move[1]),
                       depth - 1, -math.inf, math.inf, heuristic), move)
            for move in state.possible_moves
        ]
        best_score, _ = select(scored_moves)
        # NOTE(review): the result of this root search is discarded; it only
        # refreshes transposition_table entries for the root's children and
        # adds to debug_ab_count. It looks redundant with the per-move
        # searches above -- confirm before removing.
        alphabeta(state, depth, -math.inf, math.inf, heuristic)
        best_move = random.choice([entry for entry in scored_moves if entry[0] == best_score])[1]
        depth += 1
    make_move(state, best_move[0], best_move[1])

### ProbCut KI
An dieser Stelle beginnt die Implementierung der künstlichen Intelligenz mittels des Minimax-Algorithmus und ProbCut.

In [None]:
def probcut_ai_make_move(state, heuristic):
    """Placeholder AI: play the first square that accepts a move.

    Squares are tried row by row, column by column; the first call to
    ``state.move`` that does not raise ``InvalidMoveException`` ends the
    turn. No search or ProbCut logic is implemented here, and ``heuristic``
    is not used.
    """
    squares = ((row, col) for row in range(state.rows) for col in range(state.cols))
    for row, col in squares:
        try:
            state.move(row, col)
        except InvalidMoveException:
            continue  # not a legal square, try the next one
        return

### Applikation Starten

Führen Sie zum Starten der Applikation den folgenden Code aus.

In [None]:
# Settings
# AI function per colour. next_move() calls it as ai(state, heuristic);
# a value of None makes next_move() stop without playing for that colour.
BLACK_PLAYER_AI = alphabeta_id_make_move
WHITE_PLAYER_AI = minimax_ai_make_move

# Evaluation function handed to each colour's AI.
BLACK_PLAYER_HEURISTIC = combined_heuristic
WHITE_PLAYER_HEURISTIC = combined_heuristic

# Lookup tables keyed by colour. NOTE(review): next_move() reads
# BLACK_PLAYER_AI / WHITE_PLAYER_AI directly, so PLAYER_AI is not used in the
# code visible in this notebook.
PLAYER_AI = {BLACK: BLACK_PLAYER_AI, WHITE: WHITE_PLAYER_AI}
PLAYER_HEURISTIC = {BLACK: BLACK_PLAYER_HEURISTIC, WHITE: WHITE_PLAYER_HEURISTIC}

In [None]:
# Create the game state used by the interactive game and pass it to display_board.
state = GameState()
display_board(state)

def next_move(state):
    """Let the configured AIs play on ``state`` until the game is over.

    Before every move the loop sleeps for 0.2 seconds. The AI for the side to
    move is called with that side's heuristic, then ``update_output`` is
    called with the state. The loop stops when the game is over or when no AI
    is configured (``None``) for the side to move.
    """
    while True:
        time.sleep(0.2)
        # Check if/which AI is playing
        if state.turn == BLACK:
            strat = BLACK_PLAYER_AI
        else:
            strat = WHITE_PLAYER_AI
        if strat is None:
            return
        strat(state, PLAYER_HEURISTIC[state.turn])
        update_output(state)
        if state.game_over:
            return

# Start the game loop; a KeyboardInterrupt (e.g. interrupting the kernel)
# stops it without printing a traceback.
try:
    next_move(state)
except KeyboardInterrupt:
    pass

### Testing code

In [None]:
# Separate game state used by the test and profiling cells of this section.
test_board = GameState()

In [None]:
# Let the alpha-beta AI play one move on the test state, then show the board.
alphabeta_ai_make_move(test_board, combined_heuristic)
test_board.board

In [None]:
# Show the current board of the test state as the cell output.
test_board.board

In [None]:
import cProfile

# Profile a single alpha-beta move on the test state, then show the board.
cProfile.run('alphabeta_ai_make_move(test_board, combined_heuristic)')
test_board.board

In [None]:
def debug_num_visited_states(state):
    """Time the three search AIs on ``state`` and print their node counts.

    Each AI (minimax, alpha-beta, alpha-beta with iterative deepening) plays
    one move on its own deep copy of ``state``, so ``state`` itself is left
    untouched. For each run the elapsed wall-clock time and the number of
    expanded child states (``debug_mm_count`` / ``debug_ab_count``) are
    printed. The shared ``transposition_table`` is emptied before each
    alpha-beta run so the runs do not profit from each other's cache.
    """
    global debug_ab_count
    global debug_mm_count

    debug_mm_count = 0
    start = time.time()
    minimax_ai_make_move(copy.deepcopy(state), combined_heuristic)
    secs = time.time() - start
    print("Minimax takes ", secs, " seconds and checks ", debug_mm_count, "substates")

    debug_ab_count = 0
    # Empty the module-level table in place. Assigning a fresh dict here would
    # only bind a function-local name and leave the shared cache populated.
    transposition_table.clear()
    start = time.time()
    alphabeta_ai_make_move(copy.deepcopy(state), combined_heuristic)
    secs = time.time() - start
    print("AlphaBeta takes ", secs, " seconds and checks ", debug_ab_count, "substates")

    debug_ab_count = 0
    transposition_table.clear()
    start = time.time()
    alphabeta_id_make_move(copy.deepcopy(state), combined_heuristic)
    secs = time.time() - start
    print("AlphaBeta with iterative deepening takes ", secs, " seconds and checks ", debug_ab_count, "substates")

In [None]:
# Compare the search AIs on the current test state.
debug_num_visited_states(test_board)

In [None]:
def get_statistics(num, black_ai, black_h, white_ai, white_h):
    """Play ``num`` games between two AIs and report the outcomes.

    A label widget shows the running tally of black wins / draws / white wins
    after every game. A KeyboardInterrupt stops the series early; in both
    cases the list of ``(black discs, white discs)`` results collected so far
    is passed to ``print_statistics``.
    """
    status = ipywidgets.widgets.Label()
    display(status)
    result = []
    wins = [0, 0, 0]  # [black wins, draws, white wins]
    status.value = f'0 / {num} games played, b/d/w: {wins[0]}/{wins[1]}/{wins[2]}'
    try:
        for i in range(num):
            b, w = play_game(black_ai, black_h, white_ai, white_h)
            result.append((b, w))
            # Slot in ``wins`` that this game's outcome belongs to.
            outcome = 0 if b > w else (1 if w == b else 2)
            wins[outcome] += 1
            status.value = f'{i+1} / {num} games played, b/d/w: {wins[0]}/{wins[1]}/{wins[2]}'
    except KeyboardInterrupt:
        status.value = f'Interrupted: {i} / {num} games played, b/d/w: {wins[0]}/{wins[1]}/{wins[2]}'
    print_statistics(result)

def play_game(black_ai, black_h, white_ai, white_h):
    """Play one full game between two AIs without any display.

    Returns:
        A tuple ``(black discs, white discs)`` counted on the final state.
    """
    state = GameState()
    heuristics = {BLACK: black_h, WHITE: white_h}
    next_move_blind(state, black_ai, white_ai, heuristics)
    black_total = count_disks(state, BLACK)
    white_total = count_disks(state, WHITE)
    return black_total, white_total


def next_move_blind(state, black_ai, white_ai, heuristics):
    """Let the two AIs alternate on ``state`` until the game is over.

    ``heuristics`` maps a colour to the heuristic handed to that colour's AI.
    No output is produced and no delay is inserted between moves. At least
    one AI call is always made, even if the game is already over.
    """
    while True:
        # Check if/which AI is playing
        strat = black_ai if state.turn == BLACK else white_ai
        strat(state, heuristics[state.turn])
        if state.game_over:
            break

def print_statistics(results):
    """Print ``results`` unchanged; get_statistics passes a list of (black, white) disc counts."""
    print(results)

In [None]:
# Play 5 games of random AI against random AI (the heuristics are ignored by random_ai_make_move).
get_statistics(5, random_ai_make_move, cowthello_heuristic, random_ai_make_move, cowthello_heuristic)

# Legacy code

def alphabeta_max(state, alpha, beta, depth):
    """Legacy maximizing half of a two-function alpha-beta search.

    ``depth`` counts upwards from 0 at the root until
    ``ALPHABETA_DEPTH_LIMIT``. When called at depth 0 the best move found is
    stored in the global ``alphabeta_best_move``. Relies on
    ``terminal_utility``, ``heuristic_utility`` and the ``state`` methods
    ``get_possible_moves`` / ``move``, none of which are defined in the
    visible part of this notebook.
    """
    global alphabeta_best_move
    if state.game_over:
        return terminal_utility(state)
    if depth >= ALPHABETA_DEPTH_LIMIT:
        return heuristic_utility(state)

    at_root = depth == 0
    best = -math.inf
    for move in state.get_possible_moves():
        child = copy.deepcopy(state)
        child.move(move[0], move[1])
        value = alphabeta_min(child, alpha, beta, depth + 1)
        if value > best:
            best = value
            if at_root:
                alphabeta_best_move = move
        if best >= beta:
            return best  # beta cutoff
        alpha = max(alpha, best)
    return best

def alphabeta_min(state, alpha, beta, depth):
    """Legacy minimizing half of a two-function alpha-beta search.

    Mirror image of ``alphabeta_max``: picks the child with the lowest value
    and cuts off once the value drops to ``alpha`` or below. When called at
    depth 0 the best move found is stored in the global
    ``alphabeta_best_move``.

    NOTE(review): leaf values are negated here but not in ``alphabeta_max``.
    Whether that is intended depends on the perspective from which
    ``terminal_utility`` / ``heuristic_utility`` score a state -- confirm.
    """
    global alphabeta_best_move
    if state.game_over:
        return -terminal_utility(state)
    if depth >= ALPHABETA_DEPTH_LIMIT:
        return -heuristic_utility(state)

    at_root = depth == 0
    worst = math.inf
    for move in state.get_possible_moves():
        child = copy.deepcopy(state)
        child.move(move[0], move[1])
        value = alphabeta_max(child, alpha, beta, depth + 1)
        if value < worst:
            worst = value
            if at_root:
                alphabeta_best_move = move
        if worst <= alpha:
            return worst  # alpha cutoff
        beta = min(beta, worst)
    return worst

# Depth limit for the legacy search functions.
# NOTE(review): if this code is executed it overrides the
# ALPHABETA_DEPTH_LIMIT = 3 assigned earlier in this notebook.
ALPHABETA_DEPTH_LIMIT = 4

def alphabeta_negamax(state, depth, alpha, beta):
    """Legacy negamax search with alpha-beta pruning.

    Returns the utility of ``state`` from the point of view of the side to
    move. ``depth`` counts down to 0; a call made with
    ``depth == ALPHABETA_DEPTH_LIMIT`` is treated as the root and stores its
    best move in the global ``alphabeta_best_move``.

    Leaf scores are multiplied by ``state.turn`` to switch perspective.
    NOTE(review): this presumably relies on WHITE == 1 and BLACK == -1 --
    confirm against the game module.
    """
    global alphabeta_best_move
    if state.game_over:
        return terminal_utility(state) * state.turn
    if depth == 0:
        if state.turn == WHITE:
            return WHITE_PLAYER_HEURISTIC(state)
        # The configured black heuristic is BLACK_PLAYER_HEURISTIC; the
        # previously referenced BLACK_PLAYER_STRAT_HEURISTIC does not exist.
        return BLACK_PLAYER_HEURISTIC(state) * state.turn

    utility = -math.inf
    for move in get_possible_moves(state, state.turn):
        tmp_state = copy.deepcopy(state)
        make_move(tmp_state, move[0], move[1])
        if tmp_state.turn == state.turn:
            # Same side moves again (make_move left the turn unchanged, e.g.
            # because the opponent has to pass): the child's value is already
            # from our perspective, so neither the sign nor the window flips.
            tmp_utility = alphabeta_negamax(tmp_state, depth - 1, alpha, beta)
        else:
            tmp_utility = -alphabeta_negamax(tmp_state, depth - 1, -beta, -alpha)
        if tmp_utility > utility:
            utility = tmp_utility
            if depth == ALPHABETA_DEPTH_LIMIT:
                alphabeta_best_move = move
        if utility >= beta:
            return utility  # cutoff
        alpha = max(alpha, utility)
    return utility

def alphabeta_ai_make_move(state, heuristic=None):
    """Legacy AI entry point: play the move chosen by ``alphabeta_negamax``.

    ``heuristic`` is accepted (and ignored) so that this function can be
    called as ``ai(state, heuristic)`` like the other AIs in this notebook;
    ``alphabeta_negamax`` uses the globally configured player heuristics
    instead. The chosen move is read from the global ``alphabeta_best_move``
    that the search sets at its root.
    """
    alphabeta_negamax(state, ALPHABETA_DEPTH_LIMIT, -math.inf, math.inf)
    make_move(state, alphabeta_best_move[0], alphabeta_best_move[1])