# Algorithmen



In [1]:
%run ./Muehle_Logic.ipynb


In [2]:
%run ./Muehle_Utilities.ipynb

In [3]:
%run ./Muehle_Heuristik.ipynb

In [4]:
START_STATE = [[9, 9], [          # Anzahl zu setzender Steine (Spieler_1 (Weiß), Spieler_2 (Braun))
            [0, 0, 0, 0, 0, 0, 0, 0], # ring 0
            [0, 0, 0, 0, 0, 0, 0, 0], # ring 1
            [0, 0, 0, 0, 0, 0, 0, 0]  # ring 2
      ]]
board7 = [[0, 0],
         [[0, 0, 1, 2, 0, 1, 0, 0],
          [2, 0, 0, 0, 0, 0, 0, 0],
          [0, 2, 0, 0, 0, 2, 0, 0]]]

def to_tupel(State):
    return ((State[0][0], State[0][1]), (
                (State[1][0][0],State[1][0][1],State[1][0][2],State[1][0][3],State[1][0][4],State[1][0][5],State[1][0][6],State[1][0][7]),
                (State[1][1][0],State[1][1][1],State[1][1][2],State[1][1][3],State[1][1][4],State[1][1][5],State[1][1][6],State[1][1][7]),
                (State[1][2][0],State[1][2][1],State[1][2][2],State[1][2][3],State[1][2][4],State[1][2][5],State[1][2][6],State[1][2][7])
            ))

def to_list(State):
    return [[State[0][0], State[0][1]], [
                [State[1][0][0],State[1][0][1],State[1][0][2],State[1][0][3],State[1][0][4],State[1][0][5],State[1][0][6],State[1][0][7]],
                [State[1][1][0],State[1][1][1],State[1][1][2],State[1][1][3],State[1][1][4],State[1][1][5],State[1][1][6],State[1][1][7]],
                [State[1][2][0],State[1][2][1],State[1][2][2],State[1][2][3],State[1][2][4],State[1][2][5],State[1][2][6],State[1][2][7]]
    ]]

to_tupel(START_STATE)

to_list(to_tupel(START_STATE))

[[9, 9],
 [[0, 0, 0, 0, 0, 0, 0, 0],
  [0, 0, 0, 0, 0, 0, 0, 0],
  [0, 0, 0, 0, 0, 0, 0, 0]]]

Die Funktion `memoize(f)` aus der "Wissensbasierte Systeme"-Vorlesung nimmt ein Argument f und gibt eine Version dieser Funktion zurück, die alle Ergebnisse der Funktion f zwischenspeichert.

In [5]:
def memoize(f):
    global Cache
    
    def f_memoized(*args):
        key = (args[0], args[1])
        if key in Cache:
            return Cache[key]
        result = f(*args)
        Cache[key] = result
        return result
    
    return f_memoized

## Minimax-Algorithmus

In [6]:
import random
random.seed(1)

`value_minimax(State, player)`

In [7]:
@memoize
def value_minimax(State, player, depth):
    if finished(to_list(State)):
        return utility(to_list(State), player)
    if depth == 0:
        return heuristic(State, player)
    o = opponent(player)
    depth = depth - 1
    return max([ -value_minimax(to_tupel(ns), o, depth) for ns in next_states(to_list(State), player) ])

`best_move_minimax(State, player)`

In [8]:
def best_move_minimax(State, player, depth):
    NS        = next_states(State, player)
    bestVal   = value_minimax(to_tupel(State), player, depth)
    BestMoves = [s for s in NS if -value_minimax(to_tupel(s), opponent(player), depth) == bestVal]
    BestState = random.choice(BestMoves)
    return bestVal, BestState

Die Funktion `minimax(State, Player)` wurde erstellt, um die Funktion `best_move_minimax(State, player)` nach Außen eindeutiger von `alpha_beta_pruning(State, Player)` abzugrenzen.

In [9]:
def minimax(State, player, depth = 4):
    return(best_move_minimax(State, player, depth))

## Alpha-Beta-Pruning

In [10]:
Cache = {}

`value_ab(State, player, alpha=-1, beta=1)` 

In [11]:
def value_ab(State, player, alpha=-1, beta=1, depth = 4):
    #print('Value-depth: '+str(depth))
    global Cache
    #to_tupel
    State = to_tupel(State)
    if State in Cache:
        val, a, b = Cache[State]
        if a <= alpha and beta <= b:
            return val
        else:
            alpha = min(alpha, a)
            beta  = max(beta , b)
            val   = alphaBeta(State, player, alpha, beta, depth=depth)
            Cache[State] = val, alpha, beta
            return val
    else:
        val = alphaBeta(State, player, alpha, beta, depth=depth)
        Cache[State] = val, alpha, beta
        return val

`alphaBeta(State, player, alpha, beta)`

In [12]:
def alphaBeta(State, player, alpha, beta, depth):
    #print('AB-depth: '+str(depth))
    #tolist 
    State = to_list(State)
    if finished(State):
        return utility(State, player)
    if depth == 0:
        return heuristic(State, player)
    val = alpha
    for ns in next_states(State, player):
        #print('Tiefe auf: '+str(depth)+' verringert')
        val = max(val, -value_ab(ns, opponent(player), -beta, -alpha, depth=depth-1))
        if val >= beta:
            return val
        alpha = max(val, alpha)
    return val

In [13]:
def best_move_ab(State, player, depth = 4):
    NS        = next_states(State, player)
    bestVal   = value_ab(State, player, depth = depth)
    BestMoves = [s for s in NS if -value_ab(s, opponent(player), depth = depth-1) == bestVal]
    BestState = random.choice(BestMoves)
    return bestVal, BestState

Die Funktion `alpha_beta_pruning(State, Player)` wurde erstellt, um die Funktion `best_move(State, player)` nach Außen eindeutiger von `minimax(State, Player)` abzugrenzen.

In [14]:
def alpha_beta_pruning(State, player, depth = 4):
    return(best_move_ab(State, player, depth = depth))

In [15]:
# import time
# start = time.time()
# alpha_beta_pruning(START_STATE, 1, 5) #mit depth=5 15sec/ depth=6 85sec (X5) (vlt. noch symmetrische Zustände berücksichtigen)
# end = time.time()
# print(str(end-start)+'sec')

In [16]:
import time
start = time.time()
state = [[5, 6], [[0, 1, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 2, 0, 2, 0], [0, 0, 0, 2, 0, 0, 0, 0]]]
player = 2
print(alpha_beta_pruning(state, player, depth = 4))
end = time.time()
print(str(end-start)+'sec')

(0.0, [[5, 5], [[0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 2, 2, 2, 0], [0, 0, 0, 2, 0, 0, 0, 0]]])
1.9899983406066895sec
