In [1]:
from IPython.core.display import HTML
# Load the shared stylesheet.  The context manager closes the file handle as
# soon as its contents have been read instead of leaving that to the garbage
# collector.
with open('../style.css') as css_file:
    css_source = css_file.read()
HTML(css_source)

In [2]:
%load_ext nb_mypy

Version 1.0.5


In [3]:
from typing import TypeVar, Callable

In [4]:
# Type variable standing for the game-specific state type used in the signatures below.
State = TypeVar('State')

# Utilities

The global variable `gCache` is used as a cache for the function `evaluate` defined later.  Instead of just storing the values for a given `State`, the cache stores pairs of the form 
* `('=', v)`, 
* `('≤', v)`, or
* `('≥', v)`.

The first component of these pairs is a *flag* that specifies whether the stored value `v` is exact or whether it only is a lower or upper bound.  Concretely, provided `gCache[State, depth]` is defined and `value(State, depth)` computes the *value* of a given `State` from the perspective of the maximizing 
player, the following invariants are satisfied:
* $\texttt{gCache[State, depth]} = (\texttt{'='}, v) \rightarrow \texttt{value(State, depth)} = v$.
* $\texttt{gCache[State, depth]} = (\texttt{'≤'}, v) \rightarrow \texttt{value(State, depth)} \leq v$.
* $\texttt{gCache[State, depth]} = (\texttt{'≥'}, v) \rightarrow \texttt{value(State, depth)} \geq v$.

In [5]:
# Memoization table used by `evaluate`, `store_cache` and `value_cache`.
# Keys are pairs (state, limit); values are pairs (flag, value) where flag is
# one of '=', '≤', '≥' (exact value, upper bound, lower bound).
gCache: dict = {}

In order to have some variation in our game, we use random numbers to choose between optimal moves.

In [6]:
import random
# Fixed seed, so the random choice among equally good moves is reproducible.
random.seed(0)

# Alpha-Beta Pruning with Progressive Deepening, Move Ordering, and Memoization

In [7]:
# NOTE(review): `State` is already declared identically in an earlier cell of
# this notebook; this second declaration looks redundant -- confirm and remove.
State = TypeVar('State')

In [8]:
import ipycanvas as cnv

The declarations below are needed by the type checker.

In [9]:
# The players of the game.  `maxValue` generates moves for gPlayers[0] (the
# maximizing player) and `minValue` for gPlayers[1] (the minimizing player).
# Only declared here for the type checker; the value is presumably assigned by
# the game notebook that is loaded later via `%run` -- TODO confirm.
gPlayers: list[str]
    
def next_states(S: State, player: str) -> list[State]: 
    """Placeholder that tells the type checker the signature of `next_states`.

    Callers in this notebook pass a state together with an entry of `gPlayers`
    and iterate over the returned states.  The working definition is not part
    of this notebook; presumably it comes from the game notebook that is
    loaded later via `%run` -- TODO confirm.
    """
    return None # type: ignore

def utility(S: State) -> int | None:
    """Placeholder that tells the type checker the signature of `utility`.

    `finished` treats any result other than `None` as "the game is over", and
    `maxValue` / `minValue` return that result as the value of a finished
    state.  The working definition is presumably supplied by the game notebook
    that is loaded later via `%run` -- TODO confirm.
    """
    return None

def get_move(S: State) -> State:
    """Placeholder that tells the type checker the signature of `get_move`.

    `play_game` calls it with the state reached after the computed move and
    continues the game from the returned state.  Presumably the real function
    obtains the opponent's move -- TODO confirm against the game notebook.
    """
    return None # type: ignore

def final_msg(S: State) -> bool:
    """Placeholder that tells the type checker the signature of `final_msg`.

    `play_game` calls it once `finished` holds for the current state and
    ignores the result.
    NOTE(review): the declared return type is `bool` although the caller never
    uses the result -- confirm against the real definition.
    """
    return None # type: ignore

def toString(S: State) -> str:
    """Placeholder that tells the type checker the signature of `toString`.

    `play_game` prints the returned string after every move.  The working
    definition is presumably supplied by the game notebook -- TODO confirm.
    """
    return None # type: ignore

def draw(S: State, canvas: cnv.canvas, value: str) -> None:
    """Placeholder that tells the type checker the signature of `draw`.

    Not called anywhere in this notebook.
    NOTE(review): `cnv.canvas` looks like a module name rather than a class;
    `cnv.Canvas` may have been intended -- confirm.
    """
    return None

def evaluate(S:     State,
             limit: int,
             f:     Callable[[State, int, float, float], float], 
             alpha: float = -1, 
             beta:  float =  1
            ) -> float:
    """Forward declaration of `evaluate` for the type checker.

    The same name is defined again, with a real body and the same signature,
    in a later cell of this notebook; that later definition replaces this one.
    """
    return None # type: ignore

def maxValue(S: State, limit: int, alpha: float, beta: float) -> float:
    """Forward declaration of `maxValue` for the type checker.

    The real definition follows in a later cell and replaces this one.
    NOTE(review): the later definition gives `alpha` and `beta` the defaults
    -1 and 1, which this declaration lacks.
    """
    return None # type: ignore

def minValue(S: State, limit: int, alpha: float, beta: float) -> float:
    """Forward declaration of `minValue` for the type checker.

    The real definition follows in a later cell and replaces this one.
    NOTE(review): the later definition gives `alpha` and `beta` the defaults
    -1 and 1, which this declaration lacks.
    """
    return None # type: ignore

def store_cache(S: State, limit: int, alpha: float, beta: float, v: float) -> None:
    """Forward declaration of `store_cache` for the type checker.

    The real definition follows in a later cell and replaces this one.
    NOTE(review): the last parameter is called `v` here but `value` in the
    later definition; the call in `evaluate` is positional, so both work.
    """
    return None

def heuristic(S: State) -> float:
    """Placeholder that tells the type checker the signature of `heuristic`.

    `maxValue` and `minValue` return `heuristic(S)` when the depth limit has
    dropped to 0 and the game is not finished.  The working definition has to
    be provided by the notebook that defines the game.
    """
    return None # type: ignore

In [10]:
def finished(S: State) -> bool:
    """Return True iff the game is over in state `S`.

    A state counts as finished when `utility(S)` yields anything other than
    `None`.
    """
    # `None` is a singleton: test identity instead of relying on `__ne__`.
    return utility(S) is not None

## Progressive Deepening

The function `pd_evaluate` takes three arguments:
- `S`     is the current state of the game,
- `limit` specifies how deep the game tree is searched,
- `f`     is either the function `maxValue` or the function `minValue`.  
          If it is the maximizing player to move in `State`, `f` is equal to
          `maxValue`, else it is equal to `minValue`.

The function `pd_evaluate` uses *progressive deepening* to compute the value of `State`.  The given `State` is evaluated for a depth of $0$, $1$, $\cdots$, `limit`.  The values calculated for a depth of `level` are stored and used to sort the states when `State` is next evaluated for a depth of `level+1`.  This is beneficial for *alpha-beta pruning* because alpha-beta pruning can cut off more branches from the search tree if we start by evaluating the best moves first.  

In [11]:
def pd_evaluate(S:     State, 
                limit: int, 
                f:     Callable[[State, int, float, float], float]) -> float:
    """Evaluate `S` by progressive deepening up to a depth of `limit`.

    `S` is evaluated with `evaluate(S, level, f)` for level = 0, 1, ..., limit.
    The results of the shallower searches stay in `gCache` and are used by
    `maxValue` / `minValue` to order the moves of the next, deeper search.

    Parameters:
        S:     the state to evaluate.
        limit: maximal search depth in half-moves; must be non-negative.
        f:     `maxValue` if the maximizing player is to move in `S`,
               otherwise `minValue`.

    Returns:
        The value computed by the deepest search that was run.  The search
        stops early as soon as a level yields -1 or 1, because the game is
        then decided.

    Raises:
        ValueError: if `limit` is negative.  Without this check no search
                    would be run at all and `value` would be unbound.
    """
    if limit < 0:
        raise ValueError(f'limit must be non-negative, got {limit}')
    for level in range(limit + 1):
        value = evaluate(S, level, f)
        if value in (-1, 1):  # the game has been decided in state S
            break
    return value

The function `evaluate` takes five arguments:
- `S` is the current state of the game,
- `limit` determines the lookahead.  To be more precise, it is the number of *half-moves* that are investigated to compute the value.  If `limit` is 0 and the game has not ended, the game is evaluated via the function `heuristic`. The function `heuristic` has to be defined in the notebook defining the game.
- `f` is either the function `maxValue` or the function `minValue`.  

   `f = maxValue` if it's the maximizing player's turn in `State`.  Otherwise,
   `f = minValue`.
- `alpha` and `beta` are the parameters from *alpha-beta pruning*.

The function `evaluate` returns the *value* that the given `State` has if both players play their optimal game. 
- If the maximizing player can force a win, the return value is `+1`.
- If the minimizing player can force a win, the return value is `-1`.

Otherwise, an *approximate* value is calculated according to some *heuristic*.

For reasons of efficiency, the function `evaluate` is *memoized* using the global variable `gCache`.   This works in the same way as described in the notebook `Alpha-Beta-Pruning-Memoization.ipynb`.

In [12]:
def evaluate(S:     State, 
             limit: int, 
             f:     Callable[[State, int, float, float], float], 
             alpha: float = -1, 
             beta:  float =  1) -> float:
    """Memoized evaluation of state `S` with a lookahead of `limit` half-moves.

    Parameters:
        S:     the state to evaluate.
        limit: the search depth.
        f:     `maxValue` or `minValue`, depending on whose turn it is in `S`.
        alpha: lower end of the alpha-beta window.
        beta:  upper end of the alpha-beta window.

    Returns:
        The number cached for `(S, limit)` if it is exact or already settles
        the question for the window [alpha, beta]; otherwise the result of
        `f(S, limit, alpha, beta)`, where the window may have been narrowed
        by a cached bound.  A freshly computed result is stored via
        `store_cache` before it is returned.
    """
    cached = gCache.get((S, limit))
    if cached is not None:
        flag, bound = cached
        if flag == '=':
            # exact value known
            return bound
        if flag == '≤':
            # the true value is at most `bound`
            if bound <= alpha:
                return bound
            beta = min(beta, bound)
        elif flag == '≥':
            # the true value is at least `bound`
            if beta <= bound:
                return bound
            alpha = max(alpha, bound)
    result = f(S, limit, alpha, beta)
    store_cache(S, limit, alpha, beta, result)
    return result

The function `store_cache` is called with five arguments:
* `State` is a state of the game,
* `limit` is the search depth,
* `alpha` is a number,
* `beta`  is a number, and
* `value` is a number such that:
  $$\texttt{evaluate(State, limit, f, alpha, beta)} = \texttt{value}$$
  
The function stores the `value` in the dictionary `gCache` under the key `(State, limit)`.
It also stores an indicator that is either `'≤'`, `'='`, or `'≥'`.  The value that is stored 
satisfies the following conditions:
* If `gCache[State, limit] = ('≤', value)`, then `evaluate(State, limit) ≤ value`. 
* If `gCache[State, limit] = ('=', value)`, then `evaluate(State, limit) = value`. 
* If `gCache[State, limit] = ('≥', value)`, then `evaluate(State, limit) ≥ value`. 

In [13]:
def store_cache(S: State, limit: int, alpha: float, beta: float, value: float) -> None:
    """Record `value` in `gCache` under the key `(S, limit)`.

    The stored flag tells how `value` relates to the window [alpha, beta] it
    was computed with:
      '≤'  if value <= alpha        (only an upper bound),
      '='  if alpha < value < beta  (exact),
      '≥'  otherwise                (only a lower bound).
    """
    if value <= alpha:
        flag = '≤'
    elif value < beta:
        flag = '='
    else:  # value >= beta
        flag = '≥'
    gCache[(S, limit)] = (flag, value)

The function `value_cache` receives a `State` and a `limit` as parameters.  If a *value* for `State` has been computed to the given evaluation depth, this value is returned. Otherwise, `None` is returned.

In [None]:
def value_cache(S: State, limit: int) -> float | None:
    """Look up the number cached for `S` at search depth `limit`.

    Returns:
        The number stored in `gCache` under `(S, limit)`, or `None` if there
        is no such entry.  The flag of the entry is dropped, so the result may
        be an exact value or merely a bound.
    """
    entry = gCache.get((S, limit))
    if entry is None:
        return None
    _, value = entry
    return value

The module [`heapq`](https://docs.python.org/3/library/heapq.html) implements [heaps](https://en.wikipedia.org/wiki/Heap_(data_structure)).  The implementation of `maxValue` and `minValue` use heaps as *priority queues* in order to sort the moves.  This improves the performance of *alpha-beta pruning*.

In [None]:
import heapq

The function `maxValue` satisfies the following specification:
- $\alpha \leq \texttt{value}(s) \leq \beta \;\rightarrow\;\texttt{maxValue}(s, l, \alpha, \beta) = \texttt{value}(s)$
- $\texttt{value}(s) < \alpha \;\rightarrow\; \texttt{maxValue}(s, l, \alpha, \beta) \leq \alpha$
- $\beta < \texttt{value}(s) \;\rightarrow\; \beta \leq \texttt{maxValue}(s, l, \alpha, \beta)$

It assumes that `gPlayers[0]` is the maximizing player.  This function implements *alpha-beta pruning*.  After searching up to a depth of `limit`, the value is approximated using the function `heuristic`. 

**Nota Bene:**
The reason we write
```
val = value_cache(ns, limit-2)
```
instead of 
```
val = value_cache(ns, limit-1)
```
is explained in detail in the lecture notes.

In [None]:
def maxValue(S: State, limit: int, alpha: float=-1, beta: float=1) -> float:
    """Alpha-beta evaluation of `S` when the maximizing player `gPlayers[0]` moves.

    Parameters:
        S:     the state to evaluate.
        limit: remaining search depth in half-moves.
        alpha: lower end of the alpha-beta window.
        beta:  upper end of the alpha-beta window.

    Returns:
        `utility(S)` if the game is finished, `heuristic(S)` if `limit` is 0.
        Otherwise the maximum of `alpha` and the values of the successor
        states, where the search stops as soon as this maximum reaches `beta`.
    """
    if finished(S):
        return utility(S) # type: ignore
    if limit == 0:
        return heuristic(S)
    value      = alpha
    NextStates = next_states(S, gPlayers[0])
    Moves: list[tuple[float, State]] = []  # empty priority queue
    for ns in NextStates:
        # When this state was searched with depth limit-1 (the previous round
        # of progressive deepening), its successors were searched with depth
        # limit-2.  Those are the entries that can be used for move ordering.
        val = value_cache(ns, limit-2)
        if val is None:
            val = -1 # unknown values are assumed to be worse than known values
        # heaps are sorted ascendingly, hence the minus
        # NOTE(review): if two priorities tie, the heap compares the states
        # themselves, so states have to be orderable.
        heapq.heappush(Moves, (-val, ns))
    while Moves:
        _, ns = heapq.heappop(Moves)
        value = max(value, evaluate(ns, limit-1, minValue, value, beta))
        if value >= beta:  # beta cut-off
            return value
    return value

The function `minValue` satisfies the following specification:
- $\alpha \leq \texttt{value}(s) \leq \beta \;\rightarrow\;\texttt{minValue}(s, l, \alpha, \beta) = \texttt{value}(s)$
- $\texttt{value}(s) < \alpha \;\rightarrow\; \texttt{minValue}(s, l, \alpha, \beta) \leq \alpha$
- $\beta < \texttt{value}(s) \;\rightarrow\; \beta \leq \texttt{minValue}(s, l, \alpha, \beta)$

It assumes that `gPlayers[1]` is the minimizing player.  This function implements *alpha-beta pruning*.  After searching up to a depth of `limit`, the value is approximated using the function `heuristic`. 

In [None]:
def minValue(S: State, limit: int, alpha: float = -1, beta: float = 1) -> float:
    """Alpha-beta evaluation of `S` when the minimizing player `gPlayers[1]` moves.

    Parameters:
        S:     the state to evaluate.
        limit: remaining search depth in half-moves.
        alpha: lower end of the alpha-beta window.
        beta:  upper end of the alpha-beta window.

    Returns:
        `utility(S)` if the game is finished, `heuristic(S)` if `limit` is 0.
        Otherwise the minimum of `beta` and the values of the successor
        states, where the search stops as soon as this minimum drops to `alpha`.
    """
    if finished(S):
        return utility(S) # type: ignore
    if limit == 0:
        return heuristic(S)
    value      = beta
    NextStates = next_states(S, gPlayers[1])
    Moves: list[tuple[float, State]] = []  # empty priority queue
    for ns in NextStates:
        # When this state was searched with depth limit-1 (the previous round
        # of progressive deepening), its successors were searched with depth
        # limit-2.  Those are the entries that can be used for move ordering.
        val = value_cache(ns, limit-2)
        if val is None:
            val = 1 # unknown values are tried last by the minimizing player
        # NOTE(review): if two priorities tie, the heap compares the states
        # themselves, so states have to be orderable.
        heapq.heappush(Moves, (val, ns))
    while Moves:
        _, ns = heapq.heappop(Moves)
        value = min(value, evaluate(ns, limit-1, maxValue, alpha, value))
        if value <= alpha:  # alpha cut-off
            return value
    return value

In [None]:
%%capture
%run Connect-Four.ipynb

In [None]:
%unload_ext nb_mypy

In the state shown below, it is the turn of `Red`.  This player can force a win by pushing his stones in the $7^\textrm{th}$ column.  Due to this fact, *alpha-beta pruning* is able to prune large parts of the search tree and hence the evaluation is fast.

In [None]:
print(toString(gTestState)) # type:ignore

In [None]:
gCache = {}

In [None]:
%%time
value = pd_evaluate(gTestState, 10, maxValue)
value

In [None]:
len(gCache)

For the start state, the evaluation takes about $8$ seconds, if the depth limit is set to `10`.

In [None]:
gCache = {}

In [None]:
%%time
value = pd_evaluate(gStart, 10, maxValue)
value

In [None]:
len(gCache)

In order to evaluate the effect of *progressive deepening*, we reset the cache and then evaluate the start state without progressive deepening.

In [None]:
gCache = {}

In [None]:
%%time
value = evaluate(gStart, 10, maxValue)
value

This time, it takes about 21 seconds to evaluate the start state.  We see that the overhead of *progressive deepening* is more than compensated by the increased efficiency of *alpha-beta pruning*.  This effect gets even stronger after a number of moves have been played.

In [None]:
len(gCache)

## Playing the Game

The function `best_move` takes two arguments:
- `State` is the current state of the game,
- `limit` is the depth limit of the recursion.

The function `best_move` returns a pair of the form $(v, s)$ where $s$ is a state and $v$ is the value of this state.  The state $s$ is a state that is reached from `State` if the player makes one of her optimal moves.  In order to have some variation in the game, the function randomly chooses any of the optimal moves.

In [None]:
def best_move(S: State, limit: int) -> tuple[float, State]:
    """Choose a move for the maximizing player `gPlayers[0]` in state `S`.

    The value of `S` is computed with `pd_evaluate`.  Every successor state
    whose value, computed by `evaluate` with depth `limit-1`, equals the value
    of `S` counts as optimal, and one of the optimal successors is picked at
    random.

    Returns:
        A pair `(value, state)`: the value of `S` and the chosen successor.
    """
    successors = next_states(S, gPlayers[0])
    target     = pd_evaluate(S, limit, maxValue)
    optimal: list[State] = []
    for candidate in successors:
        if evaluate(candidate, limit-1, minValue) == target:
            optimal.append(candidate)
    return target, random.choice(optimal)

The next line is needed because we need the function `IPython.display.clear_output` to clear the output in a cell.

In [None]:
import IPython.display 

In [None]:
import time

The function `play_game` takes the depth limit `limit` of the search as its argument and plays the game.  The game played is specified indirectly by specifying the following:
- `gStart` is a global variable defining the start state of the game.
- `next_states` is a function such that $\texttt{next_states}(s, p)$ computes the set of all possible states that can be reached from state $s$ if player $p$ is next to move.
- `finished` is a function such that $\texttt{finished}(s)$ is true for a state $s$ if the game is over in state $s$.
- `utility` is a function such that $\texttt{utility}(s)$ returns either `-1`, `0`, or `1` if $s$ is a *terminal state* and `None` otherwise.  The result is given from the perspective of the maximizing player `gPlayers[0]`.  We have that
  - $\texttt{utility}(s)= -1$ iff the game is lost for the maximizing player in state $s$, 
  - $\texttt{utility}(s)=  0$ iff the game is drawn, and 
  - $\texttt{utility}(s)=  1$ iff the game is won for the maximizing player in state $s$.

In [None]:
def play_game(limit: int) -> None:
    """Play one game, starting in the global start state `gStart`.

    In every round the cache is reset, a move is computed with
    `best_move(state, limit)`, and the resulting state is printed together
    with the time the computation took.  The following state is then obtained
    from `get_move` (presumably the opponent's move -- TODO confirm).  The game
    ends as soon as `finished` holds; `final_msg` is then called.  At the end,
    the computation time of every move is listed.

    Parameters:
        limit: the depth limit passed on to `best_move`.
    """
    global gCache
    state = gStart # type: ignore
    history: list[float] = []
    while True:
        gCache = {}  # start every move with an empty cache
        # perf_counter is monotonic and therefore suited for measuring intervals
        start      = time.perf_counter()
        val, state = best_move(state, limit)
        diff       = round(time.perf_counter() - start, 2)
        history.append(diff)
        print(toString(state))
        print(f'{diff} seconds, value = {round(val, 2)}.')
        if finished(state):
            final_msg(state)
            break
        state = get_move(state)
        print(toString(state))
        if finished(state):
            final_msg(state)
            break
    for i, d in enumerate(history):
        print(f'{i}: {d} seconds')
In [None]:
play_game(10)

In [None]:
len(gCache)