# Lab 3: Policy Search

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The player taking the last object wins.

* Task3.3: An agent using minmax

In [113]:
from nim_utils import Nimply, Nim, evaluate, pure_random, level_three, expert_strategy
from functools import cache

## **MinMax for Nim**

In [114]:
MAX_DEPTH = 35 # defined empirically for a "fast" game

# Check terminal state
def check_teminal(rows, is_maximizing) -> int:
    if sum(rows) == 0:
        return -1 if is_maximizing else 1

# Get moves
def possible_new_states(rows):
    checked = set()
    for row, num_objs in enumerate(rows):
        for remain in range(num_objs):
            tuple_to_return = rows[:row] + (remain,) + rows[row + 1 :]
            if tuple(sorted(tuple_to_return)) in checked:
                continue
            else:
                checked.add(tuple(sorted(tuple_to_return)))
                yield tuple_to_return 

# Minmax
@cache
def minmax(rows, depth, is_maximizing, alpha=-1, beta=1):
    if (score := check_teminal(rows, is_maximizing)) is not None:
        return score
    # Over a certain depth, return zero
    if depth >= MAX_DEPTH:
        return 0
        
    scores = []
    for new_state in possible_new_states(rows):
        score = minmax(new_state, depth+1, not is_maximizing, alpha, beta)
        if score == 0:
            return 0
        scores.append(score)
        if is_maximizing:
            # Update alpha: min score of maximizing player
            alpha = max(alpha, score)
        else:
            # Update beta: maximum score of minimizing player
            beta = min(beta, score)
        # Do not consider further moves (maximizer already found a better solution than any of the unexplored ones)
        if beta <= alpha:
            break
    return (max if is_maximizing else min)(scores)
    
# Best move
def best_move(state: Nim) -> Nimply:
    maximum =  max(
        (minmax(new_state, depth=0, is_maximizing=False), new_state) for new_state in possible_new_states(state.rows)
    )
    return maximum if maximum != 0 else None

### **MinMax Strategy**

In [115]:
def minmax_strategy(board: Nim) -> Nimply:
    best = best_move(board) 
    if best is not None:
        _, new_state = best
        for idx, (curr_row, new_row) in enumerate(zip(board.rows, new_state)):
            if curr_row != new_row:
                return Nimply(idx, curr_row - new_row)
    else:
        idx = board.rows.index(max(board.rows))
        return Nimply(idx, 1)


#### Play "small" games against different strategies

In [116]:
print(evaluate(minmax_strategy, level_three, k=None, nim_size=5))
print(minmax.cache_info())
print(evaluate(minmax_strategy, expert_strategy, k=None, nim_size=6))
print(minmax.cache_info())
print(evaluate(minmax_strategy, pure_random, k=None, nim_size=6))
print(minmax.cache_info())

(1.0, 1.0)
CacheInfo(hits=120039, misses=25111, maxsize=None, currsize=25111)
(1.0, 0.0)
CacheInfo(hits=2906528, misses=442590, maxsize=None, currsize=442590)
(1.0, 1.0)
CacheInfo(hits=3196717, misses=481936, maxsize=None, currsize=481936)


#### Play "larger" games against different strategies

In [117]:
print(evaluate(minmax_strategy, level_three, k=None, nim_size=7))
print(minmax.cache_info())
print(evaluate(minmax_strategy, expert_strategy, k=None, nim_size=8))
print(minmax.cache_info())
print(evaluate(minmax_strategy, pure_random, k=None, nim_size=10))
print(minmax.cache_info())

(1.0, 1.0)
CacheInfo(hits=14517421, misses=2161519, maxsize=None, currsize=2161519)
(0.0, 0.0)
CacheInfo(hits=18557085, misses=2737241, maxsize=None, currsize=2737241)
(1.0, 1.0)
CacheInfo(hits=169800770, misses=25838002, maxsize=None, currsize=25838002)
