In [43]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor
from math import inf

In [44]:
Nimply = namedtuple("Nimply", "row, num_objects")

class Nim:
    def __init__(self, num_rows: int, k: int = None, player: int = random.randrange(2)) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k
        self._player = player
        self._last_move = tuple()

    def __hash__(self) -> int:
        return hash(tuple(self._rows))

    def __eq__(self, __o: object) -> bool:
        return hash(self) == hash(__o)

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    @property
    def last_move(self) -> tuple:
        return tuple(self._last_move)

    @property
    def player(self) -> tuple:
        return self._player

    @property
    def k(self) -> int:
        return self._k

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects
        self._last_move = ply
        self._player = 1 - self._player

In [45]:
def nim_sum(state: Nim) -> int:
    *_, result = accumulate(state.rows, xor)
    return result

def cook_status(state: Nim) -> dict:
    cooked = dict()

    cooked["possible_moves"] = [
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1) if state.k is None or o <= state.k
    ]
    cooked["active_rows_number"] = sum(o > 0 for o in state.rows)
    #cooked["shortest_row"] = min((x for x in enumerate(state.rows) if x[1] > 0), key=lambda y: y[1])[0]
    #cooked["longest_row"] = max((x for x in enumerate(state.rows)), key=lambda y: y[1])[0]
    cooked["nim_sum"] = nim_sum(state)

    brute_force = list()
    for m in cooked["possible_moves"]:
        tmp = deepcopy(state)
        tmp.nimming(m)
        brute_force.append((m, nim_sum(tmp)))
    cooked["brute_force"] = brute_force

    return cooked

In [46]:
def optimal_startegy(state: Nim) -> Nimply:
    data = cook_status(state)
    return next((bf for bf in data["brute_force"] if bf[1] == 0), random.choice(data["brute_force"]))[0]

def pure_random(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)

In [47]:
# NIMSUM SECTION

def evaluate_board(state: Nim, cooked: dict):
    if state: 
        # if I stopped because of depth then I try to evaluate the current move based on
        # - the fact that the number of heaps is even (which is nice for my algo, bad for my opponent) and viceversa
        # - the fact that the move made leaves just one heap on the board, which is basically loosing
        val = 1 if sum(state.rows) % 2 == 0 else -1
        if cooked["active_rows_number"] == 1: val = -500
        return val
    else: return 500 if state.player == 0 else -500

def minmax(state: Nim, depth: int, alpha: int, beta: int, maximizingPlayer: bool) -> tuple:
    cooked = cook_status(state)
    if depth == 0 or not state:
        # I return
        # - the evaluation of the current board
        # - the last move played
        # - a boolean flag that specify if this evaulation was done to a leaf or 
        #   to an ongnoing configuration (so I made it just because I hit the depth limit)
        return (evaluate_board(state, cooked), state.last_move, depth == 0)
    
    # I hit the cache if I find the key made of the board config and the player turn in that config
    # don't want to use a cached value calculated for another player
    if (state, state.player) in cache:
        cache["hits"] += 1
        return cache[(state, state.player)]

    if maximizingPlayer:
        maxEval = (-inf, (-inf, -inf), True)
        for move in cooked["possible_moves"]:
            local_state = deepcopy(state)
            local_state.nimming(move)
            eval = minmax(local_state, depth - 1, alpha, beta, False)
            maxEval = max(maxEval, eval, key=lambda t: t[0])
            alpha = max(alpha, eval[0])
            if beta <= alpha: break
        if maxEval[2] == False: 
            # I add the evaluation to the cache only if this is a terminal state
            # if I add to cache also the stuff I calculate when I reach depth limit
            # this will be used in the future, but the evaluation would not be complete
            # so I may take bad decisions. It is worth avoiding cache in that case.
            cache[(state, state.player)] = maxEval
        return maxEval
    else:
        minEval = (inf, (inf, inf), True)
        for move in cooked["possible_moves"]:
            local_state = deepcopy(state)
            local_state.nimming(move)
            eval = minmax(local_state, depth - 1, alpha, beta, True)
            minEval = min(minEval, eval, key=lambda t: t[0])
            beta = min(beta, eval[0])
            if beta <= alpha: break
        if minEval[2] == False: 
            # I add the evaluation to the cache only if this is a terminal state
            # if I add to cache also the stuff I calculate when I reach depth limit
            # this will be used in the future, but the evaluation would not be complete
            # so I may take bad decisions. It is worth avoiding cache in that case.
            cache[(state, state.player)] = minEval
        return minEval

In [48]:
logging.getLogger().setLevel(logging.DEBUG)

cache = dict()
cache["hits"] = 0

strategy = (pure_random, minmax)

nim = Nim(7)
logging.debug(f"status: Initial board  -> {nim} | Player number {nim.player} starts")
while nim:
    ply = strategy[nim.player](nim) if nim.player == 0 else strategy[nim.player](nim, 5, -inf, inf, True)[1]
    nim.nimming(ply)
    logging.debug(f"status: After player {1 - nim.player} moved {ply} -> {nim}")
logging.info(f"status: Player {1 - nim.player} won!")

print(f"Cache hits: {cache['hits']}")

DEBUG:root:status: Initial board  -> <1 3 5 7 9 11 13> | Player number 0 starts
DEBUG:root:status: After player 0 moved Nimply(row=4, num_objects=5) -> <1 3 5 7 4 11 13>
DEBUG:root:status: After player 1 moved (2, 2) -> <1 3 3 7 4 11 13>
DEBUG:root:status: After player 0 moved Nimply(row=3, num_objects=5) -> <1 3 3 2 4 11 13>
DEBUG:root:status: After player 1 moved (2, 1) -> <1 3 2 2 4 11 13>
DEBUG:root:status: After player 0 moved Nimply(row=0, num_objects=1) -> <0 3 2 2 4 11 13>
DEBUG:root:status: After player 1 moved (2, 1) -> <0 3 1 2 4 11 13>
DEBUG:root:status: After player 0 moved Nimply(row=5, num_objects=9) -> <0 3 1 2 4 2 13>
DEBUG:root:status: After player 1 moved (3, 1) -> <0 3 1 1 4 2 13>
DEBUG:root:status: After player 0 moved Nimply(row=1, num_objects=3) -> <0 0 1 1 4 2 13>
DEBUG:root:status: After player 1 moved (4, 1) -> <0 0 1 1 3 2 13>
DEBUG:root:status: After player 0 moved Nimply(row=5, num_objects=2) -> <0 0 1 1 3 0 13>
DEBUG:root:status: After player 1 moved (6, 2

Cache hits: 277
