# Nim Policy Search

Nim is a simple game where two players take turns removing objects from one of several rows (piles). The player who removes the last object loses. The game is described in detail [here](https://en.wikipedia.org/wiki/Nim). There is a mathematical strategy to win Nim: always leave the opponent a position whose nim-sum is zero. The nim-sum is the bitwise XOR of the row sizes, i.e. each row is split into groups of 1, 2, 4, ... objects and equal groups cancel out in pairs.

In this notebook, we will play Nim using the following agents:
1. An agent using fixed rules based on nim-sum
2. An agent using evolved rules
3. An agent using minmax
4. An agent using reinforcement learning

> Sidharrth Nagappan, 2022

In [1]:
import logging
import random

In [2]:
class Nim:
    '''
    Board for a multi-row game of Nim.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects, so ``Nim(3)``
    starts as ``[1, 3, 5]``.
    '''

    def __init__(self, num_rows: int, k: int = None):
        '''
        Build the starting board.

        Args:
            num_rows: number of rows on the board.
            k: optional cap on how many objects a single move may remove;
                ``None`` means a move may empty a whole row.
        '''
        self.num_rows = num_rows
        self._k = k
        # Current number of objects in each row; mutated by nimming_remove.
        self.rows = [i * 2 + 1 for i in range(num_rows)]

    def nimming_remove(self, row: int, num_objects: int):
        '''
        Remove ``num_objects`` objects from row ``row`` in place.

        Raises:
            AssertionError: if ``num_objects`` is not positive, exceeds the
                objects left in the row, or exceeds the cap ``k``.
        '''
        # Without this guard a zero count is a silent no-op and a negative
        # count passes both checks below and *adds* objects to the row.
        assert num_objects >= 1, "a move must remove at least one object"
        assert self.rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self.rows[row] -= num_objects

    def goal(self) -> bool:
        '''Return True once every row is empty.'''
        return sum(self.rows) == 0

In [34]:
class RandomPlayer:
    '''
    Plays Nim by picking a random non-empty row and removing a random
    number of objects from it.
    '''

    def __init__(self):
        # Number of moves this player has made so far.
        self.num_moves = 0

    def play(self, nim: "Nim"):
        '''
        Apply one random legal move to ``nim`` in place.

        The row is drawn uniformly among the rows that still hold objects,
        then the number of objects to remove is drawn uniformly between 1
        and the largest amount allowed for that row.

        Raises:
            ValueError: if the board has no objects left (previously this
                case retried forever).
        '''
        non_empty = [index for index, count in enumerate(nim.rows) if count > 0]
        if not non_empty:
            raise ValueError("cannot play: no objects left on the board")
        row = random.choice(non_empty)

        # Honour the board's optional per-move cap so the chosen move is
        # never rejected by nimming_remove.
        cap = getattr(nim, "_k", None)
        most = nim.rows[row] if cap is None else min(nim.rows[row], cap)
        to_remove = random.randint(1, most)

        nim.nimming_remove(row, to_remove)
        self.num_moves += 1

In [35]:
from copy import deepcopy
from itertools import accumulate
from operator import xor

# 3.1: Agent Using Fixed Rules
class ExpertFixedRuleAgent:
    '''
    Play the game of Nim using a fixed rule
    (always leave a nim-sum of 0 at the end of the turn)
    '''
    def __init__(self):
        # Number of moves this agent has made so far.
        self.num_moves = 0

    def nim_sum(self, nim: "Nim"):
        '''
        Returns the nim sum of the current game board
        by taking an XOR of all the rows.
        An empty list of rows has a nim sum of 0.
        Ideally, agent should try to leave nim sum of 0 at the end of turn
        '''
        result = 0
        for count in nim.rows:
            result ^= count
        return result

    def play(self, nim: "Nim"):
        '''
        Apply one move to ``nim`` in place.

        If some move leaves a nim sum of 0, the one in the lowest-indexed
        row is played; otherwise (the nim sum is already 0) a move is drawn
        uniformly from all (row, count) pairs.

        NOTE: the board's optional per-move cap is not consulted here.

        Raises:
            IndexError: from ``random.choice`` when no objects are left.
        '''
        nim_sum = self.nim_sum(nim)

        # Shrinking a row from `count` to `count ^ nim_sum` zeroes the nim
        # sum, and that is only a legal move when it makes the row smaller.
        # Each row has at most one such move, so scanning rows in order
        # picks the same move as trying every (row, count) pair in order.
        move = None
        for row, count in enumerate(nim.rows):
            target = count ^ nim_sum
            if target < count:
                move = (row, count - target)
                break

        # if no such move exists, fall back to a random move
        if move is None:
            all_possible_moves = [(r, o) for r, c in enumerate(nim.rows) for o in range(1, c+1)]
            move = random.choice(all_possible_moves)

        nim.nimming_remove(*move)
        self.num_moves += 1

In [36]:
# random agent vs fixed rule agent

# Set up a 20-row board; the random player (1) moves first, the expert is 2.
nim = Nim(20)
p1 = RandomPlayer()
p2 = ExpertFixedRuleAgent()
current_player = 1

# Alternate turns until every row is empty, printing the board after each move.
while not nim.goal():
    if current_player != 1:
        p2.play(nim)
        print(f"Expert plays: {nim.rows}")
        current_player = 1
        continue
    p1.play(nim)
    print(f"Random plays: {nim.rows}")
    current_player = 2



Random plays: [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 21, 33, 35, 37, 39]
Expert plays: [1, 3, 5, 7, 3, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 21, 33, 35, 37, 39]
Random plays: [1, 3, 5, 7, 3, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 21, 33, 33, 37, 39]
Expert plays: [1, 1, 5, 7, 3, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 21, 33, 33, 37, 39]
Random plays: [1, 1, 5, 7, 3, 8, 13, 15, 17, 19, 21, 23, 25, 27, 29, 21, 33, 33, 37, 39]
Expert plays: [1, 1, 5, 4, 3, 8, 13, 15, 17, 19, 21, 23, 25, 27, 29, 21, 33, 33, 37, 39]
Random plays: [1, 1, 5, 4, 3, 8, 13, 15, 17, 19, 21, 23, 5, 27, 29, 21, 33, 33, 37, 39]
Expert plays: [1, 1, 5, 4, 3, 8, 13, 15, 13, 19, 21, 23, 5, 27, 29, 21, 33, 33, 37, 39]
Random plays: [0, 1, 5, 4, 3, 8, 13, 15, 13, 19, 21, 23, 5, 27, 29, 21, 33, 33, 37, 39]
Expert plays: [0, 0, 5, 4, 3, 8, 13, 15, 13, 19, 21, 23, 5, 27, 29, 21, 33, 33, 37, 39]
Random plays: [0, 0, 5, 4, 3, 5, 13, 15, 13, 19, 21, 23, 5, 27, 29, 21, 33, 33, 37, 39]
Expert plays: [0, 0, 5

In [None]:
# 3.2: Agent Using Evolved Rules
class EvolvedAgent:
    '''
    Plays Nim using a set of rules that are evolved
    '''
    def __init__(self):
        # Number of moves this agent has made so far.
        self.num_moves = 0

    def nim_sum(self, nim: "Nim"):
        '''
        Returns the nim sum of the current game board
        by taking an XOR of all the rows (0 for an empty list of rows).
        '''
        result = 0
        for count in nim.rows:
            result ^= count
        return result

    def play(self, nim: "Nim"):
        '''
        Gather the board features the evolved rules are meant to work from.

        TODO: this method only computes the features below; it does not yet
        pick or apply a move, and it leaves ``nim`` and ``num_moves``
        untouched.

        Raises:
            ValueError: from ``min``/``max`` when no row holds any object.
        '''
        all_possible_moves = [(r, o) for r, c in enumerate(nim.rows) for o in range(1, c+1)]
        active_rows_number = sum(1 for r in nim.rows if r > 0)
        # Index of the non-empty row holding the fewest objects.
        shortest_row = min((x for x in enumerate(nim.rows) if x[1] > 0), key=lambda y: y[1])[0]
        # NOTE(review): this is an object count, while shortest_row is a
        # row index -- confirm which one the rules need.
        longest_row = max(r for r in nim.rows if r > 0)
        # True when the current board already has a nim sum of 0.
        nim_status = self.nim_sum(nim) == 0

        # Pair every possible move with whether it leaves a nim sum of 0;
        # each move is tried on a copy so the real board is never changed.
        brute_force = list()
        for m in all_possible_moves:
            replicated_nim = deepcopy(nim)
            replicated_nim.nimming_remove(*m)
            brute_force.append((m, self.nim_sum(replicated_nim) == 0))
        

In [None]:
from typing import Callable


# Number of games played per evaluation.
NUM_MATCHES = 10
# Number of rows of the board used in every evaluation game.
NIM_SIZE = 10

def evaluate(strategy: Callable) -> float:
    '''
    Play NUM_MATCHES games of ``strategy`` against ``optimal_startegy``
    and return the fraction counted as wins for ``strategy``.

    ``strategy`` always moves first. Each strategy is called with the
    board and its result is unpacked into ``nim.nimming_remove``, so it is
    assumed to return a ``(row, num_objects)`` pair -- TODO confirm against
    the strategies actually passed in.

    A game is counted as won when ``strategy`` made the final move.
    NOTE(review): the notebook introduction says the player removing the
    last object loses -- confirm which rule is intended here.
    '''
    opponent = (strategy, optimal_startegy)
    won = 0

    for _ in range(NUM_MATCHES):
        nim = Nim(NIM_SIZE)
        player = 0
        # Nim defines neither __bool__ nor __len__, so `while nim:` would
        # never end; use the board's own end-of-game test instead.
        while not nim.goal():
            ply = opponent[player](nim)
            nim.nimming_remove(*ply)
            player = 1 - player
        # `player` is flipped after every move, so 1 here means that
        # player 0 (`strategy`) made the last move.
        if player == 1:
            won += 1
    return won / NUM_MATCHES