# Lab 3 of Computational Intelligence
### Ricardo Nicida Kazama

## Policy Search
### Task
Write agents able to play [Nim](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound on the number of objects that can be removed in a turn (a.k.a., subtraction game).

The player taking the last object wins.

- Task3.1: An agent using fixed rules based on nim-sum (i.e., an expert system)
- Task3.2: An agent using evolved rules
- Task3.3: An agent using minmax
- Task3.4: An agent using reinforcement learning

In [2]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor

In [3]:
# A ply: the index of the row to take from and how many objects to remove.
Nimply = namedtuple("Nimply", ["row", "num_objects"])

In [53]:
class Nim:
    """
    Mutable Nim board with an optional cap on the objects removed per ply.

    Row ``i`` starts with ``2 * i + 1`` objects. ``k`` is the maximum number
    of objects a single ply may remove; ``None`` means no limit.
    The instance is truthy while at least one object is left on the board.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        # The game is still running while any object remains.
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Snapshot (tuple copy) of the number of objects in each row."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Upper bound on objects removed per ply, or None when unbounded."""
        return self._k

    def nimming(self, ply: Nimply) -> None:
        """
        Apply ``ply`` by removing ``num_objects`` objects from ``row``.

        Raises AssertionError when the ply removes fewer than one object,
        more objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A ply must take at least one object: 0 would let a player pass and
        # a negative value would put objects back on the board.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

    def undo(self, ply: Nimply) -> None:
        """Revert a previously applied ``ply`` by restoring its objects."""
        row, num_objects = ply
        self._rows[row] += num_objects


### Sample (and silly) strategies

In [5]:
def pure_random(state: Nim) -> Nimply:
    """
    Pick a random non-empty row and remove a random number of objects from it.

    The amount is drawn from 1 up to the row size, capped by ``state.k`` when
    an upper bound is set, so the ply is always accepted by ``Nim.nimming``.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    available = state.rows[row]
    # Respect the per-ply limit of the subtraction game (None means no limit).
    limit = available if state.k is None else min(available, state.k)
    num_objects = random.randint(1, limit)
    return Nimply(row, num_objects)

def gabriele(state: Nim) -> Nimply:
    """
    Always take as many objects as allowed from the first non-empty row.

    "As many as allowed" is the whole row, or ``state.k`` objects when an
    upper bound is set and the row holds more than that.

    Raises ValueError when every row is empty (no legal ply exists).
    """
    for row, count in enumerate(state.rows):
        if count > 0:
            # Respect the per-ply limit of the subtraction game.
            take = count if state.k is None else min(count, state.k)
            return Nimply(row, take)
    raise ValueError("no legal ply: every row is empty")
    

### Task 3.1
An agent using fixed rules based on nim-sum (i.e., an expert system)

The main function of this section is `nim_sum_strategy`, which is built on top of two other functions. The solution is based on the nim-sum, where the candidate values are tested exhaustively. Both the regressive and the progressive searches were similar in terms of the number of plies.

In [141]:
def nim_sum(state: Nim) -> int:
    """
    Compute the nim-sum: the bitwise XOR of the object counts of all rows.

    Returns 0 for a board without rows (the XOR identity).
    """
    total = 0
    for count in state.rows:
        total ^= count
    return total


def take_one(state: Nim) -> Nimply:
    """
    Take one object from the row with the maximum number of objects.

    When several rows share the maximum, the one with the lowest index is
    used. The board is expected to hold at least one object.
    """
    rows = state.rows
    # Compare rows by their object count (not by their index), so the chosen
    # row is guaranteed to be non-empty whenever any object is left.
    fullest_row = max(range(len(rows)), key=rows.__getitem__)
    return Nimply(fullest_row, 1)


def progressive_nim_sum_strategy(state: Nim) -> Nimply:
    """
    Search the rows from top (index 0) to bottom for a ply that brings the nim-sum to zero.

    For a row holding ``c`` objects, the only size that zeroes the nim-sum is
    the one leaving ``c XOR nim_sum`` objects, and it is a legal ply only when
    that target is smaller than ``c``. The ply is computed directly from the
    row counts, so the game state is never modified during the search.

    Plies larger than ``state.k`` (when a bound is set) are skipped.
    Returns None when no such ply exists (e.g. the nim-sum is already zero).
    """
    rows = state.rows
    if not any(rows):
        # No objects left: there is nothing to remove.
        return None
    current = nim_sum(state)
    for i, count in enumerate(rows):
        target = count ^ current
        if target < count:
            removed = count - target
            if state.k is None or removed <= state.k:
                return Nimply(i, removed)
    return None


def regressive_nim_sum_strategy(state: Nim) -> Nimply:
    """
    Opposite of progressive_nim_sum_strategy.
    Search the rows from bottom (last index) to top for a ply that brings the nim-sum to zero.

    For a row holding ``c`` objects, the only size that zeroes the nim-sum is
    the one leaving ``c XOR nim_sum`` objects, and it is a legal ply only when
    that target is smaller than ``c``. The ply is computed directly from the
    row counts, so the game state is never modified during the search.

    Plies larger than ``state.k`` (when a bound is set) are skipped.
    Returns None when no such ply exists (e.g. the nim-sum is already zero).
    """
    rows = state.rows
    if not any(rows):
        # No objects left: there is nothing to remove.
        return None
    current = nim_sum(state)
    for i in range(len(rows) - 1, -1, -1):
        count = rows[i]
        target = count ^ current
        if target < count:
            removed = count - target
            if state.k is None or removed <= state.k:
                return Nimply(i, removed)
    return None

def nim_sum_strategy(state: Nim) -> Nimply:
    """
    Expert-system agent driven by the nim-sum.

    With a non-zero nim-sum the ply comes from regressive_nim_sum_strategy;
    when the nim-sum is already zero it falls back to take_one.
    """
    if nim_sum(state) != 0:
        # Alternative search order: progressive_nim_sum_strategy(state)
        return regressive_nim_sum_strategy(state)
    return take_one(state)
    

In [167]:
# print the match
def print_nim(state, player, status):
    """
    Print a header line followed by an ASCII picture of the board.

    ``player`` is None for the initial board, otherwise the player that just
    moved. ``status`` is appended verbatim to the header. Each row is drawn
    with one ``|`` per remaining object, padded with ``.`` up to ``2 * i + 1``
    characters for row ``i``.
    """
    # Identity test: player 0 is a valid player and must not be taken for None.
    if player is None:
        print("Initial board" + status)
    else:
        print(f"After player {player}" + status)
    for i, n in enumerate(state.rows):
        print("|" * n + "." * (i * 2 + 1 - n))

def status(state):
    """
    Build the one-line summary used in logs and headers: the board's string
    form right-aligned in 25 columns, followed by the current nim-sum.
    """
    board = format(state)
    current_nim_sum = nim_sum(state)
    return "-> " + format(board, ">25") + " | nim_sum = " + format(current_nim_sum, "2")

### Match

In [169]:
# Let DEBUG records through so that every ply of the match is logged.
logging.getLogger().setLevel(logging.DEBUG)

# strategy[0] moves first; swap to the commented line to let the random player open.
# strategy = (pure_random, nim_sum_strategy)
strategy = (nim_sum_strategy, pure_random)

nim = Nim(4)

logging.debug(f"status: Initial board {status(nim)}")
print_nim(nim, None, status(nim))

ply_counter, player = 0, 0
while nim:
    ply = strategy[player](nim)
    nim.nimming(ply)
    ply_counter += 1
    logging.debug(f"status: After player {player}{status(nim)}")
    print_nim(nim, player, status(nim))
    # Hand the turn to the other player (0 <-> 1).
    player = 1 - player
# The loop switches players once more after the last ply, so switch back:
# the player who took the last object is the winner.
winner = 1 - player

final_message = f"status: Player {winner} won in ply #{ply_counter}!"
logging.info(final_message)
print(final_message)

DEBUG:root:status: Initial board ->                 <1 3 5 7> | nim_sum =  0
DEBUG:root:status: After player 0->                 <1 3 5 6> | nim_sum =  1
DEBUG:root:status: After player 1->                 <0 3 5 6> | nim_sum =  0
DEBUG:root:status: After player 0->                 <0 3 5 5> | nim_sum =  3
DEBUG:root:status: After player 1->                 <0 3 4 5> | nim_sum =  2
DEBUG:root:status: After player 0->                 <0 1 4 5> | nim_sum =  0
DEBUG:root:status: After player 1->                 <0 1 1 5> | nim_sum =  5
DEBUG:root:status: After player 0->                 <0 1 1 0> | nim_sum =  0
DEBUG:root:status: After player 1->                 <0 1 0 0> | nim_sum =  1
DEBUG:root:status: After player 0->                 <0 0 0 0> | nim_sum =  0
INFO:root:status: Player 0 won in ply #9!


Initial board->                 <1 3 5 7> | nim_sum =  0
|
|||
|||||
|||||||
After player 0->                 <1 3 5 6> | nim_sum =  1
|
|||
|||||
||||||.
After player 1->                 <0 3 5 6> | nim_sum =  0
.
|||
|||||
||||||.
After player 0->                 <0 3 5 5> | nim_sum =  3
.
|||
|||||
|||||..
After player 1->                 <0 3 4 5> | nim_sum =  2
.
|||
||||.
|||||..
After player 0->                 <0 1 4 5> | nim_sum =  0
.
|..
||||.
|||||..
After player 1->                 <0 1 1 5> | nim_sum =  5
.
|..
|....
|||||..
After player 0->                 <0 1 1 0> | nim_sum =  0
.
|..
|....
.......
After player 1->                 <0 1 0 0> | nim_sum =  1
.
|..
.....
.......
After player 0->                 <0 0 0 0> | nim_sum =  0
.
...
.....
.......
status: Player 0 won in ply #9!
