Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: Policy Search

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task3.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task3.2: An agent using evolved rules
* Task3.3: An agent using minimax (a.k.a. minmax)
* Task3.4: An agent using reinforcement learning

## Instructions

* Create the directory `lab3` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) inside it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.

**Deadline**

T.b.d.


In [11]:
import logging
import random

In [12]:
# Emit DEBUG-level records so the per-ply board states logged by play() are shown.
logging.basicConfig(level=logging.DEBUG)

In [13]:
class Player:
    """A named game participant whose moves are produced by a strategy callable.

    The strategy is called as ``strategy(player, state)`` each time the
    player has to move.
    """

    def __init__(self, name: str, strategy):
        self._name = name
        self._strategy = strategy
        self._n_plies = 0
        self._loser = False

    @property
    def name(self):
        """Name given at construction time (read-only)."""
        return self._name

    @property
    def n_plies(self):
        """Number of times :meth:`ply` has been called so far (read-only)."""
        return self._n_plies

    @property
    def loser(self):
        """Flag telling whether this player has lost; initially ``False``."""
        return self._loser

    @loser.setter
    def loser(self, val):
        self._loser = val

    def ply(self, state):
        """Count one more ply, then return whatever the strategy returns for ``state``."""
        self._n_plies = self._n_plies + 1
        strategy = self._strategy
        return strategy(self, state)

class Nim:
    """Board of a Nim game: a list of heaps plus an optional per-move bound.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects. When ``k`` is not
    ``None``, a single move may remove at most ``k`` objects.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def nimming(self, row: int, num_objects: int, player: "Player") -> None:
        """Remove ``num_objects`` objects from heap ``row`` on behalf of ``player``.

        If the move leaves the board empty, ``player.loser`` is set to
        ``True``: whoever takes the last object loses.

        Raises:
            AssertionError: if the move is illegal, i.e. it takes more
                objects than the heap holds, fewer than one object, or more
                than the bound ``k``.
            IndexError: if ``row`` is not a valid heap index.
        """
        # The checks raise explicitly instead of using ``assert`` so that the
        # rules are still enforced under ``python -O``; AssertionError is kept
        # so existing ``except AssertionError`` handlers keep working.
        if self._rows[row] < num_objects:
            raise AssertionError(
                f"cannot take {num_objects} objects from row {row} "
                f"holding {self._rows[row]}"
            )
        if num_objects < 1:
            # A zero move would be a free pass and a negative one would add
            # objects to the heap.
            raise AssertionError("a move must take at least one object")
        if self._k is not None and num_objects > self._k:
            raise AssertionError(
                f"cannot take more than k={self._k} objects in one move"
            )
        self._rows[row] -= num_objects
        if sum(self._rows) == 0:
            player.loser = True

    @property
    def rows(self):
        """The live list of heap sizes (not a copy)."""
        return self._rows


def play(A: Player, B: Player, state: Nim):
    """Run one match on ``state`` with ``A`` moving first and ``B`` second.

    The two players alternate plies until one of them is flagged as loser.
    The board is logged at DEBUG level before the first ply and after every
    ply; the winner and the number of plies they played are printed at the
    end.
    """
    logging.debug(f"{state.rows} : initial state")
    while not A.loser and not B.loser:
        for mover in (A, B):
            mover.ply(state)
            logging.debug(f"{state.rows} : state after {mover.name} ply")
            if mover.loser:
                # the match is decided: the other player must not move again
                break

    if A.loser:
        winner = B
    elif B.loser:
        winner = A
    else:
        return
    print(f"{winner.name} wins in {winner.n_plies} plies")
    
def random_strategy(player: "Player", heaps: "Nim"):
    """Play a random legal move.

    A non-empty heap is picked at random and a random number of objects is
    removed from it: at least one, and never more than the heap holds nor
    more than the board's per-move bound ``k`` when one is set.

    Raises:
        IndexError: if every heap is empty (nothing to choose from).
    """
    # choose a random non-zero heap
    non_zero_heaps_idxs = [i for i, v in enumerate(heaps.rows) if v > 0]
    idx_heap = random.choice(non_zero_heaps_idxs)
    max_take = heaps.rows[idx_heap]
    # Nim stores the bound in ``_k`` and offers no public accessor; boards
    # without that attribute are treated as unbounded.
    k = getattr(heaps, "_k", None)
    if k is not None:
        # without this cap the draw could exceed k and be rejected by nimming()
        max_take = min(max_take, k)
    quantity = random.randint(1, max_take)
    heaps.nimming(idx_heap, quantity, player)

## Task3.1 Nim-Sum

In [14]:
def nim_sum(l: list):
    """Return the nim-sum (bitwise XOR) of the values in ``l``; 0 if ``l`` is empty."""
    total = 0
    for v in l:
        total ^= v
    return total


def nim_sum_strategy(player: "Player", heaps: "Nim"):
    """Play one move of the nim-sum strategy for misère Nim (last taker loses).

    * No heap holds more than one object: take one object from the first
      non-empty heap.
    * Exactly one heap holds more than one object: shrink it to 0 or 1 so
      that an odd number of single-object heaps is left to the opponent.
    * Otherwise: move to a position whose nim-sum is 0. When the nim-sum is
      already 0 no such move exists, so a single object is taken from a
      random non-empty heap.

    The board's per-move bound ``k`` is not taken into account.

    Raises:
        ValueError: if every heap is empty.
    """
    rows = heaps.rows
    non_empty = [i for i, h in enumerate(rows) if h > 0]
    if not non_empty:
        raise ValueError("nim_sum_strategy cannot move: every heap is empty")
    n_big = sum(1 for h in rows if h > 1)

    if n_big == 0:  # e.g. [1, 0, 1, 1]
        heaps.nimming(rows.index(1), 1, player)
        return

    if n_big == 1:  # e.g. [1, 2, 1, 1] or [1, 2, 1] or [2, 0, 0]
        big = rows.index(max(rows))
        # Even number of non-empty heaps (big one included): empty the big
        # heap. Odd number: leave one object in it. Either way the opponent
        # faces an odd number of single-object heaps.
        leave = len(non_empty) % 2
        heaps.nimming(big, rows[big] - leave, player)
        return

    # normal game: at least two heaps hold more than one object
    x = nim_sum(rows)
    if x == 0:
        # No move can restore a zero nim-sum. Taking a single object is
        # always legal here and cannot take the last object.
        heaps.nimming(random.choice(non_empty), 1, player)
        return
    targets = [x ^ h for h in rows]
    chosen_heap_idx = random.choice([i for i, h in enumerate(rows) if targets[i] < h])
    heaps.nimming(chosen_heap_idx, rows[chosen_heap_idx] - targets[chosen_heap_idx], player)



In [16]:
# Demo match on a 10-row board with no per-move bound: Alice uses the
# nim-sum strategy and moves first, Bob plays random moves.
heaps = Nim(10)
Alice = Player("Alice", nim_sum_strategy)
Bob = Player("Bob", random_strategy)
play(Alice, Bob, heaps)


DEBUG:root:[1, 3, 5, 7, 9, 11, 13, 15, 17, 19] : initial state
DEBUG:root:[1, 3, 5, 7, 9, 11, 13, 15, 17, 17] : state after Alice ply
DEBUG:root:[1, 3, 5, 7, 9, 11, 5, 15, 17, 17] : state after Bob ply
DEBUG:root:[1, 3, 5, 7, 1, 11, 5, 15, 17, 17] : state after Alice ply
DEBUG:root:[1, 3, 5, 7, 0, 11, 5, 15, 17, 17] : state after Bob ply
DEBUG:root:[0, 3, 5, 7, 0, 11, 5, 15, 17, 17] : state after Alice ply
DEBUG:root:[0, 3, 5, 7, 0, 7, 5, 15, 17, 17] : state after Bob ply
DEBUG:root:[0, 3, 5, 7, 0, 7, 5, 3, 17, 17] : state after Alice ply
DEBUG:root:[0, 3, 5, 7, 0, 7, 0, 3, 17, 17] : state after Bob ply
DEBUG:root:[0, 3, 0, 7, 0, 7, 0, 3, 17, 17] : state after Alice ply
DEBUG:root:[0, 0, 0, 7, 0, 7, 0, 3, 17, 17] : state after Bob ply
DEBUG:root:[0, 0, 0, 4, 0, 7, 0, 3, 17, 17] : state after Alice ply
DEBUG:root:[0, 0, 0, 4, 0, 7, 0, 0, 17, 17] : state after Bob ply
DEBUG:root:[0, 0, 0, 4, 0, 4, 0, 0, 17, 17] : state after Alice ply
DEBUG:root:[0, 0, 0, 3, 0, 4, 0, 0, 17, 17] : state after Bob ply
[... remaining debug output truncated ...]

Alice wins in 12 plies
