Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [11]:
import logging
from itertools import product
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
import numpy as np
import time


## The *Nim* and *Nimply* classes

In [12]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [13]:
class Nim:
    """Board of a Nim game: a list of rows, each holding a count of objects.

    Row ``i`` starts with ``2 * i + 1`` objects (1, 3, 5, ...).  An instance
    is truthy while at least one object is left on the board.
    """

    def __init__(self, num_rows: int) -> None:
        """Create a board with ``num_rows`` rows of 1, 3, 5, ... objects."""
        self._rows = [i * 2 + 1 for i in range(num_rows)]

    def __bool__(self) -> bool:
        """Return True while the game is still running (objects are left)."""
        return sum(self._rows) > 0

    def __str__(self) -> str:
        """Return the row counts as ``<c0 c1 ...>``."""
        return "<" + " ".join(str(count) for count in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row counts."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply a move: remove ``ply.num_objects`` objects from row ``ply.row``.

        Raises:
            IndexError: if the row index is outside ``0 .. len(rows) - 1``.
                Negative indices are rejected instead of silently wrapping
                around to the last rows.
            ValueError: if the number of objects is not between 1 and the
                number of objects currently in the row.
        """
        row, num_objects = ply
        # Explicit checks instead of `assert`, which disappears under `-O`.
        if not 0 <= row < len(self._rows):
            raise IndexError(f"row {row} is not on the board")
        if not 1 <= num_objects <= self._rows[row]:
            raise ValueError(
                f"cannot remove {num_objects} objects from row {row} "
                f"holding {self._rows[row]}"
            )
        self._rows[row] -= num_objects


## Sample (and silly) strategies

In [14]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row, then a random number of objects from it."""
    counts = state.rows
    non_empty = []
    for index, count in enumerate(counts):
        if count > 0:
            non_empty.append(index)
    chosen_row = random.choice(non_empty)
    taken = random.randint(1, counts[chosen_row])
    return Nimply(chosen_row, taken)


In [15]:
def gabriele(state: Nim) -> Nimply:
    """Take as many objects as possible from the non-empty row with the lowest index."""
    candidates = []
    for row_index, count in enumerate(state.rows):
        for taken in range(1, count + 1):
            candidates.append((row_index, taken))

    def preference(move):
        # Lower row index first, then the larger number of objects.
        return (-move[0], move[1])

    best_row, best_taken = max(candidates, key=preference)
    return Nimply(best_row, best_taken)


In [16]:
def me(state: Nim) -> Nimply:
    """Ask a human for a move on standard input; the row is typed 1-based."""
    typed_row = input("Row: ")
    typed_count = input("Num objects: ")
    # Rows are shown to the user starting from 1, but stored starting from 0.
    row_index = int(typed_row) - 1
    return Nimply(row_index, int(typed_count))

In [17]:
def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of a board: the bitwise XOR of all its row counts.

    Args:
        state: any object exposing the row counts as an iterable ``rows``.

    Returns:
        The XOR of the counts as a Python ``int``; ``0`` for a board with
        no rows.
    """
    # Plain integer XOR: no fixed 32-bit width and no intermediate bit matrix.
    total = 0
    for count in state.rows:
        total ^= int(count)
    return total

def analize(raw: Nim) -> dict:
    """Map every legal move on `raw` to the nim-sum of the board it produces.

    The result is a dict with the single key ``"possible_moves"``, whose value
    maps each ``Nimply`` to the nim-sum after playing it.  `raw` itself is not
    modified: every move is tried on a deep copy.
    """
    outcomes = {}
    for row_index, count in enumerate(raw.rows):
        for taken in range(1, count + 1):
            move = Nimply(row_index, taken)
            board = deepcopy(raw)
            board.nimming(move)
            outcomes[move] = nim_sum(board)
    return {"possible_moves": outcomes}

def optimal(state: Nim) -> Nimply:
    """Play a random move that leaves a nim-sum of zero, or any move if none does."""
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    zeroing = [move for move, value in outcomes.items() if value == 0]
    # Fall back to every legal move when no move zeroes the nim-sum.
    pool = zeroing if zeroing else list(outcomes.keys())
    return random.choice(pool)

def spicy(state: Nim) -> Nimply:
    """Play a random move that leaves a non-zero nim-sum, or any move if none does."""
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    non_zeroing = []
    for move, value in outcomes.items():
        if value != 0:
            non_zeroing.append(move)
    if len(non_zeroing) == 0:
        # Every move zeroes the nim-sum: choose among all legal moves instead.
        non_zeroing = list(outcomes.keys())
    return random.choice(non_zeroing)

## Adaptive Strategy
### The genotype
The adaptive strategy genotype is a multidimensional array that represents all the possible states of the game, where each dimension represents a row of the game. Each cell of the array holds the next move that the strategy will play in that state.

#### Example:
`GAME_SIZE = 2`: a game with 2 rows, one with 1 element and one with 3 elements.  
The array is a 2x4 matrix where the first dimension represents the first row, which can hold from 0 up to 1 elements, and the second dimension represents the second row, which can hold from 0 up to 3 elements.

One possible instance of the moves array is:

```
[[None                         Nimply(row=1, num_objects=1) Nimply(row=1, num_objects=2) Nimply(row=1, num_objects=3)]
 [Nimply(row=0, num_objects=1) Nimply(row=1, num_objects=1) Nimply(row=1, num_objects=1) Nimply(row=0, num_objects=1)]]
```

In this case the move that the strategy will play in the state (1, 3) is stored in the cell `moves[1][3]`, and it is `Nimply(row=0, num_objects=1)`. The next state after this move will be (0, 3); the other player then makes their move, and the adaptive strategy picks its next move from the new state in the same way.

### The fitness function
The fitness function is really simple and it is 1 if the strategy wins the game and 0 if it loses the game.

In [18]:
class Adaptive:
    """A Nim strategy stored as a lookup table from board state to move.

    Attributes are documented here rather than inline:

    * ``dim``: number of rows of the game the table is built for.
    * ``name``: prefix of the ``.npy`` file used by ``load_moves``/``save``.
    * ``moves``: object array with one axis per row; axis ``i`` has
      ``2 * i + 2`` entries, one for each count ``0 .. 2 * i + 1`` of row
      ``i``.  ``moves[state]`` is the ``Nimply`` played in ``state``
      (``None`` for the empty board).
    * ``records``: the states in which ``move`` was called since the last
      ``clean_records``.
    """

    def __init__(self, dim: int, name: str = "") -> None:
        """Build a random table for ``dim`` rows, seeded from a saved one if found."""
        self.dim = dim
        self.name = name
        self.records = []

        loaded_moves, tries = self.load_moves()

        self.moves = Adaptive.init_moves(dim)

        if loaded_moves.size:
            # A table saved for `dim - tries` rows is written onto the states
            # whose last `tries` rows are empty.  The index is built as an
            # explicit tuple because star-unpacking inside a subscript is a
            # syntax error before Python 3.11.
            index = (Ellipsis,) + (0,) * tries
            self.moves[index] = loaded_moves

    def load_moves(self):
        """Load a previously saved table, trying ``dim``, ``dim - 1``, ..., ``1``.

        Returns:
            ``(moves, tries)`` where ``tries`` is the number of files that
            were not found before one was loaded.  If no file exists the
            result is ``(empty array, dim)``.
        """
        for tries in range(self.dim):
            try:
                # NOTE(review): allow_pickle=True unpickles arbitrary objects;
                # only load files produced by `save` from a trusted source.
                loaded_moves = np.load(f"{self.name}_adaptive_{self.dim - tries}.npy", allow_pickle=True)
            except FileNotFoundError:
                continue
            # Return as soon as a file is read, even if it holds an empty
            # array, so a degenerate file cannot cause an endless retry.
            return loaded_moves, tries
        return np.array([]), self.dim

    @staticmethod
    def _random_move(state: tuple) -> "Nimply":
        """Return a random legal move for `state` (which must not be all zeros)."""
        row = random.choice([r for r, c in enumerate(state) if c > 0])
        num_objects = random.randint(1, state[row])
        return Nimply(row, num_objects)

    @staticmethod
    def init_moves(dim: int):
        """Return a table for ``dim`` rows filled with random legal moves.

        The cell of the empty board is left as ``None``.
        """
        shape = tuple(2 * i + 2 for i in range(dim))
        # dtype=object states explicitly that cells hold Python objects.
        moves = np.empty(shape, dtype=object)

        for state in product(*[range(size) for size in shape]):
            if sum(state) > 0:
                moves[state] = Adaptive._random_move(state)

        return moves

    def move(self, state: "Nim") -> "Nimply":
        """Return the stored move for `state` and record that the state was visited."""
        self.records.append(state.rows)
        return self.moves[state.rows]

    def clean_records(self):
        """Forget the states visited so far."""
        self.records = []

    def mutation(self, ply: tuple) -> None:
        """Replace the move stored for one state with a random legal one.

        Despite its name, `ply` is a board state (a tuple of row counts), not
        a move; the parameter name is kept for compatibility.
        """
        self.moves[ply] = Adaptive._random_move(ply)

    @staticmethod
    def get_moves(strategies: dict) -> tuple:
        """Split the visited states by the result of the strategy that visited them.

        Args:
            strategies: maps each strategy to a truthy value if it won.

        Returns:
            ``(good_moves, bad_moves)``: ``good_moves`` maps each state visited
            by a winner to the list of moves the winners hold for it;
            ``bad_moves`` is the set of states visited only by losers.
        """
        good_moves = {}
        bad_moves = set()

        for strategy, result in strategies.items():
            for rd in strategy.records:
                if result:
                    good_moves.setdefault(rd, []).append(strategy.moves[rd])
                else:
                    bad_moves.add(rd)

        # A state that also appears in a winning game is not considered bad.
        bad_moves.difference_update(good_moves)

        return good_moves, bad_moves

    @staticmethod
    def get_candidates(strategies: dict, n_sample: int) -> dict:
        """Return ``n_sample`` deep copies of randomly chosen strategies, each mapped to 0."""
        pool = list(strategies)
        return {deepcopy(random.choice(pool)): 0 for _ in range(n_sample)}

    @staticmethod
    def fake_get_candidates(strategies: dict) -> dict:
        """
        Does not really extract candidates, just returns a dict with the same keys and resets the win flag.
        Much faster than get_candidates.
        """
        return {strat: 0 for strat in strategies}

    @staticmethod
    def next_epoch(strategies: dict, n_sample: int) -> dict:
        """Prepare the population for the next epoch.

        Every strategy (modified in place) has its records cleared, copies a
        randomly chosen winning move for each state visited by a winner, and
        gets a random new move for each state visited only by losers.

        Args:
            strategies: maps each strategy to its result in the last epoch.
            n_sample: currently unused; kept for interface compatibility.

        Returns:
            A dict with the same strategies as keys and all values reset to 0.
        """
        good_moves, bad_moves = Adaptive.get_moves(strategies)

        new_strategies = Adaptive.fake_get_candidates(strategies)

        for strat in new_strategies:
            strat.clean_records()
            for pos, moves in good_moves.items():
                strat.moves[pos] = random.choice(moves)
            for pos in bad_moves:
                strat.mutation(pos)

        return new_strategies

    def save(self):
        """Save the trained moves of the strategy to ``<name>_adaptive_<dim>.npy``."""
        np.save(f"{self.name}_adaptive_{self.dim}.npy", self.moves)

## Match

In [19]:
def match(nim: "Nim", strategies: tuple, start: int = 0, verbose: bool = True) -> int:
    """Play a game of Nim between two strategies until the board is empty.

    Args:
        nim: the board to play on; it is modified in place.
        strategies: the two strategies, indexed by ``0`` and ``1`` (any
            object supporting that indexing works).  Each is called with the
            board and must return the move to play.
        start: index (``0`` or ``1``) of the player that moves first.
        verbose: print the board and every move when true.

    Returns:
        The index of the player that made the last move.  If the board is
        already empty no move is played and ``1 - start`` is returned.
    """
    # Begin on the "other" player so the first flip in the loop selects `start`.
    player = 1 - start
    if verbose:
        print(f"\tstatus: {nim}")
    while nim:
        player = 1 - player
        ply = strategies[player](nim)
        nim.nimming(ply)
        if verbose:
            print(f"\tply: player {player} plays {ply}")
            print(f"\tstatus: {nim}")

    return player

## Training

In [100]:
# Number of rows of the game; passed to both Nim and Adaptive.
GAME_SIZE = 6
# Number of training epochs.
N_EPOCH = 2000
# Number of strategies in each of the two populations.
N_POPULATION = 300
"""The strategies are stored in a dict, the key is the strategy and the value is 0 if the strategy lost and 1 if it won"""
# Two populations that play against each other; the names "1" and "2" are the
# prefixes of the .npy files they load from and save to.
apt_strategies = {Adaptive(GAME_SIZE, "1"): 0 for _ in range(N_POPULATION)}
apt_strategies2 = {Adaptive(GAME_SIZE, "2"): 0 for _ in range(N_POPULATION)}
# Total number of games won by the first population over all epochs.
win = 0

for i in range(N_EPOCH):
    # Each strategy plays one game per epoch, against the strategy at the
    # same position in the other population.
    for apt, apt2 in zip(apt_strategies.keys(), apt_strategies2.keys()):
        nim = Nim(GAME_SIZE)
        strategy = (apt.move, apt2.move)

        # The player that moves first alternates from one epoch to the next.
        winner = match(nim, strategy, start=i%2, verbose=False)
        if winner == 0:
            apt_strategies[apt] += 1
            win += 1
        else:
            apt_strategies2[apt2] += 1

    # Progress report every 10 epochs, overwriting the same output line.
    if (i+1) % 10 == 0:
        print(f"\tEpoch: {i+1}/{N_EPOCH} Avg win rate: {(win*100/(N_POPULATION * (i+1))):.3f} %  ", end="\r")

    # Update the move tables from this epoch's results and reset the scores.
    apt_strategies = Adaptive.next_epoch(apt_strategies, n_sample=N_POPULATION)
    apt_strategies2 = Adaptive.next_epoch(apt_strategies2, n_sample=N_POPULATION)

# The trailing spaces clear what is left of the progress line.
print(f"win rate: {(win*100/(N_EPOCH*N_POPULATION)):.3f} %" + " "*50)

# Take a deep copy of one randomly chosen strategy from each population...
final_apt = Adaptive.get_candidates(apt_strategies, n_sample=1).popitem()[0]
final_apt2 = Adaptive.get_candidates(apt_strategies2, n_sample=1).popitem()[0]

# ...and save its move table to disk.
final_apt.save()
final_apt2.save()


win rate: 51.583 %                                                  
