Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using rules evolved with an evolution strategy (ES)

## Instructions

* Create the directory `lab2` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) in it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [2]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [3]:
# A single move: take `num_objects` objects from the row with index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [50]:
class Nim:
    """Board of a Nim game.

    Row ``i`` starts with ``2 * i + 1`` objects. ``k``, when given, is the
    maximum number of objects that a single ply may remove; ``None`` means
    there is no limit.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Return the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Immutable snapshot of the number of objects in each row."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply ``ply`` (a ``(row, num_objects)`` pair) to the board in place.

        Raises:
            AssertionError: if the ply removes fewer than one object, more
                objects than the row holds, or more than the limit ``k``.
        """
        row, num_objects = ply
        # A ply must remove something: zero would be a "pass" and a negative
        # count would put objects back on the board.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) strategies

In [5]:
def pure_random(state: Nim) -> Nimply:
    """Return a random legal move.

    A non-empty row is chosen at random, then a random number of objects to
    remove from it. The count never exceeds the board's per-move limit ``k``
    when one is set, so the returned ply passes the checks in ``Nim.nimming``.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    max_take = state.rows[row]
    if state._k is not None:
        # Nim exposes no public accessor for k, hence the private attribute.
        max_take = min(max_take, state._k)
    num_objects = random.randint(1, max_take)
    return Nimply(row, num_objects)


In [6]:
def eliminate_two_row(state: Nim) -> Nimply:
    """Empty the first row that holds exactly two objects.

    Falls back to ``pure_random`` when no such row exists, or when the
    board's per-move limit ``k`` is below two (removing two objects would
    then be rejected by ``Nim.nimming``).
    """
    # Nim exposes no public accessor for k, hence the private attribute.
    if state._k is None or state._k >= 2:
        for row, count in enumerate(state.rows):
            if count == 2:
                return Nimply(row, 2)
    return pure_random(state)

In [7]:
def eliminate_one_row(state: Nim) -> Nimply:
    """Empty the first row that holds exactly one object.

    Falls back to ``pure_random`` when no row holds a single object.
    """
    for row, count in enumerate(state.rows):
        if count == 1:
            return Nimply(row, count)
    return pure_random(state)

In [8]:
def leave_one_elem_row(state: Nim) -> Nimply:
    """Reduce the first suitable row to a single object.

    A row is suitable when it holds more than one object and removing all
    but one of them respects the board's per-move limit ``k`` (if set).
    Falls back to ``pure_random`` when no row is suitable.
    """
    limit = state._k  # Nim exposes no public accessor for k
    for row, count in enumerate(state.rows):
        take = count - 1
        if take >= 1 and (limit is None or take <= limit):
            return Nimply(row, take)
    return pure_random(state)

In [9]:
def gabriele(state: Nim) -> Nimply:
    """Take as many objects as allowed from the first non-empty row.

    "As many as allowed" is the whole row, capped by the board's per-move
    limit ``k`` when one is set.

    Raises:
        ValueError: if the board is empty, i.e. there is no legal move.
    """
    limit = state._k  # Nim exposes no public accessor for k
    for row, count in enumerate(state.rows):
        if count > 0:
            take = count if limit is None else min(count, limit)
            return Nimply(row, take)
    raise ValueError("gabriele() needs a board with at least one object")


In [119]:
class adaptive:
    """Strategy that mixes other strategies according to evolvable weights.

    ``genome`` maps each strategy function to a non-negative weight and
    ``tweak_value`` is the step by which ``tweak`` changes one weight.
    """

    def __init__(self) -> None:
        self.genome = {
            leave_one_elem_row: .2,
            eliminate_one_row: .2,
            pure_random: .2,
            eliminate_two_row: .2,
            optimal: .2,
        }
        self.tweak_value = .2

    def tweak(self) -> None:
        """Mutate the genome in place.

        One strategy is picked at random and its weight is moved up or down
        by ``tweak_value``; a change that would make the weight negative is
        discarded.
        """
        # Draw from the genome itself so the candidate list cannot drift out
        # of sync with the strategies actually stored in it.
        key = random.choice(list(self.genome))
        value = self.genome[key] + random.choice([1, -1]) * self.tweak_value
        if value >= 0:
            self.genome[key] = value

    def adaptive(self, state: "Nim") -> "Nimply":
        """Pick a strategy with probability proportional to its weight and play it.

        If every weight is zero the strategy is picked uniformly at random.
        """
        strategies = list(self.genome)
        weights = list(self.genome.values())
        if sum(weights) > 0:
            # random.choices always yields an element, unlike a hand-rolled
            # roulette loop that can fall through on float rounding.
            chosen = random.choices(strategies, weights=weights, k=1)[0]
        else:
            # random.choices rejects an all-zero weight vector.
            chosen = random.choice(strategies)
        return chosen(state)
            

            

In [48]:
import numpy as np


def nim_sum(state: "Nim") -> int:
    """Score a position by its nim-sum, with a special case for the endgame.

    Returns ``-1`` when every non-empty row holds exactly one object and the
    number of those rows is odd. Otherwise returns the nim-sum, i.e. the
    bitwise XOR of all row sizes (``0`` for an empty board).
    """
    single_rows = 0
    only_singles = True
    xor = 0
    for c in state.rows:
        xor ^= c
        if c == 1:
            single_rows += 1
        elif c != 0:
            only_singles = False
    # An odd count is necessarily non-zero, so an empty board is not flagged.
    if only_singles and single_rows % 2 == 1:
        return -1
    return xor


def analize(raw: Nim) -> dict:
    """Evaluate every legal move of ``raw``.

    Returns a dict whose ``"possible_moves"`` entry maps each legal
    ``Nimply`` to the ``nim_sum`` of the position reached by playing it.
    Moves above the board's per-move limit ``k`` (when set) are not legal
    and are therefore not enumerated.
    """
    cooked = dict()
    cooked["possible_moves"] = dict()
    limit = raw._k  # Nim exposes no public accessor for k
    for r, c in enumerate(raw.rows):
        max_take = c if limit is None else min(c, limit)
        for o in range(1, max_take + 1):
            ply = Nimply(r, o)
            # Play the move on a copy so the caller's board is untouched.
            tmp = deepcopy(raw)
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    """Play a move chosen from the best-scoring class of moves.

    Moves whose resulting position scores ``-1`` are preferred, then those
    scoring ``0``; if neither class exists any move may be played. The move
    is drawn at random within the selected class.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    for wanted in (-1, 0):
        candidates = [move for move, score in outcomes.items() if score == wanted]
        if candidates:
            break
    else:
        # No move reaches a preferred score: every move is a candidate.
        candidates = list(outcomes.keys())
    return random.choice(candidates)


## Oversimplified match

In [52]:
# Play one verbose match on a 4-row board, logging every ply at INFO level.
logging.getLogger().setLevel(logging.INFO)

# strategy[0] moves first, strategy[1] second.
strategy = (eliminate_two_row, optimal)

nim = Nim(4)
logging.info(f"init : {nim}")
logging.info(f"starting nim sum {nim_sum(nim)}")#cancel
player = 0
# `while nim` runs until the board is empty (see Nim.__bool__).
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    logging.info(f"nim sum={nim_sum(nim)}")#cancel
    # Hand the turn to the other player (0 <-> 1).
    player = 1 - player
# The turn is flipped after every ply, so here `player` is the one who did
# NOT remove the last object; that player is reported as the winner.
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7>
INFO:root:starting nim sum 0
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 2 5 7>
INFO:root:nim sum=1
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 2 5 7>
INFO:root:nim sum=0
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <0 0 5 7>
INFO:root:nim sum=2
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=2)
INFO:root:status: <0 0 5 5>
INFO:root:nim sum=0
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=2)
INFO:root:status: <0 0 5 3>
INFO:root:nim sum=6
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=2)
INFO:root:status: <0 0 3 3>
INFO:root:nim sum=0
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 2 3>
INFO:root:nim sum=1
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 2 2>
INFO:root:nim sum=0
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=2)
INFO:root:status: <0 0 0 2>
I

In [54]:

def game(player0,player1,dim):
    """Play one silent match on a ``dim``-row board and return the winner's index.

    ``player0`` moves first. The turn is flipped after every ply, so the
    value returned (0 or 1) is the player who did not make the final move.
    """
    board = Nim(dim)
    contenders = (player0, player1)
    turn = 0
    while board:
        board.nimming(contenders[turn](board))
        turn = 1 - turn
    return turn


In [113]:
# Matches played per seat (first / second mover) in one fitness evaluation.
GAMES = 50
# Board sizes (number of rows) a match is drawn from.
DIM = [4,5]

def fitness(player0,player1):
    """Count how many of ``2 * GAMES`` matches ``player0`` wins against ``player1``.

    ``player0`` moves first in the first ``GAMES`` matches and second in the
    remaining ``GAMES``; the board size of each match is drawn from ``DIM``.
    """
    wins_moving_first = sum(
        1 - game(player0, player1, random.choice(DIM)) for _ in range(GAMES)
    )
    wins_moving_second = sum(
        game(player1, player0, random.choice(DIM)) for _ in range(GAMES)
    )
    return wins_moving_first + wins_moving_second

In [121]:
# Evolve the weights of an `adaptive` player against `eliminate_one_row`:
# each step mutates a copy of the current individual and keeps the copy
# if it scores at least as well.
current_state = adaptive()
fitness_current_state = fitness(current_state.adaptive,eliminate_one_row)
for _ in range(100):
    # Mutate a deep copy so a rejected offspring leaves the parent untouched.
    new_state = deepcopy(current_state)
    new_state.tweak()
    fitness_new_state = fitness(new_state.adaptive,eliminate_one_row)
    # Progress trace: "<parent fitness>-<offspring fitness>".
    print(f"{fitness_current_state}-{fitness_new_state}")
    # Ties are accepted too. The parent's fitness is not re-evaluated; it is
    # only replaced when an offspring is accepted.
    if fitness_new_state>=fitness_current_state:
        current_state = new_state
        fitness_current_state = fitness_new_state
        
        
pprint(current_state.genome)


69-56
69-63
69-77
77-64
77-72
77-72
77-72
77-74
77-66
77-73
77-61
77-71
77-71
77-62
77-70
77-53
77-74
77-72
77-64
77-64
77-77
77-67
77-79
79-74
79-77
79-86
86-82
86-84
86-86
86-92
92-94
94-82
94-93
94-100
100-88
100-100
100-78
100-75
100-78
100-100
100-100
100-100
100-100
100-88
100-82
100-80
100-100
100-100
100-90
100-100
100-100
100-100
100-100
100-100
100-100
100-79
100-81
100-100
100-100
100-100
100-89
100-100
100-93
100-100
100-100
100-100
100-100
100-91
100-99
100-100
100-91
100-99
100-92
100-100
100-100
100-100
100-89
100-100
100-92
100-100
100-89
100-90
100-93
100-100
100-100
100-88
100-87
100-100
100-100
100-100
100-100
100-94
100-99
100-100
100-90
100-100
100-100
100-100
100-97
100-100
{<function pure_random at 0x0000023CFCDB89A0>: 0.0,
 <function eliminate_one_row at 0x0000023CFCDB8D60>: 0.0,
 <function eliminate_two_row at 0x0000023CFCDB9620>: 0.0,
 <function leave_one_elem_row at 0x0000023CFCDB9E40>: 0.0,
 <function optimal at 0x0000023CFD26A3E0>: 1.4}
