# Lab 2: ES
## RUBEN TETAMO 317569
## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal repository for the course
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) inside it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [18]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from math import exp, sqrt
from copy import deepcopy
import numpy as np

## The *Nim* and *Nimply* classes

In [19]:
# A move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [20]:
class Nim:
    """State of a Nim board.

    Row ``i`` starts with ``2 * i + 1`` objects. ``k`` is the maximum number
    of objects that may be removed in a single move; ``None`` (the default)
    means there is no upper bound.
    """

    def __init__(self, num_rows: int, k: "int | None" = None) -> None:
        # The i-th element is the number of objects left in the i-th row.
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Return the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply the move ``ply`` (row index, number of objects) in place.

        Raises:
            AssertionError: if the move takes no objects, takes more than
                the row holds, or takes more than ``k`` when ``k`` is set.
        """
        row, num_objects = ply
        assert num_objects > 0
        assert self._rows[row] >= num_objects
        # k is the largest amount a player may remove from a row in one move.
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Strategies 

In [21]:
def pure_random(state: Nim) -> Nimply:
    """Return a random legal move.

    A non-empty row is drawn first, then a number of objects between 1 and
    the largest amount that may be taken from that row.
    """
    # r is the row index, c the number of objects still in that row.
    non_empty_rows = [r for r, c in enumerate(state.rows) if c > 0]
    row = random.choice(non_empty_rows)
    available = state.rows[row]
    # `_k` may be None (no per-move limit); then only the row size caps the move.
    limit = available if state._k is None else min(state._k, available)
    return Nimply(row, random.randint(1, limit))


In [22]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row.

    The lowest-indexed non-empty row is selected and as many objects as the
    rules allow are removed from it: the whole row, or ``k`` objects when the
    per-move limit ``k`` is smaller than the row.

    Raises:
        ValueError: if every row is empty, i.e. there is no legal move.
    """
    limit = state._k
    for row, count in enumerate(state.rows):
        if count > 0:
            # The largest legal amount is min(k, count) itself, not one less.
            amount = count if limit is None else min(limit, count)
            return Nimply(row, amount)
    raise ValueError("no legal move: every row is empty")


In [23]:
def take_first(state:Nim) -> Nimply:
    """Remove a single object from the first row that still holds objects."""
    counts = np.array(state.rows)
    # Indices of the rows whose object count is positive, in ascending order.
    occupied = np.flatnonzero(counts > 0)
    return Nimply(occupied[0], 1)

In [24]:
def take_last(state:Nim) -> Nimply:
    """Remove a single object from the last row that still holds objects."""
    counts = np.array(state.rows)
    # Indices of the rows whose object count is positive, in ascending order.
    occupied = np.flatnonzero(counts > 0)
    return Nimply(occupied[-1], 1)

In [25]:
def take_from_middle(state:Nim) -> Nimply:
    """Remove a single object from a random non-empty row, avoiding the edges.

    When more than two rows are non-empty, the first and the last of them are
    excluded from the draw; otherwise any non-empty row may be chosen.
    """
    candidates = np.array([idx for idx, count in enumerate(state.rows) if count > 0])
    last = len(candidates) - 1
    # Three or more candidates: skip position 0 and position `last`.
    low, high = (1, last - 1) if last >= 2 else (0, last)
    return Nimply(candidates[random.randint(low, high)], 1)

In [26]:
def take_one_from_max(state:Nim) -> Nimply:
    """Remove a single object from the row holding the most objects.

    On ties the lowest-indexed of the largest rows is used.
    """
    counts = state._rows
    fullest = max(range(len(counts)), key=lambda idx: counts[idx])
    return Nimply(fullest, 1)

In [27]:
def take_random_from_max(state:Nim) -> Nimply:
    """Remove a random number of objects from the row holding the most objects.

    The amount is drawn between 1 and the smaller of the row size and the
    per-move limit. On ties the lowest-indexed of the largest rows is used.
    """
    fullest, most = max(enumerate(state._rows), key=lambda pair: pair[1])
    upper = min(most, state._k)
    return Nimply(fullest, random.randint(1, upper))

In [28]:
def take_one_from_min(state:Nim)-> Nimply:
    """Remove a single object from the non-empty row holding the fewest objects.

    On ties the lowest-indexed of the smallest rows is used.

    Raises:
        ValueError: if every row is empty.
    """
    # Compare (count, index) pairs so the row is chosen by size, not by position.
    _, row = min((count, idx) for idx, count in enumerate(state._rows) if count > 0)
    return Nimply(row, 1)

In [29]:
def take_random_from_min(state:Nim) -> Nimply:
    """Remove a random number of objects from the smallest non-empty row.

    The amount is drawn between 1 and the largest legal amount for that row.
    On ties the lowest-indexed of the smallest rows is used.

    Raises:
        ValueError: if every row is empty.
    """
    # Compare (count, index) pairs so the row is chosen by size, not by position.
    available, row = min((count, idx) for idx, count in enumerate(state._rows) if count > 0)
    # `_k` may be None (no per-move limit); then only the row size caps the move.
    limit = available if state._k is None else min(available, state._k)
    return Nimply(row, random.randint(1, limit))

In [30]:
def take_joker(state:Nim)-> Nimply:
    """Switch strategy on the parity of the total number of objects.

    With an even total the move comes from `take_random_from_max`; with an
    odd total it comes from `take_random_from_min`.
    """
    total_is_odd = sum(state._rows) % 2 != 0
    chosen = take_random_from_min if total_is_odd else take_random_from_max
    return chosen(state)

Optimal strategy

In [31]:

def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of ``state``: the bitwise XOR of all its row sizes.

    An empty board has nim-sum 0. Row sizes of any magnitude are supported.
    """
    total = 0
    for count in state.rows:
        total ^= count
    return total


def analize(raw: Nim) -> dict:
    """Map every legal move in ``raw`` to the nim-sum of the resulting board.

    Returns:
        A dict with the single key ``"possible_moves"``; its value maps each
        ``Nimply`` to the nim-sum (XOR of the row sizes) after that move.
        Moves are listed row by row, by increasing number of objects.
    """
    rows = raw.rows
    current = 0
    for count in rows:
        current ^= count

    outcomes = dict()
    for r, c in enumerate(rows):
        # At most k objects per move, or the whole row when k is None.
        most = c if raw._k is None else min(raw._k, c)
        for o in range(1, most + 1):
            # Shrinking row r from c to c - o replaces c with (c - o) in the
            # XOR, so the board need not be copied and re-summed per move.
            outcomes[Nimply(r, o)] = current ^ c ^ (c - o)
    return {"possible_moves": outcomes}

# `optimal` analyses every legal move together with the nim-sum it leads to.
# It picks at random among the moves that leave a non-zero nim-sum; when there
# is none, it picks at random among all legal moves.
def optimal(state: Nim) -> Nimply:
    """Return a random move among those leaving a non-zero nim-sum, if any.

    NOTE(review): classic Nim theory aims at leaving a nim-sum of zero to the
    opponent; confirm that preferring non-zero nim-sums is intended here.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    candidates = [ply for ply in outcomes if outcomes[ply] != 0]
    if len(candidates) == 0:
        candidates = list(outcomes)
    return random.choice(candidates)


## Game functions

In [32]:
logging.getLogger().setLevel(logging.WARNING)
def game(agent1,agent2, nim_size, k)-> int:
    """Play one match between two agents and return the winner's index.

    ``agent1`` (index 0) moves first. Agents alternate until the board is
    empty; the returned index belongs to the player who did NOT make the
    last move.
    """
    nim = Nim(nim_size, k)
    agents = (agent1, agent2)
    logging.info(f"init : {nim}")
    player = 0
    while bool(nim):
        ply = agents[player](nim)
        logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        logging.info(f"status: {nim}")
        # Hand the turn to the other player (0 <-> 1).
        player ^= 1
    logging.info(f"status: Player {player} won!")
    return player

In [46]:
def game2(strategy_list,p, nim_size, k)-> int:
    """Play one match of a mixed-strategy player against `pure_random`.

    Player 0 moves first and, on each of its turns, draws the strategy to
    apply from ``strategy_list`` with probabilities ``p``. Player 1 always
    plays `pure_random`.

    Returns:
        The index of the player who did NOT make the last move (the winner).
    """
    nim = Nim(nim_size,k)
    logging.info(f"init : {nim}")
    player = 0
    while nim:
        if player == 0:
            # Draw a strategy only when it is actually going to be used.
            mover = np.random.choice(strategy_list, p=p)
        else:
            mover = pure_random
        ply = mover(nim)
        logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        logging.info(f"status: {nim}")
        player = 1 - player
    logging.info(f"status: Player {player} won!")
    return player

In [34]:
def num_wins(agent1, agent2, nim_size, num_matches, k)-> list:
    """Play ``num_matches`` matches and count the wins of each agent.

    Returns:
        ``[wins_of_agent1, wins_of_agent2]``; both are 0 when ``num_matches``
        is zero or negative.
    """
    wins = [0, 0]
    for _ in range(num_matches):
        # `game` builds its own board, so none is created here.
        wins[game(agent1, agent2, nim_size, k)] += 1
    return wins

## Adaptive algorithm

 Fitness

In [36]:
# Left over from a previous implementation. Totally wrong, discard.
def fitness(state:Nim, ply:Nimply):
    """Score ``ply`` as the nim-sum it leaves plus a hundredth of the objects taken.

    The move is applied to a copy, so ``state`` itself is not modified.
    """
    after = deepcopy(state)
    after.nimming(ply)
    bonus = ply[1] / 100
    return nim_sum(after) + bonus

In [37]:
# First version of a correct fitness; it gave bad results because its value
# fluctuates too much.
def fitness1(p: np.array, agents: np.array):
    """Estimate the win rate of a player that draws ONE agent per game.

    ``p`` holds the (unnormalised) weights of ``agents``. For each of the
    1000 games one agent is drawn and plays the whole game as first player
    against `pure_random` on a 5-row board with ``k = 4``.

    Returns:
        The fraction of games won by the first player.
    """
    NUM_GAMES = 1000
    p = p / p.sum()
    first_player_wins = 0
    for _ in range(NUM_GAMES):
        picked = np.random.choice(agents, p=p)
        if game(agent1=picked, agent2=pure_random, nim_size=5, k=4) == 0:
            first_player_wins += 1
    return first_player_wins / NUM_GAMES

In [57]:
# Good one. A higher number of games would probably give a steadier estimate,
# but the tests would become too slow.
def fitness2(p: np.array, agents: np.array, num_games: int = 500, nim_size: int = 5, k: int = 4):
    """Estimate the win rate of the mixed strategy ``p`` against `pure_random`.

    Args:
        p: non-negative weights, one per entry of ``agents``; they are
            normalised to probabilities here.
        agents: candidate strategies the mixed player draws from on each move.
        num_games: number of matches played through `game2`.
        nim_size: number of rows of the board.
        k: maximum number of objects that can be removed in one move.

    Returns:
        The fraction of matches won by the mixed-strategy player (player 0).

    Raises:
        ValueError: if ``num_games`` is smaller than 1 or the weights sum to 0.
    """
    if num_games < 1:
        raise ValueError("num_games must be at least 1")
    total = p.sum()
    if total == 0:
        raise ValueError("the weights must not sum to zero")
    p = p / total
    wins = [0, 0]
    for _ in range(num_games):
        wins[game2(agents, p, nim_size, k)] += 1

    return wins[0] / num_games

In [40]:
def mutate(p_weights, sigma) -> np.ndarray:
    """Return a mutated copy of ``p_weights`` that is non-negative and sums to 100.

    Gaussian noise with standard deviation ``sigma`` is added to every weight.
    Negative results are clipped to 0 *before* rescaling, so the output is
    always a valid weight vector. If every perturbed weight is clipped to 0,
    uniform weights are returned. An empty input yields an empty array.
    """
    noisy = np.array([w + random.gauss(0, sigma) for w in p_weights])
    clipped = np.clip(noisy, a_min=0, a_max=None)
    if clipped.size == 0:
        return clipped
    total = clipped.sum()
    if total <= 0:
        # Nothing survived the clipping: fall back to equal weights.
        return np.full(clipped.size, 100 / clipped.size)
    return clipped / total * 100

In [None]:
def update_sigma(sigma,tau, success) -> float:
    """Return the adapted step size.

    ``sigma`` is multiplied by ``exp(1/3)`` when ``success`` is truthy and
    divided by ``exp(1/12)`` otherwise. ``tau`` is accepted but not used.
    """
    if success:
        return sigma * exp(1 / 3)
    return sigma / exp(1 / 12)
# Quick demonstration of both outcomes on a sample step size.
s = .3
pprint(update_sigma(s, 1 / sqrt(2), True))
pprint(update_sigma(s, 1 / sqrt(2), False))

## (mu+lambda)

In [None]:
PARENTS_NUMBER = 3
OFFSPRING_NUMBER = 5
EPOCH = 10
# Other strategies that can be added to the pool: take_first, take_last,
# take_from_middle, take_joker, take_one_from_max, take_one_from_min,
# take_random_from_min, take_random_from_max
agents = np.array([gabriele, optimal])
tau = 1 / sqrt(len(agents))
sigma = 5

# Build the parent population: random weights rescaled to sum to 100,
# each paired with its measured fitness.
p_population = []
for _ in range(PARENTS_NUMBER):
    p = np.array([random.randint(1, 1000) for _ in agents])
    p = 100 * p / p.sum()
    f = fitness2(p, agents)
    p_population.append((p, f))

pprint(p_population)
# Offspring of the current epoch and number of offspring that beat their parent.
offspring = []
success = 0
for _ in range(EPOCH):

    # Every parent produces OFFSPRING_NUMBER mutated children.
    for parent_weights, parent_fitness in p_population:
        for _ in range(OFFSPRING_NUMBER):
            o_weights = mutate(parent_weights, sigma)
            o_fitness = fitness2(o_weights, agents)
            offspring.append((o_weights, o_fitness))
            if o_fitness > parent_fitness:
                success += 1
    # (mu + lambda): parents compete with their offspring.
    offspring.extend(p_population)
    # Best fitness first.
    offspring.sort(key=lambda x: x[1], reverse=True)

    # The fittest individuals become the next parents.
    p_population = offspring[:PARENTS_NUMBER]
    # Enlarge sigma when more than a sixth of the offspring improved on their
    # parent, shrink it otherwise.
    sigma = update_sigma(sigma, tau, success > OFFSPRING_NUMBER * PARENTS_NUMBER / 6)
    pprint([sigma, p_population[0]])
    # Start the next epoch from a clean slate.
    offspring = []
    success = 0
pprint(p_population)
pprint(fitness2(p_population[0][0], agents))

First attempt at an adaptive algorithm (ignore)

In [None]:
ERA=100
def adaptive1(state: Nim) -> Nimply:
    """Random-walk search for a move, returning the best-scoring one seen.

    Starting from a random move, the row index and the number of objects are
    perturbed with Gaussian noise for ``ERA`` iterations. Every candidate is
    clamped to a non-empty row and to the size of that row, scored with
    `fitness`, and the highest-scoring candidate is returned.
    """
    n_rows = len(state._rows)
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_item = random.randint(1, state._rows[row])

    genome = {"love_small": 0.4, "row": int(row), "num_item": int(num_item)}
    best_ply = Nimply(genome["row"], genome["num_item"])
    best_fitness = fitness(state, best_ply)
    for _ in range(ERA):
        # The walk continues from the previous (clamped) position.
        row += random.gauss(0, genome["love_small"])
        num_item += random.gauss(0, genome["love_small"])

        # Keep the row index inside the board and on a non-empty row.
        if row >= n_rows:
            row = n_rows - 1
        if row < 0:
            row = 0
        if state.rows[int(row)] == 0:
            row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
        # Keep the amount between 1 and the size of the chosen row.
        available = state.rows[int(row)]
        if num_item < 1:
            num_item = 1
        if num_item > available:
            num_item = available

        genome["row"] = int(row)
        genome["num_item"] = int(num_item)
        candidate = Nimply(genome["row"], genome["num_item"])
        score = fitness(state, candidate)
        if score > best_fitness:
            best_fitness, best_ply = score, candidate

    return best_ply

1+1 algorithm (ignore)

In [None]:
ERA=100
tau=0.707
def adaptive2(state: Nim) -> Nimply:
    """(1+1) evolution strategy searching for a single move on ``state``.

    The parent is a random move with step size ``sigma``. At each of the
    ``ERA`` iterations one offspring is produced by adding Gaussian noise to
    the parent's row and amount; it replaces the parent when `fitness` scores
    it strictly higher. Every 5 iterations sigma is adapted from the number
    of successes recorded in the previous 5 iterations.

    Returns:
        The move encoded by the final parent.
    """
    n_rows = len(state._rows)
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_item = random.randint(1, state._rows[row])

    parent = {"row": row, "num_item": num_item, "sigma": 0.4}
    parent_fitness = fitness(state, Nimply(parent["row"], parent["num_item"]))

    # One entry per iteration: 1 if the offspring replaced the parent, else 0.
    successes = []

    for step in range(ERA):
        # Sigma update, only every 5 steps, based on the last 5 outcomes.
        if step >= 5 and step % 5 == 0:
            if sum(successes[-5:]) > 1:
                parent["sigma"] = parent["sigma"] * np.exp(tau * random.gauss(0, parent["sigma"]))
            else:
                # NOTE(review): this divides sigma by e**12 (about 1.6e5);
                # `update_sigma` in this file uses exp(1/12) -- confirm intent.
                parent["sigma"] = parent["sigma"] / np.exp(12)

        row = parent["row"] + random.gauss(0, parent["sigma"])
        num_item = parent["num_item"] + random.gauss(0, parent["sigma"])

        # Keep the row index inside the board and on a non-empty row.
        row = row if row < n_rows else n_rows - 1
        row = row if row >= 0 else 0
        if state.rows[int(row)] == 0:
            row = random.choice([r for r, c in enumerate(state.rows) if c > 0])

        # Keep the amount between 1 and the size of the chosen row.
        available = state.rows[int(row)]
        num_item = num_item if num_item >= 1 else 1
        num_item = num_item if num_item <= available else available

        # Offspring generated from the mutated parent.
        offspring = {"row": int(row), "num_item": int(num_item), "sigma": parent["sigma"]}
        offspring_fitness = fitness(state, Nimply(offspring["row"], offspring["num_item"]))

        if offspring_fitness > parent_fitness:
            parent = offspring
            # Later offspring must be compared with the new parent, not with
            # the fitness of the initial one.
            parent_fitness = offspring_fitness
            successes.append(1)
        else:
            successes.append(0)

    return Nimply(parent["row"], parent["num_item"])

## Oversimplified match

In [None]:
# Play NUM_MATCHES matches of adaptive2 (player 0) against adaptive1 (player 1)
# and count how many each one wins.
wins = [0, 0]
NUM_MATCHES = 100
logging.getLogger().setLevel(logging.INFO)

strategy = (adaptive2, adaptive1)
for _ in range(NUM_MATCHES):
    # Nim requires the per-move limit k; None means "no limit", which is what
    # the adaptive agents assume when they draw up to a whole row.
    nim = Nim(5, None)
    #logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        #logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        #logging.info(f"status: {nim}")
        player = 1 - player
    # The player who did not make the last move wins.
    logging.info(f"status: Player {player} won!")
    wins[player] += 1

pprint(wins)
