Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal repository for the course
* Put a `README.md` and your solution in it (all the files, code and auxiliary data if needed)

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.

## Teammates

* Worked with Massimo Porcheddu and Miriam Ivaldi



In [4315]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from random import random, choice, randint

from copy import deepcopy
import random
from collections import namedtuple
from dataclasses import dataclass
from copy import copy, deepcopy


from pprint import pprint

import numpy as np


## The *Nim* and *Nimply* classes

In [4316]:
# A single move: take `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [4317]:
class Nim:
    """Board of a Nim game.

    Row ``i`` starts with ``2 * i + 1`` objects (1, 3, 5, ...).  When ``k`` is
    not ``None`` it is the maximum number of objects a single move may remove.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Create a board with ``num_rows`` rows; ``k=None`` means no per-move limit."""
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Render the row sizes as ``<1 3 5 ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply the move ``ply = (row, num_objects)`` to the board in place.

        Raises:
            AssertionError: if the move removes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A move must remove something: zero would be a "pass" and a negative
        # count would put objects back on the board.
        assert num_objects >= 1, "a move must remove at least one object"
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) strategies

In [4318]:
def pure_random(state: Nim) -> Nimply:
    """Return a uniformly random legal move.

    A non-empty row is drawn first, then the number of objects to remove from
    it.  The amount never exceeds the board's per-turn limit, if it has one.
    """
    rows = state.rows
    row = random.choice([r for r, c in enumerate(rows) if c > 0])
    # Nim.nimming rejects moves larger than the limit, so cap the draw here.
    # Nim exposes no public accessor for the limit, hence the private read.
    limit = getattr(state, "_k", None)
    most = rows[row] if limit is None else min(rows[row], limit)
    num_objects = random.randint(1, most)
    return Nimply(row, num_objects)


In [4319]:
def gabriele(state: Nim) -> Nimply:
    """Take as many objects as allowed from the first non-empty row.

    "As many as allowed" is the whole row, or the board's per-turn limit when
    one is set and it is smaller than the row.
    """
    # Nim.nimming rejects moves larger than the limit, so do not generate them.
    limit = getattr(state, "_k", None)
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, (c if limit is None else min(c, limit)) + 1)
    ]
    # Lowest row index first, then the largest amount within that row.
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [4320]:
def soluzione(state: Nim) -> Nimply:
    """Choose a move with a fixed endgame rule, otherwise with a small evolutionary search.

    The non-empty rows of ``state`` are copied into ``current_state``.  If
    exactly one of them differs from 1, the move is decided directly from the
    parity of the rows equal to 1.  Otherwise candidate moves
    ``[row, amount]`` are evolved for up to 100 generations, scoring each by
    the negated nim-sum of the board it leaves, and the best one is returned.

    Row indices used internally refer to ``current_state``; they are shifted
    back to indices of ``state._rows`` before the move is returned.
    """

    POPULATION_SIZE = 50
    OFFSPRING_SIZE = 20
    TOURNAMENT_SIZE = 2
    MUTATION_PROBABILITY = .15  # NOTE(review): never read inside this function

    def fitness(state):
        """Return the negated nim-sum of the first NUM_ROWS entries of ``state`` (0 is the maximum)."""
        # TODO(later): revisit this fitness
        nim_sum = 0
        for i in range(NUM_ROWS):
            nim_sum ^= state[i]
        return -nim_sum

    @dataclass
    class Individual:
        # Score assigned by fitness(); None until evaluated.
        fitness: int
        # [row index into current_state, number of objects to remove]
        genotype: list[int]

    def select_parent(pop):
        """Draw TOURNAMENT_SIZE individuals at random and return one to be mutated.

        An individual whose amount already equals (or exceeds) its row size
        cannot be incremented by mutate(), so it is discarded; if both drawn
        individuals are in that situation a fresh individual with amount 0 on
        a random row is returned instead.
        """
        pool = [choice(pop) for _ in range(TOURNAMENT_SIZE)]
        if pool[0].genotype[1]>= current_state[pool[0].genotype[0] ] and pool[1].genotype[1]>= current_state[pool[1].genotype[0]]:
            return Individual(fitness=None, genotype=[randint(0,NUM_ROWS-1), 0])
        elif pool[0].genotype[1]>= current_state[pool[0].genotype[0]]:
            return pool[1]
        elif pool[1].genotype[1]>= current_state[pool[1].genotype[0]]:
            return pool[0]

        # Both candidates can still grow: keep the fitter one.
        champion = max(pool, key=lambda i: i.fitness)
        return champion

    def mutate(ind: Individual) -> Individual:
        """Return a copy of ``ind`` that removes one more object; its fitness is reset."""
        offspring = deepcopy(ind)
        offspring.genotype[1]+=1
        offspring.fitness = None
        return offspring



    # Compact view of the board: only the rows that still hold objects.
    current_state =[]
    for i,_ in enumerate(state._rows):
        if(_!=0):
            current_state.append(_)
    NUM_ROWS = len(current_state)


    # Count rows holding exactly one object versus rows holding more; the scan
    # stops as soon as a second larger row is found, since the endgame rule
    # below only applies when there is exactly one.
    counter_1 = 0
    counter_more_1 = 0
    indice =0
    for i,_ in enumerate(current_state):
        if(_ == 1):
            counter_1 +=1
        else:
            counter_more_1 += 1
            indice= i
        
        if(counter_more_1>1):
            break
    #print(counter_more_1, current_state)

    if(counter_more_1 == 1):
        # Exactly one row with more than one object: take it entirely when the
        # number of single-object rows is odd, otherwise leave one object in it.
        if((counter_1 % 2) == 1):
            mossa = [indice, current_state[indice]]
            #print("====>odd", mossa, counter_1)
        elif((counter_1 % 2) == 0) :
            mossa =  [indice, (current_state[indice] - 1)]  
            #print("====>even", mossa, counter_1)
         

        # Shift the row index by one for every empty row met before it, so
        # that it addresses state._rows rather than the compacted current_state.
        counter = 0
        for i,_ in enumerate(state._rows):
            if _ == 0:
                mossa[0] +=1
            counter +=1
            if(counter>mossa[0]):
                break
        return Nimply(mossa[0], mossa[1])

    # Initial population: one individual per row, each removing a single object.
    population = [
    Individual(
        genotype= [_, 1],
        #genotype = np.concatenate([np.zeros(_, dtype=int), [1], np.zeros(NUM_ROWS-_-1, dtype=int)]),
        fitness=None,
    )
    for _ in range(NUM_ROWS)
]

    # Score every initial individual on the board it would leave behind.
    for _, i in enumerate(population):
        new_state= copy(current_state)
        new_state[i.genotype[0]]-= i.genotype[1]
        i.fitness = fitness(new_state)
    for generation in range(100):
        
        offspring = list()
        for counter in range(OFFSPRING_SIZE):
            #if random() < 1:  # self-adapt mutation probability
            # mutation  # add more clever mutations
            p = select_parent(population)
            o = mutate(p)
            #print("parent is {}, offspring is {}".format(p , o))

            offspring.append(o)

        for i in offspring:
            new_state= copy(current_state)
            new_state[i.genotype[0]]-= i.genotype[1]
            i.fitness = fitness(new_state) 
            #print(new_state, i) 
            
        # Keep the POPULATION_SIZE best individuals among parents and offspring.
        population.extend(offspring)
        population.sort(key=lambda i: i.fitness, reverse=True)
        population = population[:POPULATION_SIZE]
        # Fitness 0 means the move leaves a nim-sum of zero: nothing better exists.
        if(population[0].fitness==0):
            break
    mossa = [population[0].genotype[0], population[0].genotype[1]]
    # Same index translation as above: skip over the empty rows of state._rows.
    counter = 0
    for i,_ in enumerate(state._rows):
        if _ == 0:
            mossa[0] +=1
        counter +=1
        if(counter>mossa[0]):
            break

    return Nimply(mossa[0], mossa[1])

In [4321]:
def adaptive(state: Nim) -> Nimply:
    """Choose a move with an evolutionary algorithm scored by simulated games.

    A population of random moves is evolved for 100 generations through
    tournament selection, mutation and crossover.  Each move is scored by
    applying it to a copy of ``state`` and playing 10 games to the end with
    ``optimal`` on both sides.  The genotype of the best individual is returned.
    """
    POPULATION_SIZE = 20
    OFFSPRING_SIZE = 10
    TOURNAMENT_SIZE = 2
    MUTATION_PROBABILITY = .15
    #NUM_ROWS = len(current_state)
    @dataclass
    class Individual:
        # Score assigned by fitness(); None until evaluated.
        fitness: int
        # The candidate move itself.
        genotype: Nimply

        def __str__(self):
            return "Genotype: {}, Fitness: {} ".format(self.genotype, self.fitness)
    
    def select_parent(pop):
        """Return the fittest of TOURNAMENT_SIZE individuals drawn at random from ``pop``."""
        pool = [choice(pop) for _ in range(TOURNAMENT_SIZE)]
        champion = max(pool, key=lambda i: i.fitness)
        return champion
    
    # Randomly change the amount taken from a row by +1 or -1, except where one
    # of the two directions is not possible.
    # If the individual cannot be changed at all (its row holds a single
    # object), a new random move replaces it.
    def mutate(ind: Individual) -> Individual:
        offspring=deepcopy(ind)
        offspring.fitness=None
        if(state.rows[ind.genotype[0]] == 1):
            offspring.genotype=pure_random(state)
            return offspring
        elif(state.rows[ind.genotype[0]] == ind.genotype[1]):
            # Already taking the whole row: can only take one less.
            offspring.genotype= Nimply(ind.genotype[0], ind.genotype[1] - 1)
        elif(ind.genotype[1] == 1):
            # Already taking the minimum: can only take one more.
            offspring.genotype= Nimply(ind.genotype[0], ind.genotype[1] + 1)
        else:
            if(random.random() < 0.5):
                offspring.genotype= Nimply(ind.genotype[0], ind.genotype[1] - 1)
            else:
                offspring.genotype= Nimply(ind.genotype[0], ind.genotype[1] + 1)
        return offspring


    # Take the smaller amount (genotype[1]) of the two individuals and combine
    # it with the row of the other one.
    def xover(ind1: Individual, ind2: Individual) -> Individual:
        if(ind1.genotype[1] > ind2.genotype[1]):
            offspring= Individual(fitness=None, genotype= Nimply(ind1.genotype[0], ind2.genotype[1]))
        else:
            offspring=Individual(fitness=None, genotype= Nimply(ind2.genotype[0], ind1.genotype[1]))
        return offspring

    # Continue a simulated game after the candidate move, with `optimal`
    # playing both sides.
    # 10 games are played for each move; the score is the number of games in
    # which `player` ends up equal to 1.
    def fitness(state):
        # The candidate move emptied the board: score 0.
        if(not state):
            return 0
        contatore = 0
        # Best performance is obtained with range(1) and strategy = (perfetta, perfetta);
        # however, for the sake of the evolutionary algorithm (generality towards a
        # non-rigged game) the version pitting the two `optimal` players was preferred.
        for _ in range(10):
            nim = deepcopy(state)
            strategy = (optimal, optimal)

            #logging.info(f"init : {nim}")
            player = 0
            while nim:
                ply = strategy[player](nim)
                #logging.info(f"ply: player {player} plays {ply}")
                nim.nimming(ply)
                #logging.info(f"status: {nim}")
                player = 1 - player
            # `player` was flipped after the final move, so it is the side that
            # did NOT take the last object: adds 1 if that is player 1, else 0.
            contatore += player
        return contatore
    
    # The population is a set of random moves (Nimply).

    population = [
    Individual(
        genotype=pure_random(state),
        fitness=None,
    )
    for _ in range(POPULATION_SIZE)
    ]

    # Score every initial individual on the board left by its move.
    for i in population:
        new_state= deepcopy(state)
        new_state.nimming(i.genotype)
        i.fitness = fitness(new_state)
    #for _ in population:
    #   print(_)


    # Standard EA loop: generate offspring, evaluate them, keep the best.
    for generation in range(100):
        offspring = list()
        for counter in range(OFFSPRING_SIZE):
            if random.random() < MUTATION_PROBABILITY:  # self-adapt mutation probability
            # mutation  # add more clever mutations
                p = select_parent(population)
                o = mutate(p)
            #print("parent is {}, offspring is {}".format(p , o))
            else:
                # Crossover between two tournament winners.
                p1 = select_parent(population)
                p2 = select_parent(population)
                o = xover(p1, p2)
            offspring.append(o)

        for i in offspring:
            new_state= deepcopy(state)
            new_state.nimming(i.genotype)
            i.fitness = fitness(new_state)
            
        # Keep the POPULATION_SIZE best individuals among parents and offspring.
        population.extend(offspring)
        population.sort(key=lambda i: i.fitness, reverse=True)
        population = population[:POPULATION_SIZE]
    return population[0].genotype

In [4322]:
def finale(state: Nim) :
    """Return the endgame move when exactly one row holds more than one object.

    In that situation the row is emptied if the number of single-object rows
    is odd, otherwise it is reduced to a single object.  In every other
    situation ``False`` is returned, meaning this rule does not apply.
    """
    rows = state.rows
    # Only rows that still hold objects take part: an empty row is neither a
    # single-object row nor a larger one (same test as in `perfetta`).
    counter_1 = sum(1 for row in rows if row == 1)
    bigger = [i for i, row in enumerate(rows) if row > 1]

    if len(bigger) != 1:
        return False

    indice = bigger[0]
    if counter_1 % 2 == 1:
        return Nimply(indice, rows[indice])
    return Nimply(indice, rows[indice] - 1)

In [4323]:
def perfetta(state: Nim)-> Nimply:
    """Pick a move using the nim-sum, with a dedicated rule for the endgame.

    * nim-sum already 0: play a random move;
    * exactly one row with more than one object: empty it when the number of
      single-object rows is odd, otherwise leave one object in it;
    * otherwise: try amounts nim-sum, nim-sum - 2, ... on every row large
      enough and return the first move that brings the nim-sum to 0.
    """
    def fitness(state: Nim):
        # XOR of all row sizes.
        total = 0
        for heap in state.rows:
            total ^= heap
        return total

    nim_sum = fitness(state)
    if nim_sum == 0:
        return pure_random(state)

    heaps = state.rows

    # Endgame rule.
    singles = sum(1 for heap in heaps if heap == 1)
    bigger = [idx for idx, heap in enumerate(heaps) if heap > 1]
    if len(bigger) == 1:
        target = bigger[0]
        if singles % 2 == 1:
            return Nimply(target, heaps[target])
        return Nimply(target, heaps[target] - 1)

    # Brute-force search on the amount to remove, largest candidate first.
    for amount in range(nim_sum, 0, -2):
        for idx, heap in enumerate(heaps):
            if heap < amount:
                continue
            trial = deepcopy(state)
            trial.nimming(Nimply(idx, amount))
            if fitness(trial) == 0:
                return Nimply(idx, amount)
    return None


In [4324]:
# Ask `perfetta` for a move on a fresh 4-row board and print it.
print(perfetta(Nim(4)))

Nimply(row=3, num_objects=3)


In [4325]:
import numpy as np


def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of *state*: the bitwise XOR of all its row sizes.

    An empty board (no rows) has a nim-sum of 0.
    """
    result = 0
    for count in state.rows:
        result ^= count
    # Always hand back a plain Python int, whatever integer type the rows use.
    return int(result)


def analize(raw: Nim) -> dict:
    """Map every move available on *raw* to the nim-sum of the board it leaves.

    Returns a dict with the single key ``"possible_moves"``, whose value maps
    each ``Nimply`` to the resulting nim-sum.  *raw* itself is not modified:
    every move is tried on a deep copy.
    """
    outcomes = {}
    for row_index, count in enumerate(raw.rows):
        for taken in range(1, count + 1):
            ply = Nimply(row_index, taken)
            board = deepcopy(raw)
            board.nimming(ply)
            outcomes[ply] = nim_sum(board)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Return a random move among those that leave a non-zero nim-sum.

    When no such move exists, any available move may be returned.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    # NOTE(review): the usual nim-sum strategy keeps the moves that leave a
    # nim-sum equal to 0; this filter keeps the opposite set — confirm whether
    # that is intended before relying on this as a reference player.
    candidates = [move for move, value in outcomes.items() if value != 0]
    if len(candidates) == 0:
        candidates = list(outcomes)
    return random.choice(candidates)


## Oversimplified match

In [4326]:
# Number of games won by player 0 and by player 1.
player_victories=[0,0]

# Play the match(es); raise range(...) to collect statistics over more games.
for i in range(1):
    logging.getLogger().setLevel(logging.INFO)

    # strategy[0] moves first.
    strategy = (adaptive, optimal)

    nim = Nim(4)

    logging.info(f"init : {nim}")
    player = 0
    while nim:
        ply = strategy[player](nim)
        logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        logging.info(f"status: {nim}")
        player = 1 - player
    # `player` was flipped after the final move, so the winner reported here is
    # the side that did NOT take the last object.
    logging.info(f"status: Player {player} won!")
    player_victories[player] += 1
    
print(player_victories)


INFO:root:init : <1 3 5 7>


INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 2 5 7>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=5)
INFO:root:status: <1 2 5 2>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 1 5 2>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=2)
INFO:root:status: <1 1 3 2>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <1 1 2 2>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <1 1 2 1>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=2)
INFO:root:status: <1 1 0 1>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 1 0 1>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 0 0 1>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 0>
INFO:root:status: Player 0 won!


[1, 0]
