Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal repository for the course
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) in it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [458]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy

## The *Nim* and *Nimply* classes

In [459]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [460]:
class Nim:
    """Nim board: a list of rows of objects plus an optional per-move cap.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects. An instance is
    truthy while at least one object is left on the board.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Create a board.

        Args:
            num_rows: Number of rows on the board.
            k: Maximum number of objects removable in a single move, or
                ``None`` for no limit.
        """
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while the game is still running (objects remain)."""
        return sum(self._rows) > 0

    def __str__(self):
        """Return the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Current row sizes, as a tuple snapshot of the internal list."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply a move to the board in place.

        Args:
            ply: A ``(row, num_objects)`` pair.

        Raises:
            AssertionError: If the move removes fewer than one object, more
                objects than the row holds, or more than the cap ``k``.
        """
        row, num_objects = ply
        # A zero move would let a player pass forever and a negative one would
        # add objects to the row, so both are rejected like any illegal move.
        assert num_objects >= 1, "a move must remove at least one object"
        assert self._rows[row] >= num_objects, "not enough objects in the row"
        assert self._k is None or num_objects <= self._k, "move exceeds the cap k"
        self._rows[row] -= num_objects

## Sample (and silly) strategies

In [461]:
def pure_random(state: Nim) -> Nimply:
    """Return a random legal move.

    A non-empty row is picked at random, then a random number of objects to
    remove from it, never exceeding the board's per-move cap when one is set.

    Args:
        state: The board to move on.

    Returns:
        The chosen move.

    Raises:
        IndexError: If every row is empty (no move exists).
    """
    rows = state.rows
    # Nim has no public accessor for the cap, hence the private lookup; a
    # missing attribute is treated as "no cap".
    k = getattr(state, "_k", None)
    row = random.choice([r for r, c in enumerate(rows) if c > 0])
    limit = rows[row] if k is None else min(rows[row], k)
    return Nimply(row, random.randint(1, limit))


In [462]:
def gabriele(state: Nim) -> Nimply:
    """Take as many objects as allowed from the first non-empty row.

    Args:
        state: The board to move on.

    Returns:
        The move on the lowest-index non-empty row that removes the largest
        legal number of objects (the whole row unless the cap is smaller).

    Raises:
        ValueError: If every row is empty (no move exists).
    """
    # Nim has no public accessor for the cap, hence the private lookup; a
    # missing attribute is treated as "no cap".
    k = getattr(state, "_k", None)
    for row, count in enumerate(state.rows):
        if count > 0:
            return Nimply(row, count if k is None else min(count, k))
    raise ValueError("no legal move: the board is empty")


In [463]:
import numpy as np

def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of the board: the bitwise XOR of all row sizes.

    Args:
        state: Any object exposing the row sizes as an iterable ``rows``.

    Returns:
        The XOR of the row sizes; ``0`` for a board without rows.
    """
    # Integer XOR works for sizes of any magnitude and for an empty board,
    # unlike a fixed-width bit-string round trip.
    total = 0
    for count in state.rows:
        total ^= count
    return total

def analize(raw: Nim) -> dict:
    """Map every legal move on `raw` to the nim-sum of the resulting board.

    Args:
        raw: The board to analyse; it is not modified.

    Returns:
        A dict with the single key ``"possible_moves"``, whose value maps each
        legal ``Nimply`` (ordered by row, then by objects removed) to the
        nim-sum of the board after that move. Moves above the board's
        per-move cap are not listed.
    """
    rows = raw.rows
    # Nim has no public accessor for the cap, hence the private lookup; a
    # missing attribute is treated as "no cap".
    k = getattr(raw, "_k", None)
    # XOR is its own inverse: XOR-ing a row's old size out of the current
    # nim-sum and its new size in gives the nim-sum after the move, so no
    # per-move copy of the board is needed.
    current = nim_sum(raw) if rows else 0

    possible_moves = dict()
    for r, c in enumerate(rows):
        limit = c if k is None else min(c, k)
        for o in range(1, limit + 1):
            possible_moves[Nimply(r, o)] = current ^ c ^ (c - o)
    return {"possible_moves": possible_moves}

def optimal(state: Nim) -> Nimply:
    """Pick a random move among those that leave a non-zero nim-sum.

    If no move leaves a non-zero nim-sum, a random move among all the
    possible ones is returned instead.

    NOTE(review): the textbook nim-sum rule moves to a position whose nim-sum
    IS zero; this function keeps the moves whose nim-sum is NOT zero. Confirm
    whether that is intentional before relying on the name "optimal".

    Args:
        state: The board to move on.

    Returns:
        The chosen move.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    # Fall back to every move when the filter leaves nothing to choose from.
    candidates = [ply for ply in outcomes if outcomes[ply] != 0] or list(outcomes)
    return random.choice(candidates)


# Evolution Strategy approach

### Genome

In [464]:
def generate_random_genome():
    """Create a genome with random genes and a fitness of zero.

    Returns:
        A dict with the keys ``"preference"`` (random float between 0 and 1),
        ``"use_lower_half"`` (random bool) and ``"fitness"`` (0).
    """
    preference = random.uniform(0, 1)
    use_lower_half = random.choice([True, False])
    return {
        "preference": preference,
        "use_lower_half": use_lower_half,
        "fitness": 0,
    }

### Adaptive Function

In [465]:
def adaptive_strategy(genome):
    """Build a Nim strategy whose move selection is driven by `genome`.

    Args:
        genome: Dict read with ``.get`` for ``"preference"`` (default 0.5)
            and ``"use_lower_half"`` (default True).

    Returns:
        A function taking a board and returning a move. With probability
        ``preference`` the move is drawn from one half of the range of
        resulting nim-sums (the lower half if ``use_lower_half`` is truthy,
        the upper half otherwise); in the remaining cases it is drawn from
        all possible moves.
    """
    def adaptive(state):
        # Nim-sum reached by each possible move, and the midpoint of the range.
        outcomes = analize(state)['possible_moves']
        lowest = min(outcomes.values())
        highest = max(outcomes.values())
        midpoint = (lowest + highest) / 2

        # The genes are looked up on every call, not when the strategy is built.
        preference = genome.get("preference", 0.5)
        use_lower_half = genome.get("use_lower_half", True)

        biased = random.random() < preference
        if not biased:
            candidates = list(outcomes)
        elif use_lower_half:
            # Moves whose nim-sum lies between the minimum and the midpoint.
            candidates = [ply for ply, ns in outcomes.items() if lowest <= ns <= midpoint]
        else:
            # Moves whose nim-sum lies between the midpoint and the maximum.
            candidates = [ply for ply, ns in outcomes.items() if midpoint <= ns <= highest]

        # Safety net: never draw from an empty candidate list.
        if not candidates:
            candidates = list(outcomes.keys())
        return random.choice(candidates)

    return adaptive

### Parent Selection

In [466]:
def tournament_selection(population, tournament_size):
    """Return the fittest of `tournament_size` randomly sampled individuals.

    Args:
        population: Iterable of genomes (dicts with a ``'fitness'`` key).
        tournament_size: How many distinct individuals take part.

    Returns:
        The sampled individual with the highest ``'fitness'``.
    """
    def fitness_of(individual):
        return individual['fitness']

    # Copy into a list first so any iterable can be sampled from.
    contenders = random.sample(list(population), tournament_size)
    return max(contenders, key=fitness_of)

### Crossover

In [467]:
def crossover(parent1, parent2):
    """Create a child genome by picking each gene from one of two parents.

    Each gene is taken from `parent1` with probability equal to its share of
    the parents' combined fitness, otherwise from `parent2`.

    Args:
        parent1: Genome dict with ``"preference"``, ``"use_lower_half"`` and
            ``"fitness"``.
        parent2: Genome dict with the same keys.

    Returns:
        A new genome dict with the inherited genes and a fitness of 0. The
        parents are not modified.
    """
    total_fitness = parent1['fitness'] + parent2['fitness']
    # Two parents that never won carry no fitness signal: split evenly
    # instead of dividing by zero.
    weight_parent1 = parent1['fitness'] / total_fitness if total_fitness else 0.5

    child_genome = {
        key: parent1[key] if random.random() < weight_parent1 else parent2[key]
        for key in ("preference", "use_lower_half")
    }
    child_genome["fitness"] = 0
    return child_genome

### Mutation

In [468]:
def mutate(child_genome, mutation_rate):
    """Randomly re-draw genes of `child_genome` in place.

    Each of the two genes is independently replaced by a fresh random value
    with probability `mutation_rate`.

    Args:
        child_genome: Genome dict to mutate; it is modified in place.
        mutation_rate: Per-gene probability of being re-drawn.

    Returns:
        The same `child_genome` object.
    """
    # Gene name -> how to draw a new value for it, in mutation order.
    resamplers = (
        ("preference", lambda: random.uniform(0, 1)),
        ("use_lower_half", lambda: random.choice([True, False])),
    )
    for gene, draw in resamplers:
        if random.random() < mutation_rate:
            child_genome[gene] = draw()
    return child_genome

### Generate Population

In [469]:
def generate_new_population(selected_parents, n_individuals, mutation_rate):
    """Breed a new population from the selected parents.

    Args:
        selected_parents: Sequence of genomes to draw parents from.
        n_individuals: Size of the population to produce.
        mutation_rate: Per-gene mutation probability passed to ``mutate``.

    Returns:
        A list of `n_individuals` child genomes, each obtained by crossing two
        randomly chosen parents (possibly the same one twice) and mutating
        the result.
    """
    offspring = []
    while len(offspring) < n_individuals:
        # Parents are drawn independently, first one then the other.
        first, second = random.choice(selected_parents), random.choice(selected_parents)
        offspring.append(mutate(crossover(first, second), mutation_rate))
    return offspring

### Simulation

In [470]:
def play_nim_game(strategy1, strategy2, initial_nim_state):
    """Play one game to the end and return the winner's index.

    Args:
        strategy1: Strategy of player 0, who moves first.
        strategy2: Strategy of player 1.
        initial_nim_state: The board to play on; it is modified in place.

    Returns:
        0 or 1: the player who did NOT make the final move (0 if the board
        is already empty).
    """
    strategies = (strategy1, strategy2)
    turn = 0
    while initial_nim_state:
        initial_nim_state.nimming(strategies[turn](initial_nim_state))
        # Hand the move over to the other player.
        turn = 1 - turn
    return turn

In [471]:
def find_best_player(population):
    """Return the genome with the highest ``"fitness"`` in `population`.

    On ties the first such genome in iteration order is returned.
    """
    def fitness_of(genome):
        return genome["fitness"]

    return max(population, key=fitness_of)

In [472]:
def _count_wins(genome, opponent, n_games, num_rows=5):
    """Return how many of `n_games` the genome's strategy wins moving first."""
    # The strategy closure only depends on the genome, so build it once.
    strategy = adaptive_strategy(genome)
    return sum(
        play_nim_game(strategy, opponent, Nim(num_rows)) == 0
        for _ in range(n_games)
    )


def simulation(generations, n_games, pop_size, n_parents, tournament_size, mutation_rate,
               num_rows=5, eval_games=1000):
    """Evolve a population of genomes against ``optimal`` and report results.

    Every generation, each genome plays `n_games` games as first player
    against ``optimal`` and gains one fitness point per win; parents are then
    picked by tournament and bred into the next population. Afterwards the
    fittest genome of the last evaluated generation plays `eval_games` games
    against ``gabriele``, ``pure_random`` and ``optimal``. Statistics are
    printed along the way.

    Args:
        generations: Number of generations to evaluate and breed.
        n_games: Games each genome plays per generation.
        pop_size: Number of genomes per generation.
        n_parents: Number of tournaments run (parents selected) per generation.
        tournament_size: Number of genomes sampled for each tournament.
        mutation_rate: Per-gene mutation probability.
        num_rows: Number of rows of every board played on.
        eval_games: Games played against each opponent in the final report.

    Returns:
        A NumPy object array of shape ``(generations + 1, pop_size)`` holding
        every generation's genomes. The last row is the offspring of the final
        generation, which never played (fitness 0).
    """
    # Start from a random population.
    population = [generate_random_genome() for _ in range(pop_size)]
    history = [population]
    # Most recent generation that actually played; also defined when
    # `generations` is 0, so the final report never hits an unbound name.
    evaluated = population

    for generation in range(generations):
        print(f"Generazione {generation+1}:")

        # Every genome plays its games; each win is one fitness point.
        wins_in_generation = 0
        for player in population:
            wins = _count_wins(player, optimal, n_games, num_rows)
            player["fitness"] += wins
            wins_in_generation += wins
        games_in_generation = len(population) * max(n_games, 0)

        # Per-generation statistics.
        percentage_won = (
            wins_in_generation / games_in_generation * 100 if games_in_generation else 0.0
        )
        print(f"They won {wins_in_generation} games in {games_in_generation} games")
        print(f"The percentage of won games of this generation is: {percentage_won}%")

        evaluated = population

        # Select the parents and breed the next population.
        selected_parents = [
            tournament_selection(population, tournament_size)
            for _ in range(n_parents)
        ]
        population = generate_new_population(selected_parents, pop_size, mutation_rate)
        history.append(population)

    best_player = find_best_player(evaluated)

    matchups = (
        ("Best Player vs Gabriele Strategy:", gabriele),
        ("Best Player vs Pure Random Strategy:", pure_random),
        ("Best Player vs Optimal: The Rematch!", optimal),
    )
    for title, opponent in matchups:
        print(title)
        wins = _count_wins(best_player, opponent, eval_games, num_rows)
        percentage = wins * 100 / eval_games if eval_games > 0 else 0.0
        print(f"They won {wins} games in {eval_games} games")
        print(f"The percentage of won games is: {percentage}%")

    # Convert once at the end instead of re-copying the array every generation.
    return np.array(history, dtype=object)

In [473]:
# Run the experiment and keep every generation's population.
pop = simulation(
    generations=10,
    n_games=50,
    pop_size=100,
    n_parents=50,
    tournament_size=20,
    mutation_rate=0.10,
)

Generazione 1:


They won 1857 games in 5000 games
The percentage of won games of this generation is: 37.14%
Generazione 2:
They won 1937 games in 5000 games
The percentage of won games of this generation is: 38.74%
Generazione 3:
They won 1970 games in 5000 games
The percentage of won games of this generation is: 39.4%
Generazione 4:
They won 2009 games in 5000 games
The percentage of won games of this generation is: 40.18%
Generazione 5:
They won 1967 games in 5000 games
The percentage of won games of this generation is: 39.34%
Generazione 6:
They won 2017 games in 5000 games
The percentage of won games of this generation is: 40.339999999999996%
Generazione 7:
They won 2066 games in 5000 games
The percentage of won games of this generation is: 41.32%
Generazione 8:
They won 2025 games in 5000 games
The percentage of won games of this generation is: 40.5%
Generazione 9:
They won 2013 games in 5000 games
The percentage of won games of this generation is: 40.26%
Generazione 10:
They won 2015 games in 50