Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [149]:
import logging
from pprint import pformat
from collections import namedtuple
import random
from copy import deepcopy
from math import ceil
from tqdm.notebook import tqdm

## The *Nim* and *Nimply* classes

In [150]:
Nimply = namedtuple("Nimply", "row, num_objects")

In [151]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

## Sample (and silly) startegies 

In [152]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(
        1, state.rows[row] if state._k is None else min(state.rows[row], state._k)
    )
    return Nimply(row, num_objects)

In [153]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [
        (r, o if state._k is None else min(state.rows[r], state._k))
        for r, c in enumerate(state.rows)
        for o in range(1, c + 1)
    ]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))

In [154]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (
        Nimply(r, o if raw._k is None else min(raw.rows[r], raw._k))
        for r, c in enumerate(raw.rows)
        for o in range(1, c + 1)
    ):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply

## Expert Strategy

This should be a better version of the optimal strategy since it gives priority to the moves where more matches are taken, it also takes into account the upper bound k if defined.

In [155]:
def expert(raw: Nim) -> Nimply:
    singles = []
    not_singles = []
    for i in range(len(raw.rows)):
        if raw.rows[i] == 1:
            singles.append(i)
        elif raw.rows[i] > 1:
            not_singles.append((i, raw.rows[i]))
    if len(not_singles) == 1 and (raw._k is None or not_singles[0][1] <= raw._k):
        if len(singles) % 2:
            return Nimply(not_singles[0][0], not_singles[0][1])
        else:
            return Nimply(not_singles[0][0], not_singles[0][1] - 1)
    val = min(max(raw.rows), 999 if raw._k is None else raw._k)
    while val > 0:
        for i in reversed(range(len(raw.rows))):
            if raw.rows[i] == 0:
                continue
            test = deepcopy(raw)
            ply = Nimply(
                i,
                val
                if test.rows[i] >= val
                else min(test.rows[i], 999 if test._k is None else raw._k),
            )
            test.nimming(ply)
            if nim_sum(test) != 0:
                return ply
        val -= 1
    if val == 0:
        for i in range(len(raw.rows)):
            if int(raw.rows[i]) > 0:
                return Nimply(i, min(raw.rows[i], 999 if raw._k is None else raw._k))

In [156]:
# this class allows to represent each implemented strategies so far in a single callable, useful when cycling through every strat
class Player:
    def __init__(self, strategy: str) -> None:
        self.strategy = strategy

    def play(self, raw: Nim) -> Nimply:
        if self.strategy == "expert":
            return expert(raw)
        if self.strategy == "gabriele":
            return gabriele(raw)
        if self.strategy == "optimal":
            return optimal(raw)
        if self.strategy == "pure_random":
            return pure_random(raw)

## Adaptive

In [157]:
class Adaptive:
    def __init__(self) -> None:
        self.genome = {
            "love_small_row": random.random(),
            "love_small_num": random.random(),
        }
        self.fitness = 0

    def play(self, nim: Nim, GAME_DIM=5):
        # row is chosen based on the weight "love_small_row" which makes more likely to choose a short row the higher its value is
        row = random.choice(
            [
                i
                for i in range(
                    min(
                        ceil(max((1 - self.genome["love_small_row"]), 1e-3) * GAME_DIM),
                        GAME_DIM,
                    )
                )
            ]
        )
        row = (
            row
            if nim.rows[row] > 0
            else random.choice([r for r in range(GAME_DIM) if nim.rows[r] > 0])
        )
        # row is chosen based on the weight "love_small_num" which makes more likely to choose a low amount of matches to be taken the higher its value is
        number = random.choice(
            [
                i
                for i in range(
                    1,
                    min(
                        ceil(
                            max((1 - self.genome["love_small_num"]), 1e-3)
                            * nim.rows[row]
                        )
                        + 1,
                        nim.rows[row] + 1,
                    ),
                )
            ]
        )
        # if the selected value is above a set k, limit it to k
        number = number if nim._k is None else min(nim._k, number)
        ply = Nimply(row, number)
        return ply

    def increment_fitness(self, result: int) -> None:
        self.fitness += result

    def get_fitness(self) -> int:
        return self.fitness

    def reset_fitness(self) -> None:
        self.fitness = 0

    def mutate(self, sig) -> None:
        self.genome["love_small_row"] += random.gauss(0, sig)
        if self.genome["love_small_row"] < 0:
            self.genome["love_small_row"] = 1e-5
        self.genome["love_small_num"] += random.gauss(0, sig)
        if self.genome["love_small_num"] < 0:
            self.genome["love_small_num"] = 1e-5

In [158]:
random.choice([i for i in range(ceil((1 - random.random()) * 5))])
[print(i) for i in range(1, ceil(0.1))]

[]

In [159]:
# a bit more versatile than the original simple match implemented
def game(strategy: list, GAME_DIM=5, verbose=False) -> int:
    nim = Nim(GAME_DIM)
    player = 0
    while nim:
        ply = strategy[player].play(nim)
        if verbose:
            print(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        if verbose:
            print(f"status: {nim}")
        player = 1 - player
    if verbose:
        print(f"status: Player {player} won!")
    return player

## Tournament Selection

In [160]:
mu = 10
lam = 100
sig = 0.1
GENERATIONS = 100
strategies = ["expert", "optimal", "gabriele", "pure_random"]

population = [Adaptive() for _ in range(lam)]

for step in tqdm(range(GENERATIONS)):
    # reset population ranking since it's an incremental value
    for p in population:
        p.reset_fitness()
    # create offspring starting from mu random individuals
    parents = [np.random.choice(population) for _ in range(mu)]
    offspring = parents * (lam // mu)
    # offspring mutation
    for o in offspring:
        o.mutate(sig)
    # add the parents to the population to be evaluated
    offspring += parents    
    # compute every individual's fitness based on number of games won (10 games: half playing first and half playing second, against every strategy)
    for i in offspring:
        for strat in strategies:
            player = Player(strat)
            adaptive_wins = 0
            for _ in range(5):
                i.increment_fitness(1 - game([i, player]))
                i.increment_fitness(game([player, i]))
    offspring = sorted(offspring, key=lambda a: a.get_fitness(), reverse=True)
    # set the next population
    population = offspring

winner = offspring[0]

  0%|          | 0/100 [00:00<?, ?it/s]

## Benchmark

In [161]:
N_GAMES = 100

# the best individual competes with every other strategy to produce a benchmark
for strat in strategies:
    player = Player(strat)
    adaptive_wins = 0
    for _ in range(int(N_GAMES / 2)):
        if game([winner, player]) == 0:
            adaptive_wins += 1
        if game([player, winner]):
            adaptive_wins += 1
    print(
        f"Adaptive champion wins {adaptive_wins/N_GAMES*100}% of the time against {player.strategy}"
    )

Adaptive champion wins 55.00000000000001% of the time against expert
Adaptive champion wins 25.0% of the time against optimal
Adaptive champion wins 100.0% of the time against gabriele
Adaptive champion wins 56.00000000000001% of the time against pure_random
