# LAB 2

In [1]:
import logging
from pprint import pformat
from collections import namedtuple
import random
from copy import deepcopy
from dataclasses import dataclass

## The *Nim* and *Nimply* classes

In [2]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [3]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)] #just building the rows, given a number of sticks
        self._k = k #maximum number of sticks you can remove from a row

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)
    
    @property #added to return the max number of sticks that can be removed
    def k(self) -> int:
        return self._k

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [4]:
def pure_random(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row] if state.k is None else min(state.rows[row], state.k))
    return Nimply(row, num_objects)

In [5]:
import numpy as np

def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(state: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1 if state.k is None else min(c + 1, state.k))):
        tmp = deepcopy(state)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


In [6]:
def optimal(state: Nim) -> Nimply: #already defined for the lab -> it tries to choose a move that implies nim sum not equal to 0
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    logging.debug(pformat(f"{analysis['possible_moves']}"))
    ply = random.choice(spicy_moves)
    return ply

## Building an expert system (fixed rules)

Assuming that the version of the game is the one in which the person who takes the last stick loose, an expert system can be built considering the following ideas:
- leaving always a state in which the nim sum is equal to zero during early/mid game
- exploit some final strategies depending on the state during end game

An explaination of the previously mentioned strategies can be found in this basic and short YouTube video: https://www.youtube.com/watch?v=SUh8C387BVU.

Basically there are two types of final situation also depending on which person starts the game:
- a state in which you have only a row -> in this case what you have to do is to take all the sticks except for the last one
- a state in which for example there are two rows with 1 and 2 sticks -> you mustn't leave nim sum equal to 0 removing 1 stick from the row with 2 sticks, but directly empty all that row

In [7]:
def my_expert_system(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    n_rows_with_sticks = len(state.rows) - state.rows.count(0)
    n_rows_with_1_stick = state.rows.count(1)

    # here I consider the case in which I have 1 row with 1 stick and one row with more than sticks
    if (n_rows_with_1_stick == 1 and n_rows_with_sticks == 2): 
        row, objects = [(row, objects) for row, objects in enumerate(state.rows) if objects > 1][0]
        if objects <= state.k:
            ply = Nimply(row, objects)
            return ply

    # here I consider the case in which I have 2 rows with 1 stick each
    if n_rows_with_1_stick == 2: 
        row, objects = [(row, objects) for row, objects in enumerate(state.rows) if objects >= 1][0]
        ply = Nimply(row, 1)
        return ply

    # here I consider the case in which I have only one more row with sticks
    if n_rows_with_sticks == 1:
        row, objects = [(row, objects) for row, objects in enumerate(state.rows) if objects >= 1][0]
        if objects <= (state.k + 1):
            ply = Nimply(row, objects - 1)
            return ply
    
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns == 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    logging.debug(pformat(f"{analysis['possible_moves']}"))
    ply = random.choice(spicy_moves)
    return ply

## Match between an expert system and a pure random strategy

In [8]:
logging.getLogger().setLevel(logging.INFO)

strategy = (pure_random, my_expert_system)

nim = Nim(4, 3)
logging.info(f"init : {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
logging.info(f"status: Player {player} won!")


INFO:root:init : <1 3 5 7>
INFO:root:ply: player 0 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 7>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 2 5 7>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 1 5 7>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=2)
INFO:root:status: <0 1 5 5>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 1 5 4>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 1 5 3>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=2)
INFO:root:status: <0 1 5 1>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 0 5 1>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 4 1>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 0 3 1>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 3 0>
INFO:root:ply: p

## (1+lambda) strategy

To implement this evolution strategy algorithm, I decided to define 3 basic moves and to create my player in such a way that he is able to understand which move he has to use in order to win the game. 
These three moves consist in:
- removing the maximum allowed number of sticks from a random row
- removing 1 stick from a random row
- leaving nim sum equal to 0

I added to these moves also the previously defined strategy used to create my_expert_system.
A point of strenght of this implementation consists in the division of the game in two phases. Indeed, as explained before, to win the nim game we have to behave in two completely different ways based on the fact that we are in early/mid game or in end game, so I thought that it would have been useful to have two different sets of weights.
I trained the player against an opponent that uses pure_random strategy and, once it finished, I tested it against another random player and against the optimal strategy. Note that as optimal strategy I used the one already defined and not my personal expert system.

In [9]:
#remove the maximum allowed number of sticks from a random row
def remove_max(state: Nim) -> Nimply:
    ply_list = []
    if state.k == None:
        for ply in (Nimply(r, c) for r, c in enumerate(state.rows) if c >= 1):
            ply_list.append(ply)
    else:
        for ply in (Nimply(r, min(c, state.k)) for r, c in enumerate(state.rows) if c >= 1):
            ply_list.append(ply)
    return random.choice(ply_list)

In [10]:
#remove 1 stick from a random row
def remove_1(state: Nim) -> Nimply:
    ply_list = []
    for ply in (Nimply(r, 1) for r, c in enumerate(state.rows) if c >= 1):
        ply_list.append(ply)
    return random.choice(ply_list)

In [11]:
#leave nim sum equal to 0
def leave_ns_0(state: Nim) -> Nimply:
    analysis = analize(state)
    ply_list = [ply for ply, ns in analysis["possible_moves"].items() if ns == 0]
    if not ply_list:
        ply_list = list(analysis["possible_moves"].keys())
    return random.choice(ply_list)

In [12]:
@dataclass
class my_player:

    def __init__(self, strategies, weights):

        self._strategies = [remove_1, remove_max, leave_ns_0, my_expert_system]
        if weights is None:
            self._weights = [] #list of list of weights (first list for the first phase, second list for the second phase)
            self._weights.append(np.random.random(len(self._strategies)))
            self._weights.append(np.random.random(len(self._strategies)))
        else:
            self._weights = weights

    @property
    def weights(self):
        return self._weights
    
    @property
    def strategies(self):
        return self._strategies

    def mutate(my_p):
        global LAMBDA
        global sigma

        weights = []
        weights.append(np.random.normal(loc=0, scale=sigma, size=(len(my_p._strategies)))+ my_p.weights[0])
        weights.append(np.random.normal(loc=0, scale=sigma, size=(len(my_p._strategies)))+ my_p.weights[1])
        
        for i in range(len(my_p._strategies)): #set to 0 the weights if negative
            if weights[0][i] < 0:
                weights[0][i] = 0.001
            if weights[1][i] < 0:
                weights[1][i] = 0.001

        return my_player(strategies=my_p.strategies, weights=weights)
        
    

In [13]:
LAMBDA = 20
sigma = 0.2 #mutation rate

In [14]:
def Endgame(state: Nim) -> int: #it returns the number of not empy rows
    n_rows_with_sticks = len(state.rows) - state.rows.count(0)
    return n_rows_with_sticks

def games(my_player, opponent_strategy, matches):
    wins = 0
    for _ in range(matches):
        nim = Nim(4, 3)
        player = 0
        endgame = 0
        while nim:            
            if player == 0:
                endgame = Endgame(nim)
                if endgame > 2:
                    strategy = random.choices(my_player.strategies, my_player.weights[0], k=1)[0]
                else:
                    strategy = random.choices(my_player.strategies, my_player.weights[1], k=1)[0]
                ply = strategy(nim)
            else:
                ply = opponent_strategy(nim)
            nim.nimming(ply)
            player = 1 - player
        if player == 0:
            wins += 1
    fitness = wins / matches
    return fitness

In [18]:
matches = 500
opponent_strategy = pure_random
n_new_gen = 100

parent = my_player(None, None) #parent of the first generations
best_fitness = games(parent, opponent_strategy, matches) #ratio wins/matches used as fitness
print("Accuracies:")
print(f"  First accuracy: {best_fitness:.2%}")

for i in range(n_new_gen):
    offspring = []
    offspring_fitness = []
    for _ in range(LAMBDA): #generate lambda new individuals
        offspring.append(parent.mutate())
    for off in offspring: #evaluate the fitness of the new individuals
        offspring_fitness.append(games(off, opponent_strategy, matches))

    best_off = np.argmax(offspring_fitness) #selecting the best new individual
    if best_fitness < offspring_fitness[best_off]: 
        parent = offspring[best_off] #updating the parent for the next gen
        best_fitness = offspring_fitness[best_off] #updating the best fitness
        print(f"  New best accuracy achieved in the {i+1}° generation: {best_fitness:.2%}")
    if best_fitness >= 0.99:
        break

print('\nBest performances:')
print(f'  Accuracy of the trained player vs pure_random: {games(parent, pure_random, 10):.2%}')
print(f'  Accuracy of the trained player vs optimal (not my expert system): {games(parent, optimal, 10):.2%}')

Accuracies:
  First accuracy: 48.20%
  New best accuracy achieved in the 1° generation: 64.00%
  New best accuracy achieved in the 2° generation: 76.60%
  New best accuracy achieved in the 3° generation: 82.80%
  New best accuracy achieved in the 4° generation: 85.80%
  New best accuracy achieved in the 5° generation: 90.80%
  New best accuracy achieved in the 6° generation: 97.20%
  New best accuracy achieved in the 7° generation: 99.80%

Best performances:
  Accuracy of the trained player vs pure_random: 100.00%
  Accuracy of the trained player vs optimal (not my expert system): 100.00%
