# Lab3
### Task
Write agents able to play Nim, with an arbitrary number of rows and an upper bound  on the number of objects that can be removed in a turn (a.k.a., subtraction game).  
The player taking the last object wins.  

- Task3.1: An agent using fixed rules based on nim-sum (i.e., an expert system)
- Task3.2: An agent using evolved rules
- Task3.3: An agent using minmax
- Task3.4: An agent using reinforcement learning

# Lab3
### Task
Write agents able to play Nim, with an arbitrary number of rows and an upper bound  on the number of objects that can be removed in a turn (a.k.a., subtraction game).  
The player taking the last object wins.  

- Task3.1: An agent using fixed rules based on nim-sum (i.e., an expert system)
- Task3.2: An agent using evolved rules
- Task3.3: An agent using minmax
- Task3.4: An agent using reinforcement learning

## Classes and functions

In [2]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor, or_, and_, not_
import math

In [3]:
# A single move: take `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")

In [4]:
class Nim:
    """Board of a Nim game.

    Row ``i`` starts with ``2 * i + 1`` objects. ``k`` is the largest number
    of objects a single move may remove, or ``None`` for no limit.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        # Odd numbers 1, 3, 5, ... one per row.
        self._rows = list(range(1, 2 * num_rows, 2))
        self._k = k

    def __bool__(self):
        # The game is still running while at least one object is left.
        remaining = sum(self._rows)
        return remaining > 0

    def __str__(self):
        return f"<{' '.join(map(str, self._rows))}>"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Per-move removal limit (``None`` when unlimited)."""
        return self._k

    def nimming(self, ply: Nimply) -> None:
        """Apply *ply* (row index, number of objects) to the board in place.

        Fails an assertion when the row holds fewer objects than requested
        or when the request exceeds ``k``.
        """
        row, num_objects = ply
        available = self._rows[row]
        assert available >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] = available - num_objects

In [5]:
def nim_sum(state: Nim) -> int:
    """Return the nim-sum of *state*: the bitwise XOR of all its row sizes.

    A board without rows has nim-sum 0 (the XOR identity) instead of raising.
    """
    result = 0
    for count in state.rows:
        result ^= count
    return result

def cook_status(state: Nim) -> dict:
    """Collect the derived facts about *state* that the strategies rely on.

    Returns a dict with the keys:
      - "possible_moves": every legal move as ``Nimply(row, num_objects)``,
        honouring ``state.k`` when it is set;
      - "shortest_row": index of the non-empty row with the fewest objects;
      - "longest_row": index of the row with the most objects;
      - "nim_sum": XOR of all row sizes;
      - "brute_force": list of ``(move, nim_sum_after_move)`` pairs, in the
        same order as "possible_moves".

    Raises:
        ValueError: if no row contains any object.
    """
    rows = state.rows
    k = state.k

    cooked = dict()
    cooked["possible_moves"] = [
        Nimply(r, o)
        for r, c in enumerate(rows)
        for o in range(1, (c if k is None else min(c, k)) + 1)
    ]
    cooked["shortest_row"] = min((x for x in enumerate(rows) if x[1] > 0), key=lambda y: y[1])[0]
    cooked["longest_row"] = max(enumerate(rows), key=lambda y: y[1])[0]
    total = nim_sum(state)
    cooked["nim_sum"] = total

    # XOR-ing out the old row size and XOR-ing in the new one gives the
    # nim-sum after the move without copying the whole game per candidate.
    cooked["brute_force"] = [
        (m, total ^ rows[m.row] ^ (rows[m.row] - m.num_objects))
        for m in cooked["possible_moves"]
    ]

    return cooked

In [6]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row and remove a random legal amount from it.

    The amount is capped by ``state.k`` when a per-move limit is set, so the
    returned move is always accepted by ``Nim.nimming``.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    limit = state.rows[row]
    if state.k is not None:
        limit = min(limit, state.k)
    num_objects = random.randint(1, limit)
    return Nimply(row, num_objects)

In [7]:
def evaluate(strategy1: Callable, strategy2: Callable, num_matches=100, nim_size=6) -> float:
    """Play *num_matches* games and return the fraction won by *strategy1*.

    Every game starts from a fresh ``Nim(nim_size)`` board and *strategy1*
    always moves first. The player taking the last object wins.

    Raises:
        ZeroDivisionError: if *num_matches* is 0.
    """
    opponent = (strategy1, strategy2)
    won = 0

    for _ in range(num_matches):
        nim = Nim(nim_size)
        player = 0
        while nim:
            ply = opponent[player](nim)
            nim.nimming(ply)
            player = 1 - player
        # `player` is flipped after every move, so player == 1 here means
        # that player 0 (strategy1) made the final move.
        if player == 1:
            won += 1
    return won / num_matches

## 3.0: Let's try some basic hard-coded rules first

In [40]:
def make_strategy(genome: dict) -> Callable:
    """Build a strategy from two hard-coded rules plus a random fallback.

    Rules, in priority order:
      1. If exactly two rows are non-empty and differ in size, shrink the
         longer one so both hold the same number of objects.
      2. If the total number of objects is odd, remove an odd amount so the
         total becomes even.
      3. Otherwise remove a random amount from the shortest row with
         probability ``genome["p"]``, else from the longest row.

    Every returned move removes at least one object and respects ``state.k``.
    """
    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)
        rows = state.rows
        moves = data["possible_moves"]
        shortest, longest = data["shortest_row"], data["longest_row"]

        def random_take(row: int) -> Nimply:
            # Never ask for more than the per-move limit allows.
            limit = rows[row] if state.k is None else min(rows[row], state.k)
            return Nimply(row, random.randint(1, limit))

        # Rule 1: equalise the last two rows. When they are already equal the
        # gap is 0, which would be an empty (illegal) move, so fall through.
        if sum(1 for count in rows if count != 0) == 2:
            gap = rows[longest] - rows[shortest]
            if gap > 0 and (longest, gap) in moves:
                return Nimply(longest, gap)

        # Rule 2: removing an odd amount turns an odd total into an even one.
        # Scanning from the end keeps the choice the original loop ended on.
        if sum(rows) % 2 == 1:
            for row, count in reversed(moves):
                if count % 2 == 1:
                    return Nimply(row, count)

        # Rule 3: random amount from the shortest or the longest row.
        if random.random() < genome["p"]:
            return random_take(shortest)
        return random_take(longest)

    return evolvable

Two simple rules (making the total number of objects even, and always leaving the last two rows with the same number of objects) allow us to win more than 90% of the matches against a random strategy.

In [61]:
# Fraction of matches won by the hard-coded rules (moving first) against the random player.
print(evaluate(make_strategy({"p":0.3}),pure_random))

0.95


In [28]:
# Log every board state while one demonstration match is played.
logging.getLogger().setLevel(logging.DEBUG)

# Player 0 uses the hard-coded rules, player 1 moves at random.
strategy = (make_strategy({"p":0.3}), pure_random)

nim = Nim(11)
logging.debug(f"status: Initial board  -> {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    player = 1 - player
# `player` was flipped after the final move, so the winner is the other one.
winner = 1 - player
logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9 11 13 15 17 19 21>
DEBUG:root:status: After player 0 -> <1 3 5 7 9 11 13 15 17 19 0>
DEBUG:root:status: After player 1 -> <1 0 5 7 9 11 13 15 17 19 0>
DEBUG:root:status: After player 0 -> <1 0 5 7 9 11 13 15 17 0 0>
DEBUG:root:status: After player 1 -> <1 0 5 7 9 11 6 15 17 0 0>
DEBUG:root:status: After player 0 -> <1 0 5 7 9 11 6 15 0 0 0>
DEBUG:root:status: After player 1 -> <1 0 5 6 9 11 6 15 0 0 0>
DEBUG:root:status: After player 0 -> <1 0 5 6 9 11 6 0 0 0 0>
DEBUG:root:status: After player 1 -> <1 0 5 6 9 11 5 0 0 0 0>
DEBUG:root:status: After player 0 -> <1 0 5 6 9 11 0 0 0 0 0>
DEBUG:root:status: After player 1 -> <0 0 5 6 9 11 0 0 0 0 0>
DEBUG:root:status: After player 0 -> <0 0 5 6 9 0 0 0 0 0 0>
DEBUG:root:status: After player 1 -> <0 0 5 3 9 0 0 0 0 0 0>
DEBUG:root:status: After player 0 -> <0 0 5 3 0 0 0 0 0 0 0>
DEBUG:root:status: After player 1 -> <0 0 0 3 0 0 0 0 0 0 0>
DEBUG:root:status: After player 0 -> <0 0 0 0 0 0 0 0 

## 3.1: An agent using fixed rules based on nim-sum (i.e., an expert system)

In [43]:
def optimal_strategy(state: Nim) -> Nimply:
    """Play the first legal move that leaves a nim-sum of 0.

    When no such move exists, a random legal move is returned instead. The
    random fallback is only drawn when it is actually needed.
    """
    data = cook_status(state)
    for move, resulting_nim_sum in data["brute_force"]:
        if resulting_nim_sum == 0:
            return Nimply(*move)
    move, _ = random.choice(data["brute_force"])
    return Nimply(*move)

In [49]:
# Log every board state while one demonstration match is played.
logging.getLogger().setLevel(logging.DEBUG)

# Player 0 moves at random, player 1 uses the nim-sum strategy.
strategy = (pure_random, optimal_strategy)

nim = Nim(11)
logging.debug(f"status: Initial board  -> {nim}")
player = 0
while nim:
    ply = strategy[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    player = 1 - player
# `player` was flipped after the final move, so the winner is the other one.
winner = 1 - player
logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9 11 13 15 17 19 21>
DEBUG:root:status: After player 0 -> <1 3 5 7 9 11 9 15 17 19 21>
DEBUG:root:status: After player 1 -> <1 3 5 7 9 11 9 15 2 19 21>
DEBUG:root:status: After player 0 -> <1 0 5 7 9 11 9 15 2 19 21>
DEBUG:root:status: After player 1 -> <1 0 5 4 9 11 9 15 2 19 21>
DEBUG:root:status: After player 0 -> <1 0 5 4 9 11 9 15 2 1 21>
DEBUG:root:status: After player 1 -> <1 0 5 4 9 11 9 15 2 1 7>
DEBUG:root:status: After player 0 -> <1 0 5 3 9 11 9 15 2 1 7>
DEBUG:root:status: After player 1 -> <1 0 2 3 9 11 9 15 2 1 7>
DEBUG:root:status: After player 0 -> <1 0 2 3 9 11 9 15 2 0 7>
DEBUG:root:status: After player 1 -> <0 0 2 3 9 11 9 15 2 0 7>
DEBUG:root:status: After player 0 -> <0 0 2 3 8 11 9 15 2 0 7>
DEBUG:root:status: After player 1 -> <0 0 2 2 8 11 9 15 2 0 7>
DEBUG:root:status: After player 0 -> <0 0 2 2 8 11 0 15 2 0 7>
DEBUG:root:status: After player 1 -> <0 0 2 2 1 11 0 15 2 0 7>
DEBUG:root:status: After player 0 -> <0 0

### Evaluation optimal strategy
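We can evaluate the optimal strategy to confirm that it wins all the matches against a random strategy.  
In the future it will be unnecessary to compare the optimal strategy with any other kind of strategy, because it always wins when it moves first from a position with a non-zero nim-sum (as is the case for the default board used by `evaluate`).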

In [9]:
# Fraction of matches won by the nim-sum strategy (moving first) against the random player.
evaluate(optimal_strategy, pure_random)

1.0

## 3.2: An agent using evolved rules

Define a semi-optimal strategy, so that the result of the GA can be compared against an opponent that is not optimal but still very close to it.

In [10]:
def semi_optimal_strategy(state: Nim) -> Nimply:
    """Play the nim-sum move with probability 0.75, otherwise a random move.

    The nim-sum branch picks the first move leaving a nim-sum of 0 and falls
    back to a random legal move when none exists. The random branch picks a
    random non-empty row and a random amount capped by ``state.k``.
    """
    data = cook_status(state)
    if random.random() < 0.75:
        for move, resulting_nim_sum in data["brute_force"]:
            if resulting_nim_sum == 0:
                return Nimply(*move)
        # No winning move: only now draw the random fallback.
        move, _ = random.choice(data["brute_force"])
        return Nimply(*move)

    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    limit = state.rows[row] if state.k is None else min(state.rows[row], state.k)
    return Nimply(row, random.randint(1, limit))

In [85]:
from statistics import *

def tournament(population, tournament_size=2):
    """Draw *tournament_size* individuals (with replacement) and return the fittest."""
    contenders = random.choices(population, k=tournament_size)
    return max(contenders, key=lambda contender: contender.fitness)

def crossover(g1, g2, problem_size):
    """One-point crossover: a prefix of g1's genome joined to the matching suffix of g2's.

    The cut point is drawn from 0..problem_size inclusive, so the child may
    equal either parent's genome.
    """
    cut = random.randint(0, problem_size)
    head = g1.genome[:cut]
    tail = g2.genome[cut:]
    return head + tail

def mutation(g, problem_size):
    """Return a copy of genome *g* with one randomly chosen bit flipped."""
    point = random.randint(0, problem_size - 1)
    before = g[:point]
    flipped = [1 - g[point]]
    after = g[point + 1:]
    return before + flipped + after

def initialize_population():
    """Placeholder that builds nothing and returns None."""
    return None

def my_and(rows: tuple) -> int:
    """Return the bitwise AND of every value in *rows*.

    Raises ValueError when *rows* is empty.
    """
    first, *others = rows
    folded = first
    for value in others:
        folded = folded & value
    return folded

def my_or(rows: tuple) -> int:
    """Return the bitwise OR of every value in *rows*.

    Raises ValueError when *rows* is empty.
    """
    first, *others = rows
    folded = first
    for value in others:
        folded = folded | value
    return folded

# Features computed on the tuple of row sizes after a candidate move; the
# genome holds one weight per entry, in this order. `stdev` comes from the
# star import of `statistics` above.
RULES = [sum, min, stdev, my_and, my_or]

def evolvable_strategy(genome):
    """Turn a genome (one weight per entry of RULES) into a playable strategy.

    Each legal move is scored by the weighted sum of the RULES features
    computed on the board that move would leave. The first move scoring
    exactly 0 is played immediately; otherwise the lowest-scoring move wins.
    """
    def score_after(board, move):
        # Evaluate the features on a copy so the real board is untouched.
        board_after = deepcopy(board)
        board_after.nimming(move)
        return sum(weight * rule(board_after.rows) for weight, rule in zip(genome, RULES))

    def strategy(state: Nim):
        scored_moves = []
        for move in cook_status(state)["possible_moves"]:
            score = score_after(state, move)
            if score == 0:
                return move
            scored_moves.append((move, score))
        best_move, _ = min(scored_moves, key=lambda pair: pair[1])
        return best_move

    return strategy

def fitness(genome):
    """Score *genome* as a pair of win rates over 10 matches each.

    The first element is the win rate against ``semi_optimal_strategy``, the
    second against ``pure_random``.
    """
    versus_semi_optimal = evaluate(evolvable_strategy(genome), semi_optimal_strategy, num_matches=10)
    versus_random = evaluate(evolvable_strategy(genome), pure_random, num_matches=10)
    return (versus_semi_optimal, versus_random)

In [89]:
def genetic_algorithm(rules: list, fixed=True):
    """Evolve a 0/1 weight vector over the rules and return the best strategy.

    Args:
        rules: list of rule functions; its length sets the genome length.
            Scoring goes through ``fitness``, which uses the module-level
            ``RULES``, so pass ``RULES`` itself.
        fixed: when True the mutation probability stays at 0.3; when False
            it rises to 0.5 and then 0.7 as generations go by (one step
            every 10 generations).

    Returns:
        The strategy built by ``evolvable_strategy`` from the fittest genome.

    The search stops after 100 generations, or earlier when the best fitness
    in the population has not improved for 20 consecutive generations.
    """
    Individual = namedtuple('Individual', ('genome', 'fitness'))
    NUM_GENERATIONS = 100
    POPULATION_SIZE = 10
    OFFSPRING_SIZE = 5
    PLATEAU_LIMIT = 20
    prob_vett = [0.3, 0.5, 0.7]
    problem_size = len(rules)

    # Initialize population with random 0/1 genomes, one gene per rule.
    population = []
    for _ in range(POPULATION_SIZE):
        genome = [random.choice([0, 1]) for _ in range(problem_size)]
        population.append(Individual(genome, fitness(genome)))

    best_fitness = max(ind.fitness for ind in population)
    plateau_count = 0  # consecutive generations without a better best individual

    # Evolution algorithm
    for g in range(NUM_GENERATIONS):
        # Probability of mutating (instead of crossing over) this generation.
        if fixed:
            prob = prob_vett[0]
        else:
            prob = prob_vett[min(g // 10, len(prob_vett) - 1)]

        offspring = []
        for _ in range(OFFSPRING_SIZE):
            if random.random() < prob:
                parent = tournament(population)
                genome = mutation(parent.genome, problem_size)
            else:
                p1 = tournament(population)
                p2 = tournament(population)
                genome = crossover(p1, p2, problem_size)
            offspring.append(Individual(genome, fitness(genome)))

        # Keep only the fittest individuals among parents and offspring.
        population = sorted(
            population + offspring, key=lambda ind: ind.fitness, reverse=True
        )[:POPULATION_SIZE]

        # Plateau check on the population's best, not on the offspring alone.
        if population[0].fitness > best_fitness:
            best_fitness = population[0].fitness
            plateau_count = 0
        else:
            plateau_count += 1
            if plateau_count == PLATEAU_LIMIT:
                break

    logging.info(f"The best individual is: {population[0]}")
    return evolvable_strategy(population[0].genome)


In [87]:
# INFO level is enough to see the best individual reported by the GA.
logging.getLogger().setLevel(logging.INFO)
# Evolve weights over RULES with the default fixed mutation probability.
final_strategy = genetic_algorithm(RULES)
print(final_strategy)

INFO:root:The best individual is: Individual(genome=[0, 1, 0, 1, 1], fitness=(0.9, 0.7))


<function evolvable_strategy.<locals>.strategy at 0x0000021C2B042CA0>


In [88]:
# Win rate of the evolved strategy (moving first) against each reference opponent.
print(evaluate(final_strategy,pure_random))
print(evaluate(final_strategy,semi_optimal_strategy))
print(evaluate(final_strategy,optimal_strategy))

0.84
0.47
0.0
