Copyright **`(c)`** 2023 Stiven Hidri `<hidristiven@gmail.com>`  
[`https://github.com/stiven-hidri/CI2324`](https://github.com/stiven-hidri/CI2324)


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.
## !! Note: the formula shown in the slides implies that the player who takes the last item wins. That rule is used here instead of the one stated above !!

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside the course repo
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



## Imports

In [21]:
from pprint         import pprint, pformat
from collections    import namedtuple
from copy           import deepcopy
from dataclasses    import dataclass
from functools      import reduce
from itertools      import product
import numpy        as np
import random

## The *Move* tuple and the *Field* and *Game* classes

In [22]:
# A move removes `taken` objects from the row at index `row`.
Move = namedtuple("Move", "row, taken")


class Field:
    """Board state: the number of objects left in each row, plus the cap ``k``.

    Equality and hashing depend only on the row counts, so a ``Field`` can be
    used as a dictionary key identifying a board position.
    """

    def __init__(self, in_rows, k) -> None:
        """Build a board.

        Args:
            in_rows: either an ``int`` (number of rows of a 1, 3, 5, ...
                pyramid) or an iterable with the object count of every row.
            k: maximum number of objects removable in one move, or ``None``
                for no limit.
        """
        self._k = k
        if isinstance(in_rows, int):
            # Pyramid layout: row i holds 2*i + 1 objects.
            self._rows = [i * 2 + 1 for i in range(in_rows)]
        else:
            self._rows = list(in_rows)
        # Taking the length of the materialised list also works for iterables
        # that do not support len().
        self._n = len(self._rows)
        # Snapshot of the starting layout so again() restores *this* board
        # rather than always falling back to the pyramid.
        self._initial_rows = tuple(self._rows)

    def again(self):
        """Reset the board to the layout it was created with."""
        self._rows = list(self._initial_rows)

    def nimming(self, move: Move) -> None:
        """Apply ``move`` in place.

        Raises:
            AssertionError: if the move takes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        assert move.taken >= 1
        assert self._rows[move.row] >= move.taken
        assert self._k is None or move.taken <= self._k
        self._rows[move.row] -= move.taken

    def __eq__(self, other) -> bool:
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing `_rows` attribute.
        if not isinstance(other, Field):
            return NotImplemented
        return self._rows == other._rows

    def __key(self):
        return tuple(self._rows)

    def __hash__(self):
        return hash(self.__key())

    def __str__(self) -> str:
        return str(self.Rows)

    @property
    def Rows(self) -> tuple:
        """Current object count of every row, as an immutable tuple."""
        return tuple(self._rows)

    @property
    def K(self) -> int:
        """Per-move cap on removed objects (``None`` means unbounded)."""
        return self._k


class Game:
    """A match played on a ``Field``; truthy while objects remain on the board."""

    def __init__(self, in_rows:int=4, k = 3) -> None:
        """Create the board and print a one-line summary of the initial state."""
        self.field = Field(in_rows, k)
        row_count = len(self.Rows)
        print(f"Rows:{row_count}   K:{self.K}   INIT: {self}\n")

    def __bool__(self):
        remaining = sum(self.Rows)
        return remaining > 0

    def __str__(self):
        counts = " ".join(map(str, self.Rows))
        return f"< {counts} >"

    def action(self, move: Move) -> None:
        """Apply ``move`` to the underlying field."""
        self.field.nimming(move)

    def play(self, agents:tuple) -> None:
        """Play one verbose game between the two agents, printing each move.

        The starting agent is drawn at random; the agent that made the last
        move is announced as the winner.
        """
        current = random.choice(range(0, 2))
        while self:
            mover = agents[current]
            chosen = mover.strategy(self)
            print(f"{mover}:\t{self}", end="  ->  ")
            self.action(chosen)
            print(f"{self}")
            current = 1 - current

        # `current` was flipped after the final move, so undo it to get the winner.
        winner = agents[1 - current]
        print(f"\n{winner} wins")

    def playManyTimes(self, agents:tuple, times=100) -> None:
        """Play ``times`` silent games and print the win tally of both agents."""
        wins = [0, 0]
        print("Playing...")
        for _ in range(times):
            current = random.choice(range(0, 2))
            self.field.again()
            while self:
                self.action(agents[current].strategy(self))
                current = 1 - current

            # The agent that moved last gets the point.
            wins[1 - current] += 1

        first, second = agents[0], agents[1]
        print(f"{first._name} {wins[0]} VS {second._name} {wins[1]}")

    @property
    def Rows(self) -> tuple:
        """Current row counts of the underlying field."""
        return self.field.Rows

    @property
    def K(self) -> int:
        """Per-move cap of the underlying field."""
        return self.field.K

## Agents

In [23]:
#Agents
class Agent:
    """Base class for players; provides nim-sum evaluation of positions."""

    _name = "AGENT"

    def __str__(self):
        return f"{self._name}"

    @staticmethod
    def _xor_rows(rows, k) -> int:
        """Return the XOR of ``rows``, each reduced modulo ``k + 1`` when ``k`` is set.

        Plain integer XOR is used, so there is no limit on the size of a row
        and an empty board yields 0.
        """
        total = 0
        for count in rows:
            total ^= count if k is None else count % (k + 1)
        return total

    def nim_sum(self, game: "Game") -> int:
        """Return the nim-sum of the current position of ``game``.

        Reads ``game.Rows`` and ``game.K``; when ``K`` is not ``None`` every
        row is first reduced modulo ``K + 1``.
        """
        return self._xor_rows(game.Rows, game.K)

    def get_possible_moves(self, game: "Game") -> dict:
        """Map every legal move to the nim-sum of the position it leads to.

        Moves are listed by ascending row, then ascending number of objects
        taken. ``game`` is not modified: the resulting rows are computed
        directly, without copying the game.
        """
        rows = tuple(game.Rows)
        k = game.K
        possible_moves = dict()
        for r, c in enumerate(rows):
            # Never enumerate more objects than the cap allows.
            limit = c if k is None else min(c, k)
            for o in range(1, limit + 1):
                after = rows[:r] + (c - o,) + rows[r + 1:]
                # Uses the static helper rather than self.nim_sum so that a
                # subclass overriding nim_sum cannot break move enumeration.
                possible_moves[Move(r, o)] = self._xor_rows(after, k)

        return possible_moves

class randomAgent(Agent):
    """Agent that picks uniformly among all legal moves."""

    _name = "RND"

    def strategy(self, game: Game) -> Move:
        """Return one legal move for ``game`` chosen at random."""
        legal = super().get_possible_moves(game)
        candidates = list(legal)
        return random.choice(candidates)
    
class optimalAgent(Agent):
    """Agent that moves to a zero nim-sum position whenever one is reachable."""

    _name = "OPT"

    def strategy(self, game: Game) -> Move:
        """Return a move leaving nim-sum 0 if any exists, else a random legal move."""
        outcomes = Agent.get_possible_moves(self, game)
        zeroing = [candidate for candidate, value in outcomes.items() if value == 0]
        # Fall back to the full list of legal moves when no zeroing move exists.
        pool = zeroing if zeroing else list(outcomes.keys())
        return random.choice(pool)






    

## Evolutionary Algorithm Agent

In [24]:
class eaAgent(Agent):
    """Agent whose move table is evolved by an evolutionary loop.

    A genotype is a dict mapping every non-empty board state (``Field``) of an
    ``N``-row pyramid to the ``Move`` played in that state. Training runs in
    ``__init__``; the fittest individual found is kept in ``self.strongest``.
    """

    _name = "EA"
    _POPULATION_SIZE = 1000
    _OFFSPRING_SIZE = 100
    _MUTATION_PROBABILITY = .3
    _TOURNAMENT_SIZE = 10
    _GENERATIONS = 200

    @dataclass
    class Individual:
        # Score returned by eaAgent.fitness for `genotype`.
        fitness: int
        # Mapping Field -> Move.
        genotype: dict

    def __init__(self, N, k) -> None:
        """Build the state table for ``N`` rows with cap ``k`` and train.

        Args:
            N: number of pyramid rows.
            k: maximum objects removable per move, or ``None`` for no limit.
        """
        self._N = N
        self._k = k
        self._std_genotype = self.generate_genotype(N)
        self.strongest = self.train()

    def generate_genotype(self, N):
        """Return a dict with one ``Field`` key per non-empty state, all mapped to ``None``.

        Row ``i`` ranges over 0 .. 2*i + 1 objects; the all-zero board is
        skipped because no move exists there.
        """
        per_row_counts = [range(i * 2 + 2) for i in range(N)]
        return {
            Field(counts, self._k): None
            for counts in product(*per_row_counts)
            if sum(counts) > 0
        }

    def nim_sum(self, gene:tuple[Field, Move]) -> int:
        """Return the nim-sum of the position reached by playing ``gene[1]`` on ``gene[0]``.

        NOTE: the parameter differs from ``Agent.nim_sum`` (which takes a
        game); here a ``(Field, Move)`` pair is evaluated. The field passed in
        is not modified.
        """
        field = deepcopy(gene[0])
        field.nimming(gene[1])

        cap = field.K
        total = 0
        for count in field.Rows:
            # With a cap, each row is reduced modulo cap + 1 before XOR.
            total ^= count if cap is None else count % (cap + 1)
        return total

    def _max_take(self, available: int) -> int:
        """Return the most objects that may be taken from a row holding ``available``."""
        # An unbounded game (k is None) is limited only by the row itself.
        return available if self._k is None else min(available, self._k)

    def _random_move(self, field: Field) -> Move:
        """Draw a random legal move for ``field`` (random non-empty row, then random amount)."""
        row, available = random.choice(
            [(i, r) for i, r in enumerate(field._rows) if r > 0]
        )
        return Move(row, random.randint(1, self._max_take(available)))

    def initialize_genes(self):
        """Return a fresh genotype with a random legal move assigned to every state."""
        geno = deepcopy(self._std_genotype)
        for state in geno:
            geno[state] = self._random_move(state)

        return geno

    def fitness(self, genotype:dict[Field, Move]):
        """Score a genotype: per state, 10 if the move leaves nim-sum 0, plus objects taken."""
        return sum(
            int(self.nim_sum((state, move)) == 0) * 10 + move.taken
            for state, move in genotype.items()
        )

    def select_parent(self, pop):
        """Tournament selection: best of ``_TOURNAMENT_SIZE`` random individuals."""
        pool = random.sample(pop, self._TOURNAMENT_SIZE)
        champion = max(pool, key=lambda i: i.fitness)
        return champion

    def mutate(self, ind: Individual) -> Individual:
        """Return a copy of ``ind`` with the move of one random state re-drawn."""
        offspring = deepcopy(ind)
        state = random.choice(list(ind.genotype))
        offspring.genotype[state] = self._random_move(state)

        offspring.fitness = self.fitness(offspring.genotype)
        return offspring

    def one_cut_xover(self, ind1: Individual, ind2: Individual) -> Individual:
        """Return a child of ``ind1`` with a random subset of genes taken from ``ind2``.

        Despite the name, the genes taken from ``ind2`` are a random sample of
        states (of random size), not a contiguous segment.
        """
        states = list(ind1.genotype)
        incoming_genes = random.randint(1, len(states))
        crossing_genes = random.sample(states, k=incoming_genes)
        child_genes = deepcopy(ind1.genotype)
        for state in crossing_genes:
            # Move is an immutable namedtuple, so sharing it is safe.
            child_genes[state] = ind2.genotype[state]

        return self.Individual(fitness=self.fitness(child_genes), genotype=child_genes)

    def train(self) -> Individual:
        """Evolve a population and return its fittest individual.

        Each generation creates ``_OFFSPRING_SIZE`` children (by mutation with
        probability ``_MUTATION_PROBABILITY``, otherwise by crossover), adds
        them to the population and keeps the ``_POPULATION_SIZE`` fittest.
        """
        population = []
        for _ in range(self._POPULATION_SIZE):
            genes = self.initialize_genes()
            population.append(self.Individual(genotype=genes, fitness=self.fitness(genes)))

        print(f"Generations: {self._GENERATIONS}")
        print("Training EA...")
        for gen in range(self._GENERATIONS):
            print(f"\rG{gen+1}", end='', flush=True)
            offspring = []
            for _ in range(self._OFFSPRING_SIZE):
                if random.random() < self._MUTATION_PROBABILITY:
                    # mutation
                    p = self.select_parent(population)
                    o = self.mutate(p)
                else:
                    # xover
                    p1 = self.select_parent(population)
                    p2 = self.select_parent(population)
                    o = self.one_cut_xover(p1, p2)
                offspring.append(o)

            # Parents and children compete together; only the fittest survive.
            population.extend(offspring)
            population.sort(key=lambda i: i.fitness, reverse=True)
            population = population[:self._POPULATION_SIZE]

        print("\nDone...")

        return max(population, key=lambda i: i.fitness)

    def strategy(self, game:Game) -> Move:
        """Look up the evolved move for the current board of ``game``.

        Raises:
            KeyError: if the board state is not in the evolved table.
        """
        return self.strongest.genotype[game.field]
    

# Build an EA agent for N=2 rows and k=3; eaAgent.__init__ calls train(),
# so the whole evolutionary run happens on this line.
ea = eaAgent(2, 3)

Generations: 200
Training EA...
G200
Done...


In [28]:
# Two-row pyramid with at most 3 objects removable per move.
game = Game(2, 3)
# Play the default 100 games between the nim-sum agent and the random agent
# and print the win tally.
game.playManyTimes((optimalAgent(), randomAgent()))

Rows:2   K:3   INIT: < 1 3 >

Playing...
OPT 90 VS RND 10
