In [391]:
import logging
from pprint import pformat
from collections import namedtuple
import random
from copy import deepcopy
from dataclasses import dataclass
from random import random, choice, randint
import numpy as np

In [392]:

N = 5 #rows

NUM_GENERATION = 100
NUM_POPULATION = 200
MUTATION_RATE = 0.10
AGENT_MUTATIONS = 20
NUM_PARENTS = 2

Nimply = namedtuple("Nimply", "row, num_objects")
genoma = {"rows": 0, "elements": 0}
params = {"genoma": genoma, "sigma_r": 0.2, "sigma_t": 0.3}

In [393]:
@dataclass
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


class Agent:
    fitness: float
    genotype: list[int]
    mutation_rates: list[float]
    def __init__(self, genotype, mutation_rates, fitness):
        self.genotype = genotype
        self.mutation_rates = mutation_rates
        self.fitness = fitness


### Vs Strategies

In [394]:
def pure_random(state: Nim) -> Nimply:
    row = choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = randint(1, state.rows[row])
    return Nimply(row, num_objects)

In [395]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [396]:
def adaptive(state: Nim, agent: Agent) -> Nimply:
    possible_moves = [#rows and object iteration
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)
    ]
    (row, tessere) = Nimply(
        *max(
            possible_moves,
            key=lambda m: (
                (m[0] + agent.genotype[0]),
                (m[1] + agent.genotype[1]),
            ),
        )
    )
    if row > state.rows[0] and tessere < 0:
        return Nimply(
            *max(
                possible_moves,
                key=lambda m: ((m[0]), (m[1])),
            )
        )
    elif row > state.rows[0]:
        return Nimply(
            *max(
                possible_moves,
                key=lambda m: ((m[0]), (m[1] + agent.genotype[1])),
            )
        )
    elif tessere < 0:
        return Nimply(
            *max(
                possible_moves,
                key=lambda m: (
                    (m[0] + agent.genotype[0]),
                    (m[1] + agent.genotype[1]),
                ),
            )
        )
    return Nimply(
        *max(
            possible_moves,
            key=lambda m: (
                (m[0] + agent.genotype[0]),
                (m[1] + agent.genotype[1]),
            ),
        )
    )


In [397]:
## strategy in old simulation crashes, trying to fix this
def play(agent: Agent, opponent) -> float:
    win = 0
    for _ in range(0, 100):
        player = 0
        nim = Nim(N)
        while nim:
            if player == 0:
                ply = opponent(nim)
            else:
                ply = adaptive(nim, agent)
            nim.nimming(ply)
            player = 1 - player
        if player == 1:
            win += 1
    return win / 100


## Genetic methods

In [398]:
def select_parent(pop):
    pool = [choice(pop) for _ in range(NUM_PARENTS)]
    champion = max(pool, key=lambda i: i.fitness)
    return champion


def mutate(agent: Agent, opponent) -> Agent:
    offspring = deepcopy(agent)
    for i in range(len(offspring.genotype)):
        if random() < offspring.mutation_rates[i]:
            offspring.genotype[i] += randint(-1, 1)
            offspring.mutation_rates[i] += random() * 0.1  # Small random change
            offspring.fitness = play(offspring, opponent)
    return offspring


def xover(p1: Agent, p2: Agent):
    gene_from_p1 = randint(0, len(p1.genotype) - 1)
    gene_from_p2 = 1 - gene_from_p1
    
    return Agent(
        fitness=0,
        genotype=[p1.genotype[gene_from_p1], p2.genotype[gene_from_p2]],
        mutation_rates=[random() for _ in range(2)],
    )


In [399]:
def find_best_agent(opponent):
    fitness_list = []
    population = [
        Agent(
            genotype=[-randint(0, N - 1), 0],
            mutation_rates=[random() for _ in range(2)],
            fitness=0,
        )
        for _ in range(NUM_POPULATION)
    ]
    for a in population:
        a.fitness = play(a, opponent)
    print(population)

    for generation in range(NUM_GENERATION):
        next_gen = list()
        for agent_to_mutate in range(AGENT_MUTATIONS):
            if random() < MUTATION_RATE:  # self-adapt mutation probability
                p = select_parent(population)
                o = mutate(p, opponent)
            else:
                p1 = select_parent(population)
                p2 = select_parent(population)
                o = xover(p1, p2)
            next_gen.append(o)

        for a in next_gen:
            a.fitness = play(a, opponent)
        population.extend(next_gen)
        population.sort(key=lambda i: i.fitness, reverse=True)
        population = population[:NUM_POPULATION]
        print(population[0].fitness)
        fitness_list.append(population[0].fitness)
    generations = list(range(1, len(fitness_list) + 1))

    return population[0]

In [400]:
def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = choice(spicy_moves)
    return ply

In [401]:
def compute_avarage_win_rate(best_agent): ###benchmark the best agent found
    nim = Nim(N)
    opponent = pure_random
    player = 0
    win = 0
    win_rate = []
    for batch in range(10):
        win = 0
        for i in range(100):
            nim = Nim(N)
            player = 0
            while nim:
                if player == 0:
                    ply = opponent(nim)
                else:
                    ply = adaptive(nim, best_agent)
                logging.info(f"ply: player {player} plays {ply}")
                nim.nimming(ply)
                logging.info(f"status: {nim}")
                player = 1 - player
            if player == 1:
                win += 1
        win_rate.append(win)
    batches = list(range(1, len(win_rate) + 1))
    np_winrate = np.array(win_rate)
    avarage = np.mean(np_winrate)
    print("Avg winrate (1000 games): ", avarage)

In [402]:
### Plotting results

nim = Nim(N)
opponent = pure_random
best_agent = find_best_agent(opponent)
compute_avarage_win_rate(best_agent)

logging.getLogger().setLevel(logging.INFO)

logging.info(f"init : {nim}")
player = 0
win = 0
for i in range(100):
    nim = Nim(N)
    player = 0
    while nim:
        if player == 0:
            ply = opponent(nim)
        else:
            ply = adaptive(nim, best_agent)
        logging.info(f"ply: player {player} plays {ply}")
        nim.nimming(ply)
        logging.info(f"status: {nim}")
        player = 1 - player
    if player == 1:
        win += 1

logging.info(f"Best Agent won: {(win / 100) * 100}% of matches")
logging.info(f"Best Agent Genotype: {best_agent.genotype}")

[<__main__.Agent object at 0x000001502D9DDAC0>, <__main__.Agent object at 0x000001502DAD1070>, <__main__.Agent object at 0x000001502DBE1760>, <__main__.Agent object at 0x000001502DBE1A00>, <__main__.Agent object at 0x000001502DBE1070>, <__main__.Agent object at 0x000001502DBE1040>, <__main__.Agent object at 0x000001502DBE2840>, <__main__.Agent object at 0x000001502DBE1910>, <__main__.Agent object at 0x000001502DAC4380>, <__main__.Agent object at 0x000001502DAC72C0>, <__main__.Agent object at 0x000001502DAC6120>, <__main__.Agent object at 0x000001502DAC4890>, <__main__.Agent object at 0x000001502DAC6CF0>, <__main__.Agent object at 0x000001502DAC5610>, <__main__.Agent object at 0x000001502DAC7710>, <__main__.Agent object at 0x000001502DAC6090>, <__main__.Agent object at 0x000001502DAC7560>, <__main__.Agent object at 0x000001502DAC6E70>, <__main__.Agent object at 0x000001502DAC6480>, <__main__.Agent object at 0x000001502DAC5A00>, <__main__.Agent object at 0x000001502DAC68D0>, <__main__.Ag

INFO:root:ply: player 0 plays Nimply(row=3, num_objects=5)
INFO:root:status: <1 3 5 2 9>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=9)
INFO:root:status: <1 3 5 2 0>


0.6
0.6
0.6


INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <1 3 5 1 0>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <1 3 5 0 0>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 2 5 0 0>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=5)
INFO:root:status: <1 2 0 0 0>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <1 0 0 0 0>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 0 0 0 0>
INFO:root:ply: player 0 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 7 9>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=9)
INFO:root:status: <0 3 5 7 0>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=3)
INFO:root:status: <0 0 5 7 0>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=7)
INFO:root:status: <0 0 5 0 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=3)
INFO:root:status: <0 0 2 0 0>
INFO:root:ply: player

Avg winrate (1000 games):  39.5


INFO:root:status: <0 0 0 0 0>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=2)
INFO:root:status: <1 3 5 5 9>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=9)
INFO:root:status: <1 3 5 5 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=4)
INFO:root:status: <1 3 1 5 0>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=5)
INFO:root:status: <1 3 1 0 0>
INFO:root:ply: player 0 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 1 0 0>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=1)
INFO:root:status: <0 3 0 0 0>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 2 0 0 0>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=2)
INFO:root:status: <0 0 0 0 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=3)
INFO:root:status: <1 3 2 7 9>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=9)
INFO:root:status: <1 3 2 7 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=1)
INFO:root:status: <1 