In [2]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor
from anytree import Node, RenderTree

random.seed(42)

# print(list(itertools.combinations([0, 1, 2, 3, 4], 4)))

In [3]:
Nimply = namedtuple("Nimply", "row, num_objects")

class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def with_list(self, l: list) -> None:
        self._rows = [i for i in l]

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    @property
    def k(self) -> int:
        return self._k

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [4]:
def nim_sum(state: Nim) -> int:
    *_, result = accumulate(state.rows, xor)
    return result

def cook_status(state: Nim, func: Callable, c: list = None) -> dict:
    cooked = dict()
    cooked["possible_moves"] = [
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1) if state.k is None or o <= state.k
    ]
    cooked["active_rows_number"] = sum(o > 0 for o in state.rows)
    cooked["shortest_row"] = min((x for x in enumerate(state.rows) if x[1] > 0), key=lambda y: y[1])[0]
    cooked["longest_row"] = max((x for x in enumerate(state.rows)), key=lambda y: y[1])[0]
    cooked["nim_sum"] = nim_sum(state)

    brute_force = list()
    for m in cooked["possible_moves"]:
        tmp = deepcopy(state)
        tmp.nimming(m)
        if c is None:
            brute_force.append((m, func(tmp)))
        else:
            brute_force.append((m, func(tmp, c)))
    cooked["brute_force"] = brute_force

    return cooked

In [5]:
def optimal_startegy(state: Nim) -> Nimply:
    data = cook_status(state, nim_sum)
    return next((bf for bf in data["brute_force"] if bf[1] == 0), random.choice(data["brute_force"]))[0]

def pure_random(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)

In [7]:
# EVOLVING STUFF
CHROMOSOME_LENGTH = 15
POPULATION = 12
GENERATIONS = 100
# XOVER_P = 0.8
NIM_ROWS = 11

def build_chromosome(state: Nim) -> list:
    terminal_symbols = [rc for rc in range(len(state.rows))]
    function_symbols = ['+', '-', '*', '/', '%', '&', '|', '^']
    # function_symbols = ['+', '-', '*', '&', '|', '^']

    chromosome = [random.choice(terminal_symbols)]
    for _ in range(CHROMOSOME_LENGTH - 1):
        if random.randrange(2) == 0:
            # selected the terminal symbol list
            chromosome.append(random.choice(terminal_symbols))
        else:
            # selected the function symbol
            chromosome.append([
                random.choice(function_symbols),      # function to use
                random.randrange(len(chromosome)),    # first operand
                random.randrange(len(chromosome))     # second operand
            ])
    return chromosome

def eval_state(state: Nim, chromosome: list) -> int:
    c = deepcopy(chromosome)
    res = 0
    last_function_index = 0

    for i in range(len(c)):
        if isinstance(c[i], list):
            last_function_index = i
            # cicle the function to get the parameters, then save the result in place
            op1 = state.rows[c[c[i][1]]] if not isinstance(c[c[i][1]], list) else c[c[i][1]][3]
            op2 = state.rows[c[c[i][2]]] if not isinstance(c[c[i][2]], list) else c[c[i][2]][3]
            # print(f"op1: {op1} - op2: {op2}")
            # 3 things in here
            match c[i][0]:
                case '+': 
                    # print(f"{op1} + {op2}")
                    res = op1 + op2
                case '-': 
                    # print(f"{op1} - {op2}")
                    res = op1 - op2
                case '*': 
                    # print(f"{op1} * {op2}")
                    res = op1 * op2
                case '/': 
                    # print(f"{op1} / {op2}")
                    if op2 == 0: return 100     # if division by zero, discourage the option by over evaluating
                    res = op1 // op2
                case '%': 
                    # print(f"{op1} % {op2}")
                    if op2 == 0: return 100     # if division by zero, discourage the option by over evaluating
                    res = op1 % op2
                case '&': 
                    #print(f"{op1} & {op2}")
                    res = op1 & op2
                case '|': 
                    # print(f"{op1} | {op2}")
                    res = op1 | op2
                case '^': 
                    # print(f"{op1} ^ {op2}")
                    res = op1 ^ op2
            c[i].append(res)
    return c[last_function_index][3]
    
def build_tree(p: Node, c: list):
    l = p.list
    
    for i in range(len(l)):
        for d in range(l[i]):
            # build the list with the decreased value
            child_list = l[:i] + [l[i] - (d + 1)] + l[i+1:] if i != len(l) - 1 else l[:i] + [l[i] - (d + 1)]
            child = Node("node", parent=p)
            child.list = child_list
            tmp_nim = Nim(1)
            tmp_nim.with_list(l=child_list)
            child.type = eval_state(tmp_nim, c)

            # If parent is a P-position and the newborn is a P-position, violation of rule #1
            if p.type == 0 and child.type == 0: 
                p.fit = 1
                child.root.fit += 1

            # If parent is a N-position and none of the newborn is a P-position, violation of rule #2
            # TODO

            # TODO check for third rule

            # print(child.root)
            build_tree(child, c)

# fitness computation goes through the game tree checking for number of not respected rules 
# P POSITION = 0
# N POSITION != 0
# rules:
# 1) any move applied to a P-position turns the game into a N-position
# 2) there is at least one move that turns the game from a N-position into a P-position
# 3) the final position (when the game is over) is a P-position
# need to minimize this (low fitness is better)
def tree_fitness(state: Nim, c: list):
    configuration = [rc for rc in state.rows]
    root = Node("root", list = configuration, type = eval_state(state, c), fit = 0)
    build_tree(root, c)
    return root.fit

def local_startegy(local_state: Nim, c: list) -> Nimply:
    data = cook_status(local_state, eval_state, c)
    return next((bf for bf in data["brute_force"] if bf[1] == 0), random.choice(data["brute_force"]))[0]

def dumb_fitness(state: Nim, c: list):
    strategy = (pure_random, local_startegy)
    player = random.randrange(2)
    wincount = 0
    rounds = 50

    for g in range(rounds):
        nim = deepcopy(state)
        while nim:
            ply = strategy[player](nim) if player == 0 else strategy[player](nim, c)
            nim.nimming(ply)
            player = 1 - player
        winner = 1 - player
        # logging.debug(f"fitness: game {g} won by {winner} started as -> {state}")
        if winner == 1: wincount += 1
    
    return 1 - wincount/rounds
            
def binary_tournament(fitval: list, n: int = 4):
    choosen = random.choices(fitval, k=n)
    choosen.sort(key=lambda x: x[0])
    return choosen[0], choosen[1]

def single_crossover(c1: list, c2: list):
    point = random.randrange(len(c1))
    return c1[:point] + c2[point:], c2[:point] + c1[point:]

def double_crossover(c1: list, c2: list):
    point1 = random.randint(0, len(c1) // 2)
    point2 = random.randint(point1, len(c1))
    return c1[:point1] + c2[point1:point2] + c1[point2:], c2[:point1] + c1[point1:point2] + c2[point2:]

def mutation(state: Nim, chromosome: list):
    c = deepcopy(chromosome)
    # print(f"\nbefore: {c}")
    terminal_symbols = [rc for rc in range(len(state.rows))]
    function_symbols = ['+', '-', '*', '/', '%', '&', '|', '^']
    point = random.randrange(len(c))
    if point == 0:
        c[0] = random.choice(terminal_symbols)
        # print(f"after: {c}")
        return c

    new_thing = None
    if random.random() < 0.5:
        new_thing = random.choice(terminal_symbols)
    else:
        new_thing = [random.choice(function_symbols), random.randrange(point), random.randrange(point)]
    c[point] = new_thing
    # print(f"after: {c}")
    return c

# fitness = tree_fitness
fitness = dumb_fitness
# crossover = double_crossover
crossover = single_crossover

nim = Nim(NIM_ROWS)
# print(f"specific in rows: {nim.rows[2]}")
population = [build_chromosome(nim) for _ in range(POPULATION)]
fitval = [(fitness(nim, c), c) for c in population]
fitval.sort(key=lambda x: x[0])
print("Finished population evaluation")

xover_p = 0.8
USELESS_GEN_THRESHOLD = GENERATIONS / 10

oldness = [fitval[0][0], 0]
for g in range(GENERATIONS):
    one, two = binary_tournament(fitval)
    action = 0

    if random.random() < xover_p:
        off1, off2 = crossover(one[1], two[1])

        # consider mutation on offspring
        mut1, mut2 = 0, 0
        if random.random() < 0.2:
            off1 = mutation(nim, off1)
            off1 = mutation(nim, off1)
            mut1 = 1
        if random.random() < 0.2:
            off2 = mutation(nim, off2)
            off2 = mutation(nim, off2)
            mut2 = 0

        off1 = (fitness(nim, off1), off1)
        off2 = (fitness(nim, off2), off2)
        #print(f"fitness: {off1[0]} and {off2[0]}")
        toadd = (off1, mut1) if off1[0] < off2[0] else (off2, mut2)
        fitval.append(toadd[0])
    else:
        action = 1
        choosen = one[1] if random.random() < 0.5 else two[1]
        choosen = mutation(nim, choosen)
        choosen = mutation(nim, choosen)
        fitval.append((fitness(nim, choosen), choosen))

    #print(f"Newly created:\n{off1}\nand\n{off2}")
    fitval.sort(key=lambda x: x[0])
    fitval = fitval[:POPULATION]
    if fitval[0][0] != oldness[0]:
        oldness[0] = fitval[0][0]
        oldness[1] = 0
        if xover_p <= 0.7: xover_p += 0.1
    else:
        oldness[1] += 1
        if oldness[1] % USELESS_GEN_THRESHOLD == 0:
            xover_p -= 0.1
            if xover_p < 0.4: xover_p = 0.4
    print(f"Generation {g} ended with best {fitval[0][0]:.2f} - Offspring added is {toadd[0][0]:.2f} and it was {'XOVER' if not action else 'MUTATED'} - xover_p = {xover_p:.2f} - population fintess list: {[f[0] for f in fitval]}")
print(f"Winrate is {((1 - fitval[0][0]) * 100):.2f}% against random_strategy")

print(fitval)

Finished population evaluation
Generation 0 ended with best 0.38 - Offspring added is 0.44 and it was XOVER - xover_p = 0.80 - population fintess list: [0.38, 0.43999999999999995, 0.45999999999999996, 0.45999999999999996, 0.48, 0.52, 0.52, 0.56, 0.56, 0.6, 0.6, 0.62]
Generation 1 ended with best 0.38 - Offspring added is 0.46 and it was XOVER - xover_p = 0.80 - population fintess list: [0.38, 0.43999999999999995, 0.45999999999999996, 0.45999999999999996, 0.45999999999999996, 0.48, 0.52, 0.52, 0.56, 0.56, 0.6, 0.6]
Generation 2 ended with best 0.38 - Offspring added is 0.46 and it was MUTATED - xover_p = 0.80 - population fintess list: [0.38, 0.43999999999999995, 0.45999999999999996, 0.45999999999999996, 0.45999999999999996, 0.48, 0.5, 0.52, 0.52, 0.56, 0.56, 0.6]
Generation 3 ended with best 0.38 - Offspring added is 0.58 and it was XOVER - xover_p = 0.80 - population fintess list: [0.38, 0.43999999999999995, 0.45999999999999996, 0.45999999999999996, 0.45999999999999996, 0.48, 0.5, 0.5