In [153]:
import random
import numpy as np
from numpy.typing import NDArray
import logging
from dataclasses import dataclass
logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s")

In [154]:
DEBUG = False
logging.getLogger().setLevel(logging.DEBUG if DEBUG else logging.INFO)

In [155]:
@dataclass
class RouletteWheel:
    population: NDArray

    def spin(self, k=2):
        # softmax
        p = np.exp(np.fromiter(map(lambda x: x.fitness, self.population), dtype=float))
        p = p / p.sum()
        return np.random.choice(self.population, k, p = p)

In [156]:
class Individual:

    def __init__(self, *, included_lists: NDArray, P: NDArray, N: int):
        self.P = P
        self.N = N
        self.included_lists = included_lists

    def __repr__(self):
        return str(self.included_lists)

    def __len__(self):
        return sum(map(lambda x: len(x), self.P[np.where(self.included_lists == 1)[0]]))
        
    @property    
    def fitness(self):
        max_bloat = sum(map(lambda x: len(x), self.P))
        return sum(self.coveredNumbers) + (max_bloat - len(self))

    @property
    def isInvalid(self):
        return not self.coveredNumbers.any()

    @property
    def isGoal(self):
        return (self.coveredNumbers == 1).all()

    def __matmul__(self, other):
        '''Performs crossover between self and other'''
        assert (self.P == other.P).all(), "Two invididuals must belong to the same problem!"
        assert self.N == other.N, "Two invididuals must belong to the same problem!"
        index = np.random.randint(len(self.P))
        new_included_lists = np.hstack([self.included_lists[:index], other.included_lists[index:]])
        return Individual(included_lists=new_included_lists, P=self.P, N=self.N)

    def __invert__(self):
        '''Mutates a random number of self genes'''
        new_included_lists = self.included_lists[:]
        index = np.random.randint(len(new_included_lists))
        new_included_lists[index] = 1 - new_included_lists[index]
        return Individual(included_lists=new_included_lists, P=self.P, N=self.N)

    def _list_to_binary(self, list: NDArray):
        binary_mapping = np.zeros(self.N)
        binary_mapping[list] = 1
        return binary_mapping

    @property
    def coveredNumbers(self):
        covered_numbers = np.zeros(self.N)
        for i in np.where(self.included_lists == 1)[0]:
            binary_mapping = self._list_to_binary(self.P[i])
            covered_numbers = np.logical_or(covered_numbers, binary_mapping).astype(np.int32)
        return covered_numbers

In [157]:
def problem(N, seed=None):
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

In [158]:
SEED = 42
POPULATION_SIZE = 100
OFFSPRING_SIZE = 50
MAX_COUNT_GENERATION = 100

In [159]:
for N in [5, 10, 20]:
    P = np.array(problem(N), dtype=object)

    population = list()

    for genome in [np.fromiter((random.choice([1, 0]) for _ in range(len(P))), dtype=np.int8) for _ in range(POPULATION_SIZE)]:
        individual = Individual(included_lists=genome, P=P, N=N)
        if not individual.isInvalid:
            population.append(individual)

    count_generation = 0
    while count_generation < MAX_COUNT_GENERATION and not population[0].isGoal:
        count_generation += 1
        offspring = list()
        roulette = RouletteWheel(population)
        while len(offspring) < OFFSPRING_SIZE:
            p1, p2 = roulette.spin(k=2)
            o = p1 @ p2 # crossover
            if np.random.rand() > .2:
                o = ~o # mutation
            if not individual.isInvalid:
                offspring.append(o)
        population += offspring
        population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
    
    logging.info(f"{len(population[0])}, {population[0].isGoal}")

2022-10-31 16:33:30,748 INFO: 3, False
2022-10-31 16:33:42,723 INFO: 4, False
2022-10-31 16:33:42,744 INFO: 224, True
