## lab2: set-covering problem with GA

In [None]:
import random
import logging
import numpy as np
from matplotlib import pyplot as plt

# Set the root logger threshold to INFO; the logging.debug progress messages
# issued while evolving are filtered out at this level.
logging.getLogger().setLevel(logging.INFO)

Function that generates a random problem instance for a given $N$:

In [None]:
def problem(N, seed=None):
    """Generate a random set-covering instance.

    The ``random`` module is seeded with ``seed``; then the number of lists is
    drawn between ``N`` and ``5 * N`` and, for every list, a number of draws
    between ``N // 5`` and ``N // 2`` of integers in ``[0, N - 1]`` is made.
    Duplicate values inside a list are dropped by collecting them in a set.

    Returns a list of lists of integers.
    """
    random.seed(seed)
    how_many = random.randint(N, N * 5)
    instance = []
    for _ in range(how_many):
        draws = random.randint(N // 5, N // 2)
        members = {random.randint(0, N - 1) for _ in range(draws)}
        instance.append(list(members))
    return instance

constants:

In [None]:
# Tunable parameters of the genetic algorithm and of the experiment.
POPULATION_SIZE = 20        # individuals in the initial population and kept after each generation
OFFSPRING_SIZE = 60         # children generated (and scored) per generation
MUTATION_THRESHOLD = 0.5    # mutation() flips a gene when a uniform draw in [0, 1) is below this value
INITIAL_LIST = 1            # number of random list picks made when building each initial genome
SEED = 42                   # seed passed to problem() so the same instance is generated for a given N
EXEC_NUM = 5                # number of times the GA is run for each value of N

In [None]:
def score(genome, problem):
    """Evaluate a genome against a set-covering instance.

    A truthy ``genome[i]`` means that list ``problem[i]`` is selected. The
    selected lists are scanned in index order: a value met for the first time
    counts as covered, every further occurrence decrements the repetition
    counter.

    Returns ``(covered, repetitions)``, where ``repetitions`` is zero or
    negative.
    """
    seen = set()
    repeats = 0
    for position, selected in enumerate(genome):
        if not selected:
            continue
        for value in problem[position]:
            if value in seen:
                repeats -= 1
            else:
                seen.add(value)
    # Every first-time value was added exactly once, so the set size is the
    # number of covered values.
    return (len(seen), repeats)

def select_parent(population, tournament_size=2):
    """Pick a parent by tournament selection.

    ``tournament_size`` individuals are drawn from ``population`` uniformly at
    random, with replacement, and the one with the best fitness is returned.

    Each individual is a ``(genome, fitness)`` pair whose fitness is a
    ``(covered, repetitions)`` tuple with ``repetitions`` stored as a
    non-positive number. Tuples compare lexicographically, so higher coverage
    wins and ties are broken in favour of fewer repetitions.
    """
    contenders = random.choices(population, k=tournament_size)
    # Rank by the whole fitness tuple, the same ordering used when survivors
    # are sorted. The former key (-repetitions) ignored coverage and, because
    # repetitions are already negative, favoured the most redundant genome.
    return max(contenders, key=lambda individual: individual[1])

def cross_over(p1, p2, genome_size):
    """Combine two parents with uniform crossover.

    ``p1`` and ``p2`` are ``(genome, fitness)`` pairs; only the genomes are
    used. One uniform random number is drawn per gene: below 0.5 the gene is
    copied from the first parent, otherwise from the second.

    Returns the child genome as a NumPy array.
    """
    first, second = p1[0], p2[0]
    coin_flips = np.random.random(genome_size)
    child = []
    for gene_a, gene_b, flip in zip(first, second, coin_flips):
        child.append(gene_a if flip < 0.5 else gene_b)
    return np.array(child)

def mutation(g, genome_size):
    """Return a copy of ``g`` with at most one gene flipped.

    A uniform number in ``[0, 1)`` is drawn; when it is below
    ``MUTATION_THRESHOLD`` one position is chosen uniformly among all
    ``genome_size`` genes and its boolean value is inverted. Otherwise the
    copy is returned unchanged. ``g`` itself is never modified.
    """
    mutant = np.copy(g)
    if np.random.random() < MUTATION_THRESHOLD:
        # np.random.randint excludes its upper bound, so the bound must be
        # genome_size itself: genome_size - 1 made the last gene unreachable
        # and raised ValueError for a single-gene genome.
        idx = np.random.randint(0, genome_size)
        mutant[idx] = not g[idx]
    return mutant

def plot_hist(hist):
    """Draw the recorded fitness values as a dot plot.

    ``hist`` is converted to a NumPy array and plotted with the ``"."``
    marker on a new 14x4 figure. Nothing is returned.
    """
    values = np.array(hist)
    plt.figure(figsize=(14, 4))
    plt.plot(values, ".")

initialization of population:

In [None]:
def init_genome(genome_size):
    """Create a random starting genome as a boolean NumPy array.

    All ``genome_size`` genes start as ``False``; then ``INITIAL_LIST``
    positions are drawn uniformly at random and set to ``True``. Draws are
    made with replacement, so they may coincide and fewer than
    ``INITIAL_LIST`` genes can end up set.
    """
    genes = [False] * genome_size
    for _ in range(INITIAL_LIST):
        # np.random.randint excludes its upper bound: passing genome_size
        # (not genome_size - 1) lets the last list be picked too and keeps
        # a one-list instance from raising ValueError.
        genes[np.random.randint(0, genome_size)] = True
    return np.array(genes)

def init_population(genome_size, allLists):
    """Build the initial population of ``POPULATION_SIZE`` individuals.

    Every individual is a ``(genome, fitness)`` tuple: the genome is a
    true/false array telling which lists are taken (from ``init_genome``),
    and the fitness is ``score(genome, allLists)``, i.e.
    ``(#covered numbers, #repetitions)``.
    """
    genomes = [init_genome(genome_size) for _ in range(POPULATION_SIZE)]
    return [(genome, score(genome, allLists)) for genome in genomes]

Genetic algorithm (GA):

In [None]:
def compute_solution(n, plot):
    """Run the GA on the instance of size ``n`` and print one summary row.

    The instance is generated with ``problem(n, SEED)``. Each generation
    creates ``OFFSPRING_SIZE`` children, each one either a mutation of a
    selected parent or a crossover of two selected parents (50/50 choice),
    and keeps the ``POPULATION_SIZE`` best children as the next population.
    Evolution stops once the best individual covers every coverable number.

    The printed row contains ``n``, the number of generations, the number of
    fitness evaluations, the best fitness as ``(covered, repetitions)``, and
    the weight ``covered + repetitions`` with its bloat relative to ``n``.

    ``plot`` is the 1-based run index; it is only used by the optional
    plotting snippet that is commented out at the end of the function.
    """
    allLists = problem(n, SEED)
    genome_size = len(allLists)

    # The lists are random and may not contain every number in 0..n-1. Stop at
    # what can actually be covered, otherwise the loop below would never end.
    target = min(n, len(set().union(*allLists)))
    if target < n:
        logging.warning(
            "instance n=%s is infeasible: only %s numbers can be covered", n, target
        )

    # Keep the population sorted best-first from the start, since both the
    # stop test and the final report read population[0].
    population = sorted(
        init_population(genome_size, allLists), key=lambda ind: ind[1], reverse=True
    )
    generations = 0
    # The initial individuals were scored as well, so they count as calls.
    fitness_calls = len(population)
    hist = list()
    while population[0][1][0] < target:  # covered numbers < reachable target
        generations += 1
        offspring = list()
        for _ in range(OFFSPRING_SIZE):
            if np.random.random() < .5:
                parent = select_parent(population)
                child = mutation(parent[0], genome_size)
            else:
                p1, p2 = select_parent(population), select_parent(population)
                child = cross_over(p1, p2, genome_size)
            offspring.append((child, score(child, allLists)))
        fitness_calls += len(offspring)
        hist.extend(fit[0] for _, fit in offspring)
        # Comma strategy: only the children compete for the next generation.
        population = sorted(offspring, key=lambda ind: ind[1], reverse=True)[:POPULATION_SIZE]
        logging.debug("s=%s, select_lst=%s", population[0][1][0], sum(population[0][0]))

    best_cov, best_rep = population[0][1]
    # Repetitions are stored negated, so the weight (total number of elements
    # in the selected lists) is covered minus the stored value.
    w = best_cov + -best_rep
    print(f"{n}\t\t{generations}\t\t\t{fitness_calls}\t\t\t\t({best_cov},{-best_rep})\t\t\t\t{w}(bloat={(w-n)*100/n}%)")
    # if plot == EXEC_NUM: # uncomment to plot fitness over generations
        # plot_hist(hist)

solutions for different values of $N$:

In [None]:
# Run the GA EXEC_NUM times for each problem size and print one row per run
# under a tab-separated header.
n_values = [5, 10, 20, 100, 500, 1000]
print("n\t\tgenerations\t\tfitness function calls\t\tsol. fitness=(cov,rep)\t\tweight(cov+rep)")
for n in n_values:
    for run in range(1, EXEC_NUM + 1):
        compute_solution(n, run)