#### **Francesco Fiorella**'s GA Solution for Set Covering

In [1]:
import random
import logging
from collections import namedtuple
from functools import reduce
from matplotlib import pyplot as plt

In [2]:
def problem(N, seed=None):
    """Generate a random set-covering instance.

    Seeds the global ``random`` generator with ``seed`` and returns a list of
    lists. The number of inner lists is drawn from ``[N, 5 * N]``; for each one,
    between ``N // 5`` and ``N // 2`` values are drawn from ``0 .. N - 1`` and
    duplicates are collapsed, so every inner list holds distinct integers.
    """
    random.seed(seed)
    # The list count is drawn first, then each list's draw count, then its
    # members: this is the order in which the random stream is consumed.
    subset_count = random.randint(N, N * 5)
    subsets = []
    for _ in range(subset_count):
        draw_count = random.randint(N // 5, N // 2)
        members = set()
        for _ in range(draw_count):
            members.add(random.randint(0, N - 1))
        subsets.append(list(members))
    return subsets

In [3]:
# Log only the bare message text (no level or logger-name prefix), at INFO level and above.
logging.basicConfig(format="%(message)s", level=logging.INFO)

In [122]:
PROBLEM_SIZE = 1_000  # N: number of elements that have to be covered
POPULATION_SIZE = 50  # individuals kept after each generation
OFFSPRING_SIZE = 10  # candidates generated per generation

NUM_GENERATIONS = 5_000  # upper bound on the number of generations

In [123]:
# count the number of 1's in a number
def bitcount(n):
    """Return the number of set bits in the integer ``n``.

    Values that are not strictly positive yield 0.
    """
    if not n > 0:
        return 0
    # A single pass over the binary representation instead of one big-int
    # AND/subtract per set bit; this matters for the 1000-bit masks used here.
    return bin(n).count("1")

# convert a list to an int. Ex. [0, 2, 3] becomes 01101 = 13
def list_to_int(list_):
    """Encode an iterable of non-negative bit positions as an integer bitmask.

    Bit ``k`` of the result is set when ``k`` appears in ``list_``. The input is
    left untouched, and an empty input encodes to 0.
    """
    mask = 0
    for position in list_:
        # Shift instead of 2**position: stays in integer arithmetic and never
        # writes back into the caller's list.
        mask |= 1 << position
    return mask

# creates the lists, converts them to integers, and returns the distinct ones as a list
def get_all_lists(N, seed=None):
    """Build a set-covering instance and return its subsets as integer bitmasks.

    The subsets produced by ``problem(N, seed)`` are converted with
    ``list_to_int`` and de-duplicated; the order of the returned list is
    arbitrary.

    ``seed`` is forwarded to ``problem`` so that an instance can be reproduced.
    The default (``None``) keeps the earlier behaviour of an unseeded instance.
    """
    return list(set(map(list_to_int, problem(N, seed))))

In [124]:
import numpy as np

# An individual pairs a genome (list of subset bitmasks) with its fitness value.
Individual = namedtuple("Individual", ("genome", "fitness"))


# fitness function
def get_fitness(genome):
    """Return how many of the ``PROBLEM_SIZE`` elements ``genome`` leaves uncovered.

    ``genome`` is an iterable of integer bitmasks; their union is compared with
    the all-ones mask of width ``PROBLEM_SIZE``. Lower is better, and 0 means the
    genome covers every element. An empty genome covers nothing, so it scores
    ``PROBLEM_SIZE``.
    """
    universe = (1 << PROBLEM_SIZE) - 1
    # The initial value 0 makes the fold well-defined for an empty genome.
    covered = reduce(lambda acc, subset: acc | subset, genome, 0)
    # Masking (rather than subtracting) keeps the count correct even if a
    # bitmask carried bits beyond PROBLEM_SIZE.
    return bitcount(universe & ~covered)


# returns the individual with min fitness (the fittest) among a random sample
def tournament(population, tournament_size=2):
    """Pick ``tournament_size`` individuals at random and return the fittest.

    Sampling is done with replacement via ``random.choices``; the winner is the
    sampled individual with the lowest ``fitness`` (the first one on ties).
    """
    def by_fitness(individual):
        return individual.fitness

    contenders = random.choices(population, k=tournament_size)
    return min(contenders, key=by_fitness)


# merge or cross two individuals
def cross_over(genome1, genome2):
    """Recombine two genomes (lists of subset bitmasks) into a child genome.

    One of three strategies is chosen at random:

    * one-point crossover: head of ``genome1`` followed by the tail of ``genome2``;
    * one-point crossover with the parents swapped;
    * uniform crossover: each position takes the gene of either parent with
      equal probability. ``zip`` stops at the shorter parent, so the child has
      the length of the shorter genome.

    The two branch draws are independent, so the three strategies are not
    equally likely.
    """
    # The cut must fall inside the parents' actual length. Drawing it from
    # [0, PROBLEM_SIZE] put it past the end of these short genomes almost every
    # time, which made the one-point branches return a parent unchanged.
    cut = random.randint(0, max(len(genome1), len(genome2)))
    if random.randint(0, PROBLEM_SIZE) % 3 == 0:
        return genome1[:cut] + genome2[cut:]
    elif random.randint(0, PROBLEM_SIZE) % 3 == 2:
        return genome2[:cut] + genome1[cut:]
    else:
        # One coin flip per gene from the same generator as everything else,
        # instead of allocating PROBLEM_SIZE numpy randoms on every call.
        return [g1 if random.random() < 0.5 else g2 for g1, g2 in zip(genome1, genome2)]


# replace (or append) a list in the individual with one "original" list
def mutation(g):
    """Return a mutated copy of genome ``g``.

    A random entry of the module-level ``all_lists`` is written at a random
    position. The position may equal ``len(g)``, in which case the entry is
    appended instead of replacing an existing gene, so the genome can grow by
    one. The result goes through ``set``: duplicates are dropped and the gene
    order is not preserved.
    """
    position = random.randint(0, len(g))
    replacement = random.choice(all_lists)
    head = g[:position]
    tail = g[position + 1 :]
    candidate = head + [replacement] + tail
    return list(set(candidate))

In [125]:
# All distinct subsets of the instance, as bitmasks; also the gene pool used by mutation().
all_lists = get_all_lists(PROBLEM_SIZE)
# Each initial individual holds a single subset. Only the first POPULATION_SIZE
# subsets are used, so fitness is evaluated for those alone rather than for
# every subset before slicing.
population = [Individual([l], get_fitness([l])) for l in all_lists[:POPULATION_SIZE]]
logging.info(f"init: pop_size={len(population)}; min={min(population, key=lambda i: i.fitness).fitness}")

init: pop_size=50; min=606


In [126]:
# One (generation, fitness) sample per evaluated individual; generation 0 is
# the initial population.
fitness_log = [(0, i.fitness) for i in population]

# Lightest covering genome seen so far and its weight; 0 means "none yet".
solution = 0
sol_weight = 0
# False until a covering genome of weight PROBLEM_SIZE shows up; from then on it
# holds the number of generations that were run (kept for the graphic).
found = False

for g in range(NUM_GENERATIONS):
    if found:
        break

    # The first 80% of the generations explore (mostly cross-over, tournaments
    # of 2); the remaining ones exploit (mostly mutation, large tournaments).
    exploring = g / NUM_GENERATIONS < 0.8
    mutation_rate = 0.3 if exploring else 0.8
    # NOTE(review): the exploit tournament size is derived from PROBLEM_SIZE,
    # not POPULATION_SIZE — confirm that this is intended.
    tournament_size = 2 if exploring else PROBLEM_SIZE // 2

    offspring = list()
    for i in range(OFFSPRING_SIZE):
        if random.random() < mutation_rate:
            # ...by mutation
            p = tournament(population, tournament_size)
            o = mutation(p.genome)
        else:
            # ...by cross-over
            p1 = tournament(population, tournament_size)
            p2 = tournament(population, tournament_size)
            o = cross_over(p1.genome, p2.genome)

        f = get_fitness(o)
        fitness_log.append((g + 1, f))

        # Non-covering candidates compete for a place in the population;
        # covering ones are only recorded as solutions.
        if f != 0:
            offspring.append(Individual(o, f))
            continue

        # Weight = total number of elements over all selected subsets.
        weight = sum(bitcount(item) for item in o)
        if sol_weight == 0 or weight < sol_weight:
            sol_weight = weight
            solution = o
        if weight == PROBLEM_SIZE:
            # Everything is covered and the subset sizes add up to PROBLEM_SIZE,
            # so no element is covered twice: nothing lighter exists. Store the
            # generation count right away so it is a number even when this
            # happens in the very last generation.
            found = g + 1
            break

    for goff in offspring:
        if goff not in population:
            population.append(goff)
    population = sorted(population, key=lambda i: i.fitness)[:POPULATION_SIZE]

if solution != 0:
    logging.info(f"Found a solution for N={PROBLEM_SIZE}")
    # Left disabled: prints every bitmask of the solution.
    #logging.info(f"Solution: {solution}")
    logging.info(f"Weight: {sol_weight}")
    logging.info(f"Number of generations: {found if found else NUM_GENERATIONS}")
    logging.info(f"Population size: {POPULATION_SIZE}")
    logging.info(f"Offspring size: {OFFSPRING_SIZE}")
else:
    logging.info(f"Solution for N={PROBLEM_SIZE} not found!")

Found a solution for N=1000
Weight: 3645
Number of generations: 5000
Population size: 50
Offspring size: 10
