# Lab 2: Set-Covering Problem using Genetic Algorithms

In [15]:
from operator import index
import random
import sys
from time import time
import numpy as np
from collections import namedtuple
import logging
from collections import Counter


In [16]:
POPULATION_SIZE = 100000 
OFFSPRING_SIZE = 1000

NUM_GENERATIONS = 10

N = 50

random.seed(42)

GOAL = set(range(N))

In [17]:
import itertools
from typing import Callable

def remove_duplicates(list_):
  list_ = list(k for k,_ in itertools.groupby(list_))

  return list_

def sort_by(list_: list, key: Callable = None):
  list_.sort(key=key)
  return list_
  
def preproc(problem, rem_dup=False, sort=False, f:Callable = None):
  problem= sort_by(problem, key=f)
  problem= remove_duplicates(problem)
  return problem

In [18]:
Individual = namedtuple("Individual", ["genome", "fitness"])

def create_genome(problem):
    genome = []
    numbers_found = set()

    while numbers_found != GOAL:

        n_random = random.choice(range(0, len(problem)))
        genome.append(tuple(problem[n_random]))
        numbers_found |= set(problem[n_random])

        problem.pop(n_random)
        
    return genome
    
def fitness(genome):
    
    cnt = Counter()
    cnt.update(sum((e for e in genome), start=()))

    # Counting (Number of useful elements, Numbers of redundant elements)
    return tuple([sum(cnt[c] - 1 for c in cnt if cnt[c] > 1), -sum(cnt[c] == 1 for c in cnt)])

def tournament(population, tournament_size=2):
    return min(random.choices(population, k=tournament_size), key=lambda i: i.fitness)


def mutation(g, problem):

    # Deleting a random Gene (= List)
    point = random.randint(0, len(g) - 1)
    g.pop(point)

    # Numbers covered without the Gene chosen previously
    numbers_found = set()
    for element in g:
        numbers_found != set(element)

    # Counter to avoid infinit loops
    steps = 0

    while numbers_found != GOAL:
        steps += 1

        if steps == 10000:
            # No Solution found in a reasonable number of step
            return None
        
        # Choosing a list from the problem randomly and
        # Adding it to the candidate solution (Genome)
        n_random = random.choice(range(0, len(problem)))

        # Avoiding to have equal lists inside the Genome
        if not any(list == tuple(problem[n_random]) for list in g):
            g.append(tuple(problem[n_random]))
            numbers_found |= set(problem[n_random])

            problem.pop(n_random)

    return g

**Problem Definition**

In [19]:
def problem(N, seed=42):
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

## Genetic Algorithm

### Initial Population

In [20]:
population = list()

all_list = preproc(problem(N))

for _ in range(POPULATION_SIZE):
    
    genome = create_genome(all_list.copy())
    population.append(Individual(genome, fitness(genome)))

logging.info(f"init: pop_size={len(population)}; min={min(population, key=lambda i: i.fitness)[1]}")

### Evolution

In [21]:
st = time()

for g in range(NUM_GENERATIONS):
    offspring = list()
    for i in range(OFFSPRING_SIZE):

        # Selection of parents
        p = tournament(population.copy())

        # Offspring generation
        o = mutation(p.genome.copy(), all_list.copy())

        # Check if the mutation returned a valid solution.
        # In this code, only valid solutions has been considered.
        # Possible Improvement: Acceptance with penalties of non-valid solutions
        if o == None:
            continue
    
        # Fitness of Offspring
        f = fitness(o)
        
        offspring.append(Individual(o, fitness(o)))
    
    # Adding new Offspings generated to Population list
    population+=offspring

    # Sorting the Population, according to their fitness and selecting the firsts n_elements = POPULATION_SIZE
    population = sorted(population, key=lambda i: i.fitness)[:POPULATION_SIZE]
    
et  = time()
print("Winner: ", population[0])
print("Cost: ", sum(len(element) for element in population[0].genome))
print("Bloat= ", int(sum(len(element) for element in population[0].genome)/N * 100), "%")
print(f"Elapsed time: {et - st}s")


Winner:  Individual(genome=[(0, 1, 3, 4, 8, 9, 13, 14, 23, 29, 30, 31, 32, 34, 35, 39, 40, 42, 45), (0, 1, 33, 35, 36, 5, 6, 38, 10, 43, 46, 15, 49, 18, 20, 22, 24, 26), (2, 3, 7, 11, 12, 16, 17, 18, 21, 22, 23, 24, 25, 27, 28, 31, 41, 44, 47, 48, 49), (3, 36, 37, 4, 39, 40, 41, 9, 43, 48, 17, 19, 20, 22, 28, 29, 30)], fitness=(25, -27))
Cost:  75
Bloat=  150 %
Elapsed time: 18.309167623519897s
