# Lab 2
Use *genetic algorithms* (GA) to solve the set cover problem.

In [1]:
# given function to yield list of lists
import random

def problem(N, seed=None):
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

In [2]:
# logging
import logging
logging.basicConfig(format="%(message)s", level=logging.INFO)

## Solution outline, $(\mu + \lambda)$ algorithm
1. Create initial population by selecting a random subset of the list of lists
2. Compute fitness to rank population
3. Choose parents using roulette wheel with ranking system
4. Generate offspring by cross-over, mutate offspring with low probability
5. Choose the best solutions of population and offspring
6. Repeat step 2-5

In [None]:
# imports
from itertools import groupby
import random

In [119]:
POPULATION_SIZE = 30
OFFSPRING_SIZE = 100
N = 5
seed = 42
STATE_SPACE = problem(N, seed)
ITERS = 1000

In [47]:
# helping functions

# find out which elems are in the lists to compute violated constraints
def lists_to_set(lists):
    list_elems = [single_elem for l in lists for single_elem in l]
    s = set(list_elems)
    return s


def count_duplicates(lists):
    list_elems = [single_elem for l in lists for single_elem in l]
    duplicates = sum([len(list(group))-1 for key, group in groupby(sorted(list_elems))])
    return duplicates

In [130]:
def create_population(N,seed):
    population = []
    STATE_SPACE = problem(N,seed)
    for _ in range(POPULATION_SIZE):
        individual = random.choices(STATE_SPACE,k=random.randint(1,len(STATE_SPACE)))
        fitness = compute_fitness(individual)
        population.append((individual,fitness))
    return population

def compute_fitness(individual):
    # violated constraints, i.e. how many elements are missing
    vc = set(range(N)).difference(lists_to_set(individual))
    duplicates = count_duplicates(individual)

    # it is worse to lack elements than having duplicates
    fitness = -1000*len(vc)-duplicates
    # TODO: could return tuple as (-len(vc), duplicates)
    return fitness

# parent selection using ranking system
# P(choose fittest parent) = POPULATION_SIZE/n_slots
# P(choose second fittest parent) = (POPULATION_SIZE-1)/n_slots
# ...
# P(choose least fit parent) = 1/n_slots
def parent_selection(population):
    ranked_population = sorted(population, key=lambda t : t[1], reverse=True)
    # number of slots in spinning wheel = POPULATION_SIZE(POPULATION_SIZE+1)/2 (arithmetic sum)
    n_slots = POPULATION_SIZE*(POPULATION_SIZE+1)/2
    wheel_number = random.randint(1,n_slots)
    curr_parent = 0
    parent_number = POPULATION_SIZE
    increment = POPULATION_SIZE-1
    while wheel_number > parent_number:
        curr_parent +=1
        parent_number +=increment
        increment -= 1
    return ranked_population[curr_parent]


def cross_over(parent1, parent2):
    cut1 = random.randint(0,len(parent1[0]))
    cut2 = random.randint(0,len(parent2[0]))
    child = parent1[0][:cut1]+parent2[0][cut2:]
    # TODO: explore other cutting strategies
    if random.random() < 0.1:
        #logging.info(f'Mutating child')
        mutate(child)
    return child

# with low probability, remove a random gene (sublist) from child and add a random gene
def mutate(child):
    idx = random.randint(0,len(child))
    child = child[:idx] + child[idx+1:] + STATE_SPACE[random.randint(0,len(STATE_SPACE)-1)]
    return child

def update_population(population, offspring):
    tot = population + offspring
    ranked_population = sorted(tot, key=lambda t : t[1], reverse=True)
    return ranked_population[:POPULATION_SIZE]

In [143]:
# main

def main(N, ITERS):
    population = create_population(N, seed=42)
    for _ in range(ITERS):
        offspring = []
        for __ in range(OFFSPRING_SIZE):
            parent1, parent2 = parent_selection(population), parent_selection(population)
            child = cross_over(parent1,parent2)
            child_fitness = compute_fitness(child)
            offspring.append((child,child_fitness))
        population = update_population(population, offspring)
        if _ % 20 == 0:
            logging.info(f'Iter {_}, best weight: {sorted(population, key=lambda l : l[1], reverse=True)[0][1]}')
        if _ == ITERS-1:
            logging.info(f'Best individual: {sorted(population, key=lambda l : l[1], reverse=True)[0][1]}')



In [144]:
main(1000, 100)

Iter 0, best weight: -20394
Iter 20, best weight: -50
Iter 40, best weight: -50
Iter 60, best weight: -50
Iter 80, best weight: -50
Best individual: -50
