# Lab 2
Use *genetic algorithms* (GA) to solve the set cover problem.

In [2]:
# given function to yield list of lists
import random

def problem(N, seed=None):
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

In [3]:
# logging
import logging
logging.basicConfig(format="%(message)s", level=logging.INFO)

## Solution outline, $(\mu + \lambda)$ algorithm
### (also $(\mu , \lambda)$ possible if `update_population_comma` is used instead of `update_population_plus`
1. Create initial population by selecting a random subset of the list of lists
2. Compute fitness to rank population
3. Choose parents using roulette wheel with ranking system
4. Generate offspring by cross-over, mutate offspring with low probability
5. Choose the best solutions of population and offspring (*or just best from offspring*)
6. Repeat step 2-5

In [4]:
# imports
from itertools import groupby
import random

In [5]:
# helping functions

# find out which elems are in the lists to compute violated constraints
def lists_to_set(lists):
    list_elems = [single_elem for l in lists for single_elem in l]
    s = set(list_elems)
    return s

# find out how many duplicates there are in the population
def count_duplicates(lists):
    list_elems = [single_elem for l in lists for single_elem in l]
    duplicates = sum([len(list(group))-1 for key, group in groupby(sorted(list_elems))])
    return duplicates

In [6]:
# to initialize the population
def create_population(STATE_SPACE, GOAL):
    population = []
    for _ in range(POPULATION_SIZE):
        individual = random.choices(STATE_SPACE,k=random.randint(1,len(STATE_SPACE)))
        fitness = compute_fitness(individual, GOAL)
        population.append((individual,fitness))
    return population

# fitness is a tuple of (-#of_elems_missing,-#duplicates) which should be maximized
def compute_fitness(individual, GOAL):
    # violated constraints, i.e. how many elements are missing
    vc = GOAL.difference(lists_to_set(individual))
    duplicates = count_duplicates(individual)
    # it is worse to lack elements than having duplicates
    fitness = (-len(vc), -duplicates)
    return fitness

def goal_check(individual, GOAL):
    if GOAL==lists_to_set(individual) :
        return True
    else: return False

"""
parent selection using ranking system
P(choose fittest parent) = POPULATION_SIZE/n_slots
P(choose second fittest parent) = (POPULATION_SIZE-1)/n_slots
...
P(choose least fit parent) = 1/n_slots
"""
def parent_selection(population):
    ranked_population = sorted(population, key=lambda t : t[1], reverse=True)
    # number of slots in spinning wheel = POPULATION_SIZE(POPULATION_SIZE+1)/2 (arithmetic sum)
    n_slots = POPULATION_SIZE*(POPULATION_SIZE+1)/2
    wheel_number = random.randint(1,n_slots)
    curr_parent = 0
    parent_number = POPULATION_SIZE
    increment = POPULATION_SIZE-1
    while wheel_number > parent_number:
        curr_parent +=1
        parent_number +=increment
        increment -= 1
    return ranked_population[curr_parent]

# make one child from each cross-over, and mutate with low prob
def cross_over(parent1, parent2, STATE_SPACE):
    cut1 = random.randint(0,len(parent1[0]))
    cut2 = random.randint(0,len(parent2[0]))
    child = parent1[0][:cut1]+parent2[0][cut2:]
    if random.random() < 0.01:
        mutate(child, STATE_SPACE)
    return child

# with low probability, remove a random gene (sublist) from child and add a random gene
def mutate(child, STATE_SPACE):
    idx = random.randint(0,len(child))
    child = child[:idx] + child[idx+1:] + STATE_SPACE[random.randint(0,len(STATE_SPACE)-1)]
    return child

def update_population_plus(population, offspring):
    tot = population + offspring
    ranked_population = sorted(tot, key=lambda t : t[1], reverse=True)
    return ranked_population[:POPULATION_SIZE]

def update_population_comma(offspring):
    ranked_pop = sorted(offspring, key=lambda t : t[1], reverse=True)
    return ranked_pop[:POPULATION_SIZE]

In [13]:
import numpy as np

def solve_problem(N):
    STATE_SPACE = problem(N,seed=42)
    GOAL = set(range(N))
    population = create_population(STATE_SPACE, GOAL)
    # track best results through generations
    best_w = -np.inf
    best_iter = 0
    for i in range(ITERS):
        offspring = []
        for __ in range(OFFSPRING_SIZE):
            parent1, parent2 = parent_selection(population), parent_selection(population)
            child = cross_over(parent1,parent2, STATE_SPACE)
            child_fitness = compute_fitness(child, GOAL)
            offspring.append((child,child_fitness))
        population = update_population_plus(population, offspring)
        #population = update_population_comma(offspring)
        best_ind = sorted(population, key=lambda l : l[1], reverse=True)[0]
        if goal_check(best_ind[0],GOAL) and best_ind[1][1] > best_w:
            best_w = best_ind[1][1]
            best_iter = i
        #if _ % 20 == 0:
         #   logging.info(f'Iter {_}, best weight: {sorted(population, key=lambda l : l[1], reverse=True)[0][1]}')
        #if _ == ITERS-1:
    best_ind2 = sorted(population, key=lambda l : l[1], reverse=True)[0]
    #best_ind_sorted = sorted([single_elem for l in best_ind[0] for single_elem in l])
    #logging.info(f'Best individual: {best_ind_sorted}')
    #logging.info(f'With weight: {best_ind[1]}')
    if goal_check(best_ind2[0], GOAL):
        logging.info(f'Solution reached for N = {N}')
    #logging.info(f'The best weight: {-best_w} was found in iter {best_iter}')
    return population



In [15]:
# main

# settings
POPULATION_SIZE = 50
OFFSPRING_SIZE = 30
ITERS = 100

for N in [5,10,20,50,100,1000,2000]:
    final_pop = solve_problem(N)
    best_ind = sorted(final_pop, key=lambda li : li[1], reverse=True)[0]
    logging.info(f'The best weight: {-best_ind[1][1]}')
    print()


Solution reached for N = 5
The best weight: 0
Solution reached for N = 10
The best weight: 0






Solution reached for N = 20
The best weight: 5





Solution reached for N = 50
The best weight: 40





Solution reached for N = 100
The best weight: 113





Solution reached for N = 1000
The best weight: 2995





Solution reached for N = 2000
The best weight: 7617



