### The Set Covering 📔 Problem Using Genetic Algorithms

> Sidharrth Nagappan, 2022

In this notebook, we will take a GA approach to solving the set-covering problem. As a background, let's assume we have 500 potential lists that should form a complete subset.

The final product should be a list of 0s and 1s that indicate which lists should be included in the final set. We use a genetic approach to obtain this list via:

1. Mutation: randomly change a 0 to a 1 or vice versa
2. Crossover: randomly select a point in the list and swap the values after that point


In [52]:
import logging
from collections import namedtuple
import random
from matplotlib import pyplot as plt

In [53]:
POPULATION_SIZE = 30
# Each generation generates 3 children
OFFSPRING_SIZE = 20
# Number of generations to run
NUM_GENERATIONS = 1000

# Each Individual has a genome and a computed fitness
Individual = namedtuple('Individual', ['genome', 'fitness'])


#### Generating the Initial Population


In [54]:
population = list()


def problem(N, seed=42):
    '''
    Generates the problem set for the given N.
    '''
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1)
             for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

N = 1000
prob = problem(N, seed=42)

PROBLEM_SIZE = len(prob)

In [55]:
import numpy as np

def calculate_fitness(genome):
    '''
    Calculates the fitness of the given genome.
    The fitness is the number of unique elements / number of duplicates
    '''
    # fitness is number of distinct elements in genome
    all_elements = []
    distinct_elements = set()
    weight = 0
    for subset, gene in zip(prob, genome):
        # if the particular element should be taken
        if gene == 1:
            distinct_elements.update(subset)
            weight += len(subset)
            all_elements += subset
    num_duplicates = len(all_elements) - len(set(all_elements))
    num_undiscovered_elements = len(set(range(N)) - distinct_elements)
    return len(distinct_elements), -weight
    # return len(distinct_elements) / (num_duplicates + 1)
    # return len(distinct_elements) / (num_duplicates + 1) - num_undiscovered_elements
    # return len(distinct_elements) / (num_undiscovered_elements + 1)

def generate_element():
    '''
    Randomly generates offspring made up of 0s and 1s.
    1 means the element is taken, 0 means it is not.
    '''
    genome = [random.randint(0, 1) for _ in range(N)]
    fitness = calculate_fitness(genome)
    # genome = np.random.choice([True, False], size=PROBLEM_SIZE)
    return Individual(genome, fitness)

initial_population = [generate_element() for _ in range(POPULATION_SIZE)]

len(initial_population)


30

In [56]:
len(initial_population[0].genome)


1000

#### Mutation and Recombination

Types of mutations:
1. Swap
2. Bit Flip
3. Scramble


In [57]:
import itertools

def calculate_weight(genome):
    '''
    Weight Function
    Weight is the sum of the lengths of the subsets that are taken
    '''
    # select the subsets from prob based on the best individual
    final = [prob[i] for i, gene in enumerate(genome) if gene]
    weight = len(list(itertools.chain(*final)))
    return weight

def flip_mutation(genome, mutate_only_one_element=True):
    '''
    Flips random bit(s) in the genome.
    Parameters:
    mutate_only_one_element: If True, only one bit is flipped.
    '''
    modified_genome = genome.copy()
    if mutate_only_one_element:
        # flip a random bit
        index = random.randint(0, len(modified_genome) - 1)
        modified_genome[index] = 1 - modified_genome[index]
    else:
        # flip a random number of bits
        to_flip = random.sample(range(len(modified_genome)), random.randint(0, len(modified_genome)))
        modified_genome = [1 - modified_genome[i] if i in to_flip else modified_genome[i] for i in range(len(modified_genome))]

    # mutate only if it brings some benefit to the weight
    # if calculate_weight(modified_genome) < calculate_weight(genome):
    #     return modified_genome
    
    return return_best_genome(modified_genome, genome)

def return_best_genome(genome1, genome2):
    return genome1
    # if calculate_fitness(genome1) > calculate_fitness(genome2):
    #     return genome1
    # else:
    #     return genome2

def mutation(genome):
    '''
    Runs a randomly chosen mutation on the genome. Mutations are:
    1. Bit Flip Mutation
    2. Scramble Mutation
    3. Swap Mutation
    4. Inversion Mutation
    Refer to README for more details.
    '''
    possible_mutations = [flip_mutation, scramble_mutation, swap_mutation, inversion_mutation]
    chosen_mutation = random.choice(possible_mutations)
    return chosen_mutation(genome)

    # if random.random() < 0.1:
    #     for _ in range(num_elements_to_mutate):
    #         index = random.randint(0, len(genome) - 1)
    #         genome[index] = 1 - genome[index]
    # mutate a random number of elements
    # to_flip = random.randint(0, len(genome))
    # # flip the bits
    # return [1 - genome[i] if i < to_flip else genome[i] for i in range(len(genome))]

def scramble_mutation(genome):
    '''
    Randomly scrambles the genome.
    '''
    # select start and end indices to scramble
    modified_genome = genome.copy()
    start = random.randint(0, len(modified_genome) - 1)
    end = random.randint(start, len(modified_genome) - 1)
    # scramble the elements
    modified_genome[start:end] = random.sample(modified_genome[start:end], len(modified_genome[start:end]))
    return return_best_genome(modified_genome, genome)

def swap_mutation(genome):
    '''
    Randomly swaps two elements in the genome.
    '''
    modified_genome = genome.copy()
    index1 = random.randint(0, len(modified_genome) - 1)
    index2 = random.randint(0, len(modified_genome) - 1)
    modified_genome[index1], modified_genome[index2] = modified_genome[index2], modified_genome[index1]
    return return_best_genome(modified_genome, genome)

def inversion_mutation(genome):
    '''
    Randomly inverts the genome.
    '''
    modified_genome = genome.copy()
    # select start and end indices to invert
    start = random.randint(0, len(modified_genome) - 1)
    end = random.randint(start, len(modified_genome) - 1)
    # invert the elements
    modified_genome = modified_genome[:start] + modified_genome[start:end][::-1] + modified_genome[end:]
    return return_best_genome(modified_genome, genome)

def crossover(genome1, genome2):
    '''
    Crossover the two genomes by randomly selecting a point
    '''
    # crossover at a random point
    crossover_point = random.randint(0, len(genome1))
    modified_genome = genome1[:crossover_point] + genome2[crossover_point:]
    return modified_genome

def tournament(population):
    '''
    Selects the best individual from a random sample of the population.
    '''
    participant = max(random.sample(population, k=2), key=lambda x: x.fitness)
    return Individual(participant.genome, participant.fitness)

def create_offspring(population):
    '''
    Create offspring from the population using either:
    1. Cross Over + Mutation
    2. Mutation
    '''
    # can either cross over between two parents or mutate a single parent
    if random.random() < 0.4:
        parent = tournament(population)
        genome = mutation(parent.genome)
        child = Individual(parent, calculate_fitness(parent))
    else:
        # crossover
        parent1 = tournament(population)
        parent2 = tournament(population)
        genome = crossover(parent1.genome, parent2.genome)
        genome = mutation(genome)
        child = Individual(genome, calculate_fitness(genome))

    return child
    # create offspring using tournament selection and crossover/mutation
    # parent1 = tournament(population)
    # parent2 = tournament(population)
    # first cross over
    # crossed_over = crossover(parent1.genome, parent2.genome)
    # # generate Individual with crossed over genome
    # child = Individual(crossed_over, None)
    # # mutate the child
    # child = Individual(mutation(child.genome), None)
    # return child

In [60]:
import itertools

best = max(initial_population, key=lambda x: x.fitness)

best_individual = max(initial_population, key=lambda x: x.fitness)
for i in range(NUM_GENERATIONS):
    # create offspring
    offspring = [create_offspring(initial_population) for i in range(OFFSPRING_SIZE)]
    # calculate fitness
    offspring = [Individual(child.genome, calculate_fitness(child.genome)) for child in offspring]
    
    initial_population = initial_population + offspring
    initial_population = sorted(initial_population, key=lambda x: x.fitness, reverse=True)[:POPULATION_SIZE]    

    fittest_offspring = max(initial_population, key=lambda x: x.fitness)

    if fittest_offspring.fitness > best_individual.fitness:
        best_individual = fittest_offspring

    # select the best individual
    # best_individual = max(offspring, key=lambda x: x.fitness)
    # add offspring to population
    # sort population by fitness and take the last few (number to take depends on offspring size)
    # initial_population[:OFFSPRING_SIZE] = offspring
    # print(f'Generation {i}: {calculate_weight(initial_population[0].genome)}')
    # print(f'Length in Generation {i}: {len(initial_population[0].genome)}')

# get the best individual
print(calculate_weight(best_individual.genome))

3582


In [189]:
len(list(itertools.chain(*prob)))

3175

In [181]:
len(initial_population)

3

In [109]:
import itertools



130875

In [57]:
len(prob)


1809

In [56]:
prob


[[3,
  12,
  13,
  15,
  16,
  22,
  23,
  28,
  35,
  36,
  40,
  44,
  47,
  49,
  51,
  52,
  63,
  71,
  79,
  81,
  83,
  87,
  98,
  101,
  107,
  110,
  111,
  112,
  114,
  116,
  119,
  125,
  135,
  136,
  138,
  140,
  142,
  148,
  150,
  166,
  172,
  174,
  176,
  181,
  183,
  185,
  186,
  189,
  193,
  194,
  214,
  216,
  229,
  232,
  235,
  236,
  258,
  273,
  274,
  279,
  282,
  285,
  287,
  295,
  301,
  302,
  308,
  309,
  311,
  316,
  321,
  325,
  327,
  331,
  332,
  338,
  343,
  346,
  349,
  350,
  352,
  357,
  359,
  360,
  366,
  373,
  377,
  379,
  388,
  390,
  393,
  395,
  397,
  412,
  413,
  414,
  424,
  427,
  431,
  433,
  437,
  441,
  443,
  445,
  453,
  456,
  472,
  473,
  479,
  490,
  498],
 [0,
  1,
  5,
  9,
  16,
  24,
  29,
  32,
  33,
  35,
  40,
  43,
  46,
  54,
  56,
  57,
  58,
  64,
  65,
  70,
  71,
  73,
  78,
  80,
  81,
  82,
  84,
  91,
  101,
  108,
  112,
  122,
  123,
  126,
  128,
  134,
  135,
  136,
  137,
  150