### The Set Covering 📔 Problem Using Genetic Algorithms

> Sidharrth Nagappan, 2022

In this notebook, we will take a GA approach to solving the set-covering problem. As a background, let's assume we have 500 potential lists that should form a complete subset.

The final product should be a list of 0s and 1s that indicate which lists should be included in the final set. We use a genetic approach to obtain this list via:

1. Mutation: randomly change a 0 to a 1 or vice versa
2. Crossover: randomly select a point in the list and swap the values after that point


In [1]:
import logging
from collections import namedtuple
import random
from matplotlib import pyplot as plt

In [2]:
POPULATION_SIZE = 200
# Each generation generates 3 children
OFFSPRING_SIZE = 150
# Number of generations to run
NUM_GENERATIONS = 1000

# Each Individual has a genome and a computed fitness
Individual = namedtuple('Individual', ['genome', 'fitness'])


#### Generating the Initial Population


In [3]:
population = list()


def problem(N, seed=42):
    '''
    Generates the problem set for the given N.
    '''
    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1)
             for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

N = 100
prob = problem(N, seed=42)

PROBLEM_SIZE = len(prob)

In [4]:
prob[3]

[4,
 7,
 20,
 21,
 28,
 29,
 31,
 34,
 40,
 41,
 48,
 59,
 68,
 71,
 77,
 81,
 87,
 88,
 93,
 98,
 99]

In [5]:
import numpy as np

def calculate_fitness(genome):
    '''
    Calculates the fitness of the given genome.
    The fitness is the number of unique elements
    The weight is the total number of elements in the genome
    '''
    # fitness is number of distinct elements in genome
    all_elements = []
    distinct_elements = set()
    weight = 0
    for subset, gene in zip(prob, genome):
        # if the particular element should be taken
        if gene == 1:
            distinct_elements.update(subset)
            weight += len(subset)
            all_elements += subset
    num_duplicates = len(all_elements) - len(set(all_elements))
    num_undiscovered_elements = len(set(range(N)) - distinct_elements)
    # print(set(range(N)) - distinct_elements)
    # print("num_undiscovered_elements", num_undiscovered_elements)
    # return num_undiscovered_elements, -weight
    # return len(distinct_elements), -weight
    # return num_undiscovered_elements / (len(distinct_elements) + 1), -weight
    return len(distinct_elements) / (num_undiscovered_elements + 1), -weight
    # other potential fitness functions:
    # return len(distinct_elements) / (num_duplicates + 1)
    # return len(distinct_elements) / (num_duplicates + 1) - num_undiscovered_elements, -weight
    # return len(distinct_elements) / (num_undiscovered_elements + 1), -weight

def generate_element():
    '''
    Randomly generates offspring made up of 0s and 1s.
    1 means the element is taken, 0 means it is not.
    '''
    genome = [random.randint(0, 1) for _ in range(N)]
    fitness = calculate_fitness(genome)
    # genome = np.random.choice([True, False], size=PROBLEM_SIZE)
    return Individual(genome, fitness)

initial_population = [generate_element() for _ in range(POPULATION_SIZE)]

len(initial_population)


30

In [6]:
len(initial_population[0].genome)


100

#### Mutation and Recombination

Types of mutations:
1. Swap
2. Bit Flip
3. Scramble


In [7]:
import itertools

fitness_log = []

def calculate_weight(genome):
    '''
    Weight Function
    Weight is the sum of the lengths of the subsets that are taken
    '''
    # select the subsets from prob based on the best individual
    final = [prob[i] for i, gene in enumerate(genome) if gene == 1]
    weight = len(list(itertools.chain.from_iterable(final)))
    return weight

def choose_mutation_rate(fitness_log):
    # choose mutation rate based on change in fitness_log
    if len(fitness_log) == 0:
        return 0.2
    if len(fitness_log) < 3:
        considered_elements = len(fitness_log)
    else:
        considered_elements = 3
    growth_rate = np.mean(np.diff(fitness_log[-considered_elements:]))
    if growth_rate <= 0:
        return 0.4
    elif growth_rate < 0.5:
        return 0.3
    elif growth_rate < 1:
        return 0.01
    else:
        return 0.1

def plateau_detection(num_generations, fitness_log):
    '''
    Checks if the fitness has plateaued for the last num_generations.
    '''
    if len(fitness_log) < num_generations:
        return False
    return all(fitness_log[-num_generations] == fitness_log[-i] for i in range(1, num_generations))

def flip_mutation(genome, mutate_only_one_element=False):
    '''
    Flips random bit(s) in the genome.
    Parameters:
    mutate_only_one_element: If True, only one bit is flipped.
    '''
    modified_genome = genome.copy()
    if mutate_only_one_element:
        # flip a random bit
        index = random.randint(0, len(modified_genome) - 1)
        modified_genome[index] = 1 - modified_genome[index]
    else:
        # flip a random number of bits
        num_to_flip = choose_mutation_rate(fitness_log) * len(modified_genome)
        to_flip = random.sample(range(len(modified_genome)), int(num_to_flip))
        # to_flip = random.sample(range(len(modified_genome)), random.randint(0, len(modified_genome)))
        modified_genome = [1 - modified_genome[i] if i in to_flip else modified_genome[i] for i in range(len(modified_genome))]

    return modified_genome
    # mutate only if it brings some benefit to the weight
    # if calculate_weight(modified_genome) < calculate_weight(genome):
    #     return modified_genome
    

def return_best_genome(genome1, genome2):
    return genome1
    # if calculate_fitness(genome1) > calculate_fitness(genome2):
    #     return genome1
    # else:
    #     return genome2

def mutation(genome):
    '''
    Runs a randomly chosen mutation on the genome. Mutations are:
    1. Bit Flip Mutation
    2. Scramble Mutation
    3. Swap Mutation
    4. Inversion Mutation
    Refer to README for more details.
    '''
    # check type of genome (debugging)
    # if type(genome) == tuple:
    #     print("genome is tuple")
    #     print(genome)
    
    possible_mutations = [flip_mutation, scramble_mutation, swap_mutation, inversion_mutation]
    chosen_mutation = random.choice(possible_mutations)
    return chosen_mutation(genome)

    # if random.random() < 0.1:
    #     for _ in range(num_elements_to_mutate):
    #         index = random.randint(0, len(genome) - 1)
    #         genome[index] = 1 - genome[index]
    # mutate a random number of elements
    # to_flip = random.randint(0, len(genome))
    # # flip the bits
    # return [1 - genome[i] if i < to_flip else genome[i] for i in range(len(genome))]

def scramble_mutation(genome):
    '''
    Randomly scrambles the genome.
    '''
    # select start and end indices to scramble
    modified_genome = genome.copy()
    start = random.randint(0, len(modified_genome) - 1)
    end = random.randint(start, len(modified_genome) - 1)
    # scramble the elements
    modified_genome[start:end] = random.sample(modified_genome[start:end], len(modified_genome[start:end]))
    return return_best_genome(modified_genome, genome)

def swap_mutation(genome):
    '''
    Randomly swaps two elements in the genome.
    '''
    modified_genome = genome.copy()
    index1 = random.randint(0, len(modified_genome) - 1)
    index2 = random.randint(0, len(modified_genome) - 1)
    modified_genome[index1], modified_genome[index2] = modified_genome[index2], modified_genome[index1]
    return return_best_genome(modified_genome, genome)

def inversion_mutation(genome):
    '''
    Randomly inverts the genome.
    '''
    modified_genome = genome.copy()
    # select start and end indices to invert
    start = random.randint(0, len(modified_genome) - 1)
    end = random.randint(start, len(modified_genome) - 1)
    # invert the elements
    modified_genome = modified_genome[:start] + modified_genome[start:end][::-1] + modified_genome[end:]
    return return_best_genome(modified_genome, genome)

def crossover(genome1, genome2):
    '''
    Crossover the two genomes by randomly selecting a point
    '''
    # crossover at a random point
    crossover_point = random.randint(0, len(genome1))
    modified_genome = genome1[:crossover_point] + genome2[crossover_point:]
    # if type(modified_genome) == tuple:
    #     print(f"genome1 type is {type(genome1)}")
    #     print(f"genome2 type is {type(genome2)}")
    #     print("modified_genome is tuple")
    #     print(modified_genome)
    return modified_genome

def roulette_wheel_selection(population):
    '''
    Selects an individual from the population based on the fitness.
    '''
    # calculate the total fitness of the population
    total_fitness = sum([individual.fitness[0] for individual in population])
    # select a random number between 0 and the total fitness
    random_number = random.uniform(0, total_fitness)
    # select the individual based on the random number
    current_fitness = 0
    for individual in population:
        current_fitness += individual.fitness[0]
        if current_fitness > random_number:
            return individual

def stochastic_universal_sampling(population):
    '''
    Select using Stochastic Universal Sampling.
    '''
    point_1 = random.uniform(0, 1)
    point_2 = point_1 + 1
    # In Progress
        
def rank_selection(population):
    '''
    Select using Rank Selection. Read more here:
    https://www.tutorialspoint.com/genetic_algorithms/genetic_algorithms_parent_selection.htm
    '''
    # sort the population based on the fitness
    population.sort(key=lambda x: x.fitness[0], reverse=True)
    # calculate the total rank
    total_rank = sum([i for i in range(len(population))])
    # select a random number between 0 and the total rank
    random_number = random.uniform(0, total_rank)
    # select the individual based on the random number
    current_rank = 0
    for i, individual in enumerate(population):
        current_rank += i
        if current_rank > random_number:
            return individual
    
    
def tournament(population, selection_method='tournament'):
    '''
    Selects the best individual from a random sample of the population.
    '''
    if selection_method == 'roulette':
        participant = roulette_wheel_selection(population)
        participant = Individual(participant.genome, participant.fitness)
    elif selection_method == 'rank':
        participant = rank_selection(population)
        participant = Individual(participant.genome, participant.fitness)
    else:
        participant = max(random.sample(population, k=2), key=lambda x: x.fitness)
        participant = Individual(participant.genome, participant.fitness)
    return participant

def generate(population, generation):
    '''
    Create offspring from the population using either:
    1. Cross Over + Mutation
    2. Mutation
    '''
    # can either cross over between two parents or mutate a single parent
    if random.random() < 0.2:
        parent = tournament(population)
        # if random.random() <= 0.3:
        #     genome = mutation(parent.genome)
        genome = mutation(parent.genome)
        child = Individual(parent, calculate_fitness(parent))
    else:
        # crossover
        parent1 = tournament(population)
        parent2 = tournament(population)
        genome = crossover(parent1.genome, parent2.genome)
        # if random.random() <= 0.3:
        #     genome = mutation(genome)
        genome = mutation(genome)
        child = Individual(genome, calculate_fitness(genome))

    fitness_log.append((generation + 1, child.fitness[0]))

    return child


In [8]:
import itertools

best = max(initial_population, key=lambda x: x.fitness)

best_individual = max(initial_population, key=lambda x: x.fitness)
for i in range(NUM_GENERATIONS):
    # create offspring
    offspring = [generate(initial_population, i) for i in range(OFFSPRING_SIZE)]
    # calculate fitness
    # offspring = [Individual(child.genome, calculate_fitness(child.genome)) for child in offspring]
    
    initial_population = initial_population + offspring
    initial_population = sorted(initial_population, key=lambda x: x.fitness, reverse=True)[:POPULATION_SIZE]    

    fittest_offspring = max(initial_population, key=lambda x: x.fitness)

    if fittest_offspring.fitness > best_individual.fitness:
        best_individual = fittest_offspring

# get the best individual
print(calculate_weight(best_individual.genome))

240


In [9]:
prob[1]

[0,
 11,
 12,
 13,
 19,
 20,
 27,
 28,
 35,
 43,
 44,
 45,
 48,
 53,
 54,
 57,
 69,
 75,
 83,
 89,
 91,
 97]

In [10]:
calculate_weight(best_individual.genome)

240

In [11]:
iter

<function iter>

In [12]:
best_individual.genome

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1]

In [13]:
prob

[[64, 35, 3, 69, 4, 71, 11, 75, 13, 77, 17, 86, 54, 27, 28, 29, 94, 31],
 [0,
  11,
  12,
  13,
  19,
  20,
  27,
  28,
  35,
  43,
  44,
  45,
  48,
  53,
  54,
  57,
  69,
  75,
  83,
  89,
  91,
  97],
 [5,
  8,
  10,
  12,
  15,
  20,
  24,
  26,
  29,
  33,
  34,
  35,
  37,
  45,
  46,
  47,
  48,
  58,
  68,
  70,
  73,
  79,
  80,
  81,
  82,
  84,
  85,
  87,
  89,
  90,
  93,
  98],
 [4,
  7,
  20,
  21,
  28,
  29,
  31,
  34,
  40,
  41,
  48,
  59,
  68,
  71,
  77,
  81,
  87,
  88,
  93,
  98,
  99],
 [8,
  11,
  17,
  18,
  27,
  28,
  31,
  33,
  34,
  40,
  46,
  50,
  51,
  54,
  58,
  63,
  65,
  68,
  71,
  72,
  74,
  82,
  83,
  91,
  95,
  96],
 [32, 1, 67, 68, 70, 8, 76, 14, 80, 49, 48, 19, 20, 54, 87, 59, 92],
 [0,
  2,
  7,
  10,
  13,
  14,
  19,
  20,
  22,
  25,
  30,
  33,
  34,
  37,
  38,
  39,
  41,
  43,
  46,
  47,
  55,
  58,
  62,
  64,
  67,
  69,
  72,
  76,
  77,
  80,
  81,
  82,
  92,
  97,
  98,
  99],
 [8,
  16,
  21,
  25,
  27,
  33,
  54,

In [14]:
# # plot the fitness
# import matplotlib.pyplot as plt

# # plot fitness against generation, limit y-axis range to 500 and 501
# plt.plot([i[0] for i in fitness_log], [i[1] for i in fitness_log])
# plt.ylim(500, 501)

In [15]:
# fitness_log