In [None]:
import random
import csv
import math

In [None]:
def read_data(path):
    """Parse a set-cover instance from a whitespace-separated text file.

    The first non-blank line holds two integers: the code reads the first as
    the number of sets and the second as the number of elements. Every
    following non-blank line describes one set: its cost followed by the
    elements it contains.

    Args:
        path: Location of the instance file.

    Returns:
        Tuple ``(n_elems, n_sets, S, costs)`` where ``S`` is a list of sets of
        ints and ``costs[i]`` is the integer cost of ``S[i]``.

    Raises:
        ValueError: If the file has no header line or a token is not an
            integer.
    """
    S = []
    costs = []
    header = None
    with open(path, "r") as f:
        for line in f:
            # split() without arguments collapses runs of whitespace and drops
            # the trailing newline, so trailing/double spaces no longer yield
            # empty tokens that int() cannot parse.
            info = line.split()
            if not info:
                # Blank line (commonly the last one in the file): skip it.
                continue
            if header is None:
                header = (int(info[0]), int(info[1]))
            else:
                costs.append(int(info[0]))
                S.append({int(e) for e in info[1:]})

    if header is None:
        raise ValueError("no header line found in " + repr(path))

    n_sets, n_elems = header
    return n_elems, n_sets, S, costs

In [None]:
def initial_population(n_sets, n_indiv):
  """Build the starting population of random bit-string individuals.

  Args:
    n_sets: Number of genes per individual (one 0/1 gene per set).
    n_indiv: Number of individuals to generate.

  Returns:
    A list of ``n_indiv`` lists, each holding ``n_sets`` random 0/1 values.
  """
  return [
    [random.randint(0, 1) for _gene in range(n_sets)]
    for _member in range(n_indiv)
  ]

In [None]:
def crossover(population, n_sets):
  """Produce offspring by uniform crossover of consecutive pairs.

  Individuals are paired as (0, 1), (2, 3), ...; a trailing unpaired
  individual is ignored. Each pair yields two children, and every gene of a
  child is copied from one of the two parents picked at random.

  Args:
    population: List of individuals (lists of genes).
    n_sets: Number of genes copied into each child.

  Returns:
    A new list with the generated children; parents are not modified.
  """
  children_per_pair = 2
  offspring = []

  for start in range(0, len(population) - 1, 2):
    parents = (population[start], population[start + 1])
    for _ in range(children_per_pair):
      # One random parent choice per gene, in gene order.
      child = [parents[random.randint(0, 1)][locus] for locus in range(n_sets)]
      offspring.append(child)

  return offspring

In [None]:
def mutation(indiv, mutation_prob, n_sets):
  """Flip genes of ``indiv`` in place, each with probability ``mutation_prob``.

  Args:
    indiv: Mutable sequence of genes; modified in place.
    mutation_prob: Threshold compared against ``random.random()`` per gene.
    n_sets: Number of leading genes considered for mutation.

  Returns:
    None.
  """
  for locus in range(n_sets):
    should_flip = random.random() <= mutation_prob
    if should_flip:
      # Truthy gene becomes 0, falsy gene becomes 1.
      indiv[locus] = 0 if indiv[locus] else 1

In [None]:
def isSolution(indiv, sets, n_elems=None):
  """Tell whether the sets selected by ``indiv`` cover every element.

  Args:
    indiv: Sequence of genes; a truthy ``indiv[i]`` selects ``sets[i]``.
    sets: Sequence of element collections, one per gene.
    n_elems: Number of elements that must be covered. When omitted, the
      module-level ``n_elems`` is used, which keeps existing two-argument
      calls working.

  Returns:
    True if the number of distinct elements in the selected sets equals
    ``n_elems``, False otherwise.

  Raises:
    NameError: If ``n_elems`` is omitted and no module-level ``n_elems``
      exists.
  """
  if n_elems is None:
    # Legacy path: earlier versions always read the notebook-level variable.
    try:
      n_elems = globals()["n_elems"]
    except KeyError:
      raise NameError("name 'n_elems' is not defined") from None

  covered = set()
  for i, set_ in enumerate(sets):
    if indiv[i]:
      covered.update(set_)

  return len(covered) == n_elems

In [None]:
def no_sol_fitness(indiv, sets, n_elems):
  """Penalty score based on how many elements the selection leaves uncovered.

  Args:
    indiv: Sequence of genes; a truthy ``indiv[i]`` selects ``sets[i]``.
    sets: Sequence of element collections indexed like ``indiv``.
    n_elems: Number of elements that should be covered.

  Returns:
    ``(n_elems - number of distinct covered elements) * 1000``.
  """
  chosen = (sets[pos] for pos, gene in enumerate(indiv) if gene)
  covered = set().union(*chosen)
  missing = n_elems - len(covered)
  return missing * 1000

In [None]:
def fitness(indiv,  sets, costs, n_elems):
  """Score an individual; ``get_fittest`` treats smaller values as better.

  Feasibility is derived from the ``n_elems`` argument (via the
  ``no_sol_fitness`` penalty) instead of a module-level variable, and the
  coverage is computed only once.

  Args:
    indiv: Sequence of genes; a truthy ``indiv[i]`` selects ``sets[i]``.
    sets: Sequence of element collections, one per gene.
    costs: ``costs[i]`` is the cost of selecting ``sets[i]``.
    n_elems: Number of elements that must be covered.

  Returns:
    The ``no_sol_fitness`` penalty when the selection does not cover exactly
    ``n_elems`` distinct elements; otherwise the summed cost of the selected
    sets.
  """
  penalty = no_sol_fitness(indiv, sets, n_elems)
  if penalty != 0:
    # A non-zero penalty means the covered count differs from n_elems,
    # i.e. the individual is not a valid cover.
    return penalty

  return sum(costs[i] for i in range(len(sets)) if indiv[i])

In [None]:
def get_random_indiv(already_chosen, pop_size=None):
  """Pick a random population index that has not been chosen yet.

  Args:
    already_chosen: Collection of indices that must not be returned.
    pop_size: Number of individuals to draw from (valid indices are
      ``0 .. pop_size - 1``). When omitted, the length of the module-level
      ``population`` is used, which keeps existing one-argument calls
      working.

  Returns:
    An index in ``range(pop_size)`` that is not in ``already_chosen``.

  Raises:
    ValueError: If every index has already been chosen. The previous
      rejection-sampling loop never terminated in that situation.
  """
  if pop_size is None:
    pop_size = len(population)

  remaining = [idx for idx in range(pop_size) if idx not in already_chosen]
  if not remaining:
    raise ValueError("every individual has already been chosen")

  return random.choice(remaining)

In [None]:
def get_fittest(population, sets, costs, n_elems):
  """Find the individual with the lowest fitness value.

  Args:
    population: List of individuals to evaluate.
    sets: Sequence of element collections, forwarded to ``fitness``.
    costs: Per-set costs, forwarded to ``fitness``.
    n_elems: Number of elements to cover, forwarded to ``fitness``.

  Returns:
    Tuple ``(best_fitness, index)``. On ties the earliest individual wins.
    For an empty population the result is ``(math.inf, -1)``.
  """
  scores = [fitness(member, sets, costs, n_elems) for member in population]

  winner_score, winner_idx = math.inf, -1
  for idx, score in enumerate(scores):
    # Strict comparison keeps the first of several equally good individuals.
    if score < winner_score:
      winner_score, winner_idx = score, idx

  return winner_score, winner_idx

In [None]:
def selection(population, sets, costs, n_elems):
    """Tournament selection: pair individuals at random, keep each winner.

    The individuals of ``population`` (the argument, not any module-level
    variable) are shuffled and paired off; the fitter member of every pair
    according to ``get_fittest`` survives. With an odd number of individuals
    the one left without an opponent survives unchallenged, so the call
    always terminates.

    Args:
        population: List of individuals competing for survival.
        sets: Sequence of element collections, forwarded to ``get_fittest``.
        costs: Per-set costs, forwarded to ``get_fittest``.
        n_elems: Number of elements to cover, forwarded to ``get_fittest``.

    Returns:
        A new list with the surviving individuals (same objects as in
        ``population``), of length ``ceil(len(population) / 2)``.
    """
    # A random permutation of the indices, read two at a time, gives random
    # pairs in which every individual takes part exactly once.
    order = list(range(len(population)))
    random.shuffle(order)

    new_population = []
    for k in range(0, len(order) - 1, 2):
        pair = [population[order[k]], population[order[k + 1]]]
        _, winner = get_fittest(pair, sets, costs, n_elems)
        new_population.append(pair[winner])

    if len(order) % 2:
        # Odd individual out: no opponent available, it advances directly.
        new_population.append(population[order[-1]])

    return new_population

In [None]:
# Genetic-algorithm driver: evolve a population of set selections until the
# best fitness has not improved for `tries_without_change` generations.
count = 1

n_elems, n_sets, sets, costs = read_data('/content/sample_data/set_cover_200.txt')
n_indiv = 10
mutation_prob = 0.2
population = initial_population(n_sets, n_indiv)
global_fittest = -1
global_best_fitness =  math.inf
# Elements still uncovered by the best individual found so far; nothing is
# covered before the first generation has been evaluated.
left = n_elems

tries_without_change = 50
since_not_changed = 0
print("Sets: " + str(sets))
while since_not_changed < tries_without_change:
  print("-------------------------------------------------- (" + str(count) + ") ---------------------------------------------------------")
  print("Selected population (" + str(len(population)) + "): " + str(population))

  new_indivs = crossover(population, n_sets)
  print("Crossover: " + str(new_indivs))

  # Only the freshly created children are mutated (in place).
  for indiv in new_indivs:
    mutation(indiv, mutation_prob, n_sets)
  print("Mutation: " + str(new_indivs))

  population += new_indivs

  print("New population (" + str(len(population)) + "): " + str(population))
  population = selection(population, sets, costs, n_elems)

  #Termination
  local_best_fitness, local_fittest = get_fittest(population, sets, costs, n_elems)

  if global_best_fitness > local_best_fitness:
    global_best_fitness = local_best_fitness
    global_fittest = population[local_fittest]
    since_not_changed = 0
    # Report the uncovered count of the new best individual. The previous
    # code used `indiv`, the last mutated child left over from the loop above.
    left = no_sol_fitness(global_fittest, sets, n_elems)/1000
  else:
    since_not_changed += 1
  print("Left: " + str(left))
  print("Fittest: (" + str(global_best_fitness) + "): " + str(global_fittest))

  print("Iterations without changes: " + str(since_not_changed))
  
  count += 1


print("-------------------------------------------------------------------------------------------------------------------------")
print("-------------------------------------------------------------------------------------------------------------------------")

print("Count: " + str(count))
print("Fittest: (" + str(global_best_fitness) + "): " + str(global_fittest))


Sets: [{1, 2, 4, 5, 7, 8, 9, 10, 13, 14, 15, 17, 19, 20, 21, 23, 26, 27, 29, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 49, 53, 54, 56, 57, 58, 60, 61, 63, 64, 65, 66, 69, 71, 73, 74, 75, 76, 79, 81, 83, 84, 87, 89, 90, 94, 95, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 108, 109, 110, 112, 113, 118, 119, 120, 122, 125, 126, 127, 129, 130, 131, 133, 135, 136, 137, 139, 140, 142, 143, 144, 146, 147, 148, 150, 152, 154, 155, 156, 158, 160, 162, 163, 164, 165, 166, 168, 169, 170, 172, 174, 176, 177, 178, 179, 182, 183, 184, 185, 186, 187, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 203, 204, 208, 210, 211, 212, 214, 218, 219, 220, 222, 225, 226, 227, 228, 229, 230, 232, 233, 234, 237, 238, 239, 240, 242, 247, 248, 249, 250, 252, 255, 258, 260, 261, 262, 263, 265, 266, 267, 268, 270, 272, 273, 274, 275, 276, 278, 282, 286, 287, 288, 292, 293, 295, 297, 298, 299, 300, 301, 302, 303, 305, 306, 308, 309, 310, 311, 312, 315, 316, 319, 320, 321, 323, 326, 327, 32