In [None]:
import random
import numpy as np
import csv
import math
import os
import pandas as pd

In [None]:
def read_data(path):
    """Parse a set-cover instance from a whitespace-separated text file.

    The first non-blank line is the header and holds two integers: the number
    of sets followed by the number of elements. Every following non-blank line
    describes one set: its cost first, then the elements it contains.

    Args:
        path: Path of the instance file.

    Returns:
        A tuple ``(n_elems, n_sets, S, costs)`` where ``S`` is a list of
        ``set`` objects holding int elements and ``costs`` is the parallel
        list of int costs (``costs[i]`` belongs to ``S[i]``).

    Raises:
        ValueError: If the file has no header line, the header has fewer than
            two fields, or any field is not an integer.
    """
    S = []
    costs = []
    header = None

    with open(path, "r") as f:
        for line in f:
            # split() without an argument collapses runs of whitespace and
            # drops the trailing newline, so a trailing or doubled space does
            # not produce an empty token that int() would reject.
            fields = line.split()
            if not fields:
                # Tolerate blank lines (typically one at the end of the file).
                continue

            if header is None:
                if len(fields) < 2:
                    raise ValueError(
                        "header line must contain '<n_sets> <n_elems>', got: {!r}".format(line)
                    )
                header = (int(fields[0]), int(fields[1]))
            else:
                costs.append(int(fields[0]))
                S.append({int(token) for token in fields[1:]})

    if header is None:
        raise ValueError("no header line found in {!r}".format(path))

    n_sets, n_elems = header
    return n_elems, n_sets, S, costs

In [None]:
def initial_population(n_sets, n_indiv):
    """Build the starting population of random bit-string individuals.

    Args:
        n_sets: Length of each individual (one 0/1 gene per candidate set).
        n_indiv: Number of individuals to create.

    Returns:
        A list of ``n_indiv`` lists, each holding ``n_sets`` values drawn
        with ``random.randint(0, 1)``.
    """
    return [
        [random.randint(0, 1) for _gene in range(n_sets)]
        for _member in range(n_indiv)
    ]

In [None]:
def crossover(population, n_sets):
    """Produce offspring by uniform crossover over consecutive pairs.

    Individuals are paired by position as (0, 1), (2, 3), ... Each pair yields
    two children; every allele of a child is copied from one of the two
    parents, picked at random independently per allele. When the population
    has an odd size, the final unpaired individual produces no children.

    Args:
        population: List of individuals (lists of 0/1 genes). Not modified.
        n_sets: Number of alleles to copy into each child.

    Returns:
        A list of newly created child individuals (fresh lists), two per
        complete pair of parents.
    """
    children_per_pair = 2
    new_indivs = []

    # Step through the population two at a time; stopping at len - 1 skips a
    # trailing individual that has no partner.
    for first in range(0, len(population) - 1, 2):
        predecessors = (population[first], population[first + 1])

        for _ in range(children_per_pair):
            # Choose a random predecessor for each allele.
            new_indiv = [
                predecessors[random.randint(0, 1)][i_allele]
                for i_allele in range(n_sets)
            ]
            new_indivs.append(new_indiv)

    return new_indivs

In [None]:
def mutation(indiv, mutation_prob, n_sets):
    """Flip genes of ``indiv`` in place, each with probability ``mutation_prob``.

    Args:
        indiv: Mutable sequence of 0/1 genes; modified in place.
        mutation_prob: Probability of flipping each gene, passed to
            ``np.random.choice`` as the weight of the "flip" outcome.
        n_sets: Number of leading genes of ``indiv`` to consider.

    Returns:
        None. The individual is updated in place.
    """
    for position in range(n_sets):
        # One independent weighted draw per gene: True means "flip this gene".
        draw = np.random.choice(
            [True, False],
            1,
            p=[mutation_prob, 1 - mutation_prob],
        )
        if draw[0]:
            indiv[position] = 0 if indiv[position] else 1

In [None]:
def isSolution(indiv, sets, n_elems=None):
    """Tell whether the sets selected by ``indiv`` cover every element.

    The individual is a solution when the union of all selected sets contains
    exactly ``n_elems`` distinct elements.

    Args:
        indiv: Sequence of 0/1 genes; ``indiv[i]`` truthy selects ``sets[i]``.
        sets: List of element collections, one per gene.
        n_elems: Total number of elements to cover. When omitted, the
            notebook-level global ``n_elems`` is used, which is what the
            original two-argument call relied on.

    Returns:
        True if the selected sets together contain ``n_elems`` distinct
        elements, False otherwise.

    Raises:
        NameError: If ``n_elems`` is omitted and no global ``n_elems`` exists.
    """
    if n_elems is None:
        # Backward compatibility: older callers pass only (indiv, sets) and
        # expect the value loaded by the driver cell.
        try:
            n_elems = globals()["n_elems"]
        except KeyError:
            raise NameError("name 'n_elems' is not defined") from None

    sets_elems = set()
    for i, set_ in enumerate(sets):
        if indiv[i]:
            sets_elems.update(set_)

    return len(sets_elems) == n_elems

In [None]:
def no_sol_fitness(indiv, sets, n_elems):
    """Penalty score for an individual, based on how many elements it misses.

    Args:
        indiv: Sequence of 0/1 genes; a truthy gene selects the set at the
            same index in ``sets``.
        sets: List of element collections, indexed in parallel with ``indiv``.
        n_elems: Total number of elements that should be covered.

    Returns:
        ``1000`` times the difference between ``n_elems`` and the number of
        distinct elements contained in the selected sets.
    """
    covered = set()

    for index, selected in enumerate(indiv):
        candidate = sets[index]
        if not selected:
            continue
        covered.update(candidate)

    return (n_elems - len(covered)) * 1000

In [None]:
def fitness(indiv, sets, costs, n_elems):
    """Score an individual; lower is better.

    A covering individual (its selected sets contain exactly ``n_elems``
    distinct elements) scores the sum of the costs of its selected sets.
    Any other individual scores a penalty of ``1000`` per element of
    difference between ``n_elems`` and the number of covered elements, the
    same value ``no_sol_fitness`` yields for it.

    Coverage and cost are computed here in a single pass using the
    ``n_elems`` argument, so the result does not depend on a notebook global.

    Args:
        indiv: Sequence of 0/1 genes; ``indiv[i]`` truthy selects ``sets[i]``.
        sets: List of element collections, one per gene.
        costs: Cost of each set, parallel to ``sets``.
        n_elems: Total number of elements that must be covered.

    Returns:
        The total cost of the selected sets for a covering individual,
        otherwise ``(n_elems - covered_count) * 1000``.
    """
    covered = set()
    total_cost = 0

    for i, set_ in enumerate(sets):
        if indiv[i]:
            covered.update(set_)
            total_cost += costs[i]

    if len(covered) != n_elems:
        # Not a cover: rank by how far from covering it is, not by cost.
        return (n_elems - len(covered)) * 1000

    return total_cost

In [None]:
def get_random_indiv(already_chosen, pop_size=None):
    """Pick a random population index that has not been chosen yet.

    Args:
        already_chosen: Collection of indices that must not be returned.
        pop_size: Number of individuals to pick from (valid indices are
            ``0 .. pop_size - 1``). When omitted, the length of the
            notebook-level global ``population`` is used, as the original
            one-argument call did.

    Returns:
        An index in ``range(pop_size)`` that is not in ``already_chosen``,
        chosen uniformly at random.

    Raises:
        ValueError: If every index is already chosen. The previous
            retry-until-unused loop never terminated in that situation.
    """
    if pop_size is None:
        pop_size = len(population)

    remaining = [i for i in range(pop_size) if i not in already_chosen]
    if not remaining:
        raise ValueError("no individual left to choose: all indices already chosen")

    return random.choice(remaining)

In [None]:
def get_fittest(population, sets, costs, n_elems):
    """Find the individual with the lowest fitness value.

    Args:
        population: List of individuals to evaluate.
        sets: List of element collections, forwarded to ``fitness``.
        costs: Cost of each set, forwarded to ``fitness``.
        n_elems: Total number of elements, forwarded to ``fitness``.

    Returns:
        A tuple ``(best_fitness, i_fittest)``: the lowest fitness found and
        the index of the first individual achieving it. For an empty
        population the result is ``(math.inf, -1)``.
    """
    best_fitness = math.inf
    i_fittest = -1

    for i, indiv in enumerate(population):
        value = fitness(indiv, sets, costs, n_elems)
        # Strict comparison keeps the earliest individual on ties.
        if value < best_fitness:
            best_fitness = value
            i_fittest = i

    return best_fitness, i_fittest

In [None]:
def selection(population, sets, costs, n_elems):
    """Tournament selection: random pairs compete and the fitter one survives.

    The individuals are shuffled into a random order and taken two at a time;
    the one with the lower fitness of each pair enters the new population.
    If the population has an odd size, the one individual left without an
    opponent survives automatically. The previous implementation kept looking
    for a second opponent forever in that case.

    Pairing works on the ``population`` argument itself rather than on a
    notebook-level global.

    Args:
        population: List of individuals to select from. Not modified.
        sets: List of element collections, forwarded to ``get_fittest``.
        costs: Cost of each set, forwarded to ``get_fittest``.
        n_elems: Total number of elements, forwarded to ``get_fittest``.

    Returns:
        A new list holding the winner of every pair (the same list objects as
        in ``population``), plus the unpaired individual when the size is odd.
    """
    # A shuffled index order read in consecutive pairs is a uniformly random
    # pairing in which no individual is used twice.
    order = list(range(len(population)))
    random.shuffle(order)

    new_population = []
    for k in range(0, len(order) - 1, 2):
        pair = [population[order[k]], population[order[k + 1]]]
        _, fittest = get_fittest(pair, sets, costs, n_elems)
        new_population.append(pair[fittest])

    if len(order) % 2 == 1:
        # Odd one out: no opponent available, so it advances unchallenged.
        new_population.append(population[order[-1]])

    return new_population

In [None]:
# Driver: evolve a population of candidate set covers until the best fitness
# has not improved for `tries_without_change` consecutive generations.
count = 1  # generation number, used only in the progress banner

# The instance file is expected under ./dataset relative to the working directory.
path = os.path.join(os.getcwd(), 'dataset', "set_cover_200.txt")

# NOTE(review): `n_elems` and `population` are module-level names; helper
# functions defined in other cells may read them as globals, so check those
# cells before renaming either one.
n_elems, n_sets, sets, costs = read_data(path)
# An even n_indiv keeps the population size stable: crossover adds one child
# per paired parent and tournament selection keeps one of every two.
n_indiv = 10
population = []
mutation_prob = 0.4  # per-gene flip probability handed to mutation()
population = initial_population(n_sets, n_indiv)
global_fittest = -1  # placeholder until the first improvement stores an individual
global_best_fitness =  math.inf  # lower fitness is better
left = math.inf  # elements still uncovered by the best individual found so far
tries_without_change = 1000  # stop after this many generations without improvement
since_not_changed = 0
#print("Sets: " + str(sets))
while since_not_changed < tries_without_change:
    print("-------------------------------------------------- (" + str(count) + ") ---------------------------------------------------------")
    #print("Selected population (" + str(len(population)) + "): " + str(population))

    # Breed children from the current population.
    new_indivs = crossover(population, n_sets)
    #print("Crossover: " + str(new_indivs))

    # Only the children are mutated (in place); the parents are left untouched.
    for indiv in new_indivs:
        mutation(indiv, mutation_prob, n_sets)
    #print("Mutation: " + str(new_indivs))

    # Parents and children compete together in the tournament below.
    population += new_indivs

    #print("New population (" + str(len(population)) + "): " + str(population))
    population = selection(population, sets, costs, n_elems)

    # Update the best-so-far record with this generation's fittest individual.
    local_best_fitness, local_fittest = get_fittest(population, sets, costs, n_elems)

    if global_best_fitness > local_best_fitness:
        global_best_fitness = local_best_fitness
        global_fittest = population[local_fittest]
        since_not_changed = 0  # an improvement resets the stagnation counter
        # no_sol_fitness returns 1000 per missing element, so integer division
        # by 1000 recovers the count of uncovered elements.
        left = no_sol_fitness(global_fittest, sets, n_elems)//1000
    else:
        since_not_changed += 1
        
    print("Left: " + str(left))
    print("Fittest: (" + str(global_best_fitness) + "): " + str(global_fittest))

    print("Iterations without changes: " + str(since_not_changed))

    count += 1


print("---------------------------------------------------------------------------------------------------------------------")
