In [39]:
import numpy
import random
import logging
#import matplotlib as plt
import itertools
from matplotlib import pyplot as plt

In [40]:

# Number of best offspring kept as the next generation's population.
POPULATION_SIZE = 100
# Number of offspring generated in every generation.
OFFSPRING_SIZE = 1000

# Number of generations evolve() iterates for.
NUM_GENERATIONS = 200
# Share of offspring produced by mutation instead of crossover; meant to be
# supplied as evolve()'s `mutation_rate` argument.
MUTATION_RATE = 0.5

In [41]:
# Log bare messages (no level/logger prefix) at INFO level and above.
logging.basicConfig(format="%(message)s", level=logging.INFO)
def problem(N, seed=None):
    """Generate a random set-covering instance over the integers ``0..N-1``.

    The global ``random`` generator is re-seeded with ``seed`` first, so a
    given ``(N, seed)`` pair always produces the same instance.

    Args:
        N: Size of the universe; generated values lie in ``0..N-1``.
        seed: Value handed to ``random.seed``.

    Returns:
        A list of between ``N`` and ``5 * N`` lists. Each inner list holds the
        distinct values of between ``N // 5`` and ``N // 2`` random draws.
    """
    random.seed(seed)
    subsets = []
    # The draw order is part of the contract: the number of lists first, then,
    # per list, its draw count followed by the draws themselves.
    for _list_no in range(random.randint(N, N * 5)):
        distinct = set()
        for _draw_no in range(random.randint(N // 5, N // 2)):
            distinct.add(random.randint(0, N - 1))
        subsets.append(list(distinct))
    return subsets

def remove_duplicates(P):
    """Return the non-empty lists of ``P`` with exact repeats removed.

    The first occurrence of each list is kept and the original order is
    preserved. Two lists count as duplicates only when they are equal
    element by element (``[1, 2]`` and ``[2, 1]`` are both kept).

    Args:
        P: Iterable of lists whose elements are hashable (e.g. ints).

    Returns:
        A new list containing the unique, non-empty lists of ``P``.
    """
    seen = set()
    unique = []
    for p in P:
        if len(p) == 0:
            continue
        # A tuple key makes the membership test O(1) instead of scanning the
        # whole result list for every candidate.
        key = tuple(p)
        if key not in seen:
            seen.add(key)
            unique.append(p)
    return unique

def total_len(genome):
    """Return the combined number of elements over all subsets in ``genome``."""
    count = 0
    for subset in genome:
        count += len(subset)
    return count
    
def fitness(genome):
    """Score a genome (a list of subsets) as a tuple compared lexicographically.

    Args:
        genome: List of subsets, each a list of hashable elements.

    Returns:
        ``(covered, 1 / size)`` where ``covered`` is the number of distinct
        elements across all subsets and ``size`` is the summed subset length.
        Higher is better on both components: cover more first, then use fewer
        elements. A genome with no elements yields ``(0, inf)`` instead of
        raising ``ZeroDivisionError``, so ``1 / fitness(g)[1]`` still recovers
        the total length (``0.0``).
    """
    covered = len(set(itertools.chain.from_iterable(genome)))
    size = sum(len(p) for p in genome)
    if size == 0:
        # Empty genome or only empty subsets: nothing covered, nothing used.
        return (covered, float("inf"))
    return (covered, 1 / size)

def tournament(population,tournamen_size = 10):
    """Pick the fittest of ``tournamen_size`` individuals drawn with replacement.

    Args:
        population: Sequence of ``(genome, fitness)`` pairs.
        tournamen_size: Number of random contestants to draw.

    Returns:
        The contestant whose fitness (element 1 of the pair) is highest; on a
        tie the one drawn first wins.
    """
    def score(individual):
        return individual[1]

    contestants = random.choices(population, k=tournamen_size)
    return max(contestants, key=score)

def cross_over(g1,g2):
    """Build a child genome from the head of one parent and the tail of the other.

    With probability 0.5 the child is ``g1[:cut1] + g2[cut2:]``, otherwise
    ``g2[:cut2] + g1[cut1:]``. The parent that supplies the head always
    contributes at least one element; the tail may be empty.

    Args:
        g1: First parent genome (list of subsets).
        g2: Second parent genome (list of subsets).

    Returns:
        A new list; neither parent is modified.
    """
    g1_leads = random.random() < 0.5
    # The cut in g1 is always drawn before the cut in g2; only the lower bound
    # depends on which parent provides the head.
    cut1 = random.randint(1 if g1_leads else 0, len(g1))
    cut2 = random.randint(0 if g1_leads else 1, len(g2))
    if g1_leads:
        return g1[:cut1] + g2[cut2:]
    return g2[:cut2] + g1[cut1:]

def mutation(genome,problem):
    """Return a mutated copy of ``genome``.

    One position is picked at random. With probability 0.2 (and only when the
    genome has more than one subset) that subset is dropped; otherwise it is
    replaced by a subset chosen at random from ``problem``. An empty genome
    has no position to mutate, so it is returned as a one-subset genome
    instead of failing inside ``random.randint``.

    Args:
        genome: List of subsets to mutate; it is not modified.
        problem: Non-empty list of candidate subsets to draw replacements from.

    Returns:
        A new genome list.
    """
    if not genome:
        return [random.choice(problem)]

    index_mut_elem = random.randint(0, len(genome) - 1)
    head = genome[:index_mut_elem]
    tail = genome[index_mut_elem + 1:]

    # The length guard keeps a single-subset genome from becoming empty.
    if random.random() > 0.8 and len(genome) > 1:
        return head + tail

    return head + [random.choice(problem)] + tail
    

    

def goal_test(genome, N=None):
    """Tell whether an individual's subsets cover exactly ``0..N-1``.

    Args:
        genome: An individual as stored in the population, i.e. a pair whose
            first element is the list of subsets.
        N: Size of the universe. When omitted, the module-level ``N`` is used
            if it exists, otherwise the module-level ``n`` set by the
            experiment cells.

    Returns:
        ``True`` when the union of the subsets equals ``set(range(N))``.

    Raises:
        NameError: If ``N`` is not given and neither global is defined.
    """
    if N is None:
        scope = globals()
        if "N" in scope:
            N = scope["N"]
        elif "n" in scope:
            N = scope["n"]
        else:
            raise NameError("goal_test needs the universe size: pass N explicitly")
    return set(itertools.chain.from_iterable(genome[0])) == set(range(N))

def evolve(population, mutation_rate, problem_sets=None):
    """Run the genetic algorithm and return the best individual of the last generation.

    Every generation builds ``OFFSPRING_SIZE`` children, each by mutation
    (with probability ``mutation_rate``) or by crossover of two tournament
    winners. The children replace the parents entirely, and the
    ``POPULATION_SIZE`` fittest of them become the next population.

    Args:
        population: Initial list of ``(genome, fitness)`` pairs.
        mutation_rate: Probability that a child is produced by mutation.
        problem_sets: Candidate subsets that mutation draws from. When
            omitted, the notebook-level list ``P`` is used, which is what
            this function originally read implicitly.

    Returns:
        The ``(genome, fitness)`` pair ranked first after ``NUM_GENERATIONS``
        generations.
    """
    if problem_sets is None:
        # Backward compatible with callers that rely on the global instance.
        problem_sets = P

    # Each member of the initial population has already been evaluated once.
    num_fitness_call = len(population)

    for _generation in range(NUM_GENERATIONS):
        offspring = []
        for _child_no in range(OFFSPRING_SIZE):
            if random.random() < mutation_rate:
                parent = tournament(population)[0]
                child = mutation(parent, problem_sets)
            else:
                parent1 = tournament(population)[0]
                parent2 = tournament(population)[0]
                child = cross_over(parent1, parent2)
            num_fitness_call += 1
            offspring.append((child, fitness(child)))

        # Parents are discarded: only the best offspring survive.
        population = sorted(offspring, key=lambda i: i[1], reverse=True)[:POPULATION_SIZE]

    # Silent at the notebook's INFO level; visible when DEBUG is enabled.
    logging.debug("#fitness call: %d", num_fitness_call)
    return population[0]
    

## Results

In [42]:

n = 5
P = problem(n,seed=42)
P = remove_duplicates(P)

# Start from one single-subset genome per candidate subset, best first.
population = [([a],fitness([a]))  for a in P]
population = sorted(population,key=lambda i:i[1],reverse=True)
# Use the configured rate rather than repeating the literal 0.5.
best = evolve(population, MUTATION_RATE)
logging.info(f"Results for N={n}")
# fitness[1] is 1 / total length, so invert it to report the length.
logging.info(f"\t\t total len: [{1/best[1][1]}]")

Results for N=5
		 total len: [5.0]


In [43]:
n = 10
P = problem(n,seed=42)
P = remove_duplicates(P)

# Start from one single-subset genome per candidate subset, best first.
population = [([a],fitness([a]))  for a in P]
population = sorted(population,key=lambda i:i[1],reverse=True)
# Use the configured rate rather than repeating the literal 0.5.
best = evolve(population, MUTATION_RATE)
logging.info(f"Results for N={n}")
# fitness[1] is 1 / total length, so invert it to report the length.
logging.info(f"\t\t total len: [{1/best[1][1]}]")


Results for N=10
		 total len: [10.0]


In [44]:
n = 20
P = problem(n,seed=42)
P = remove_duplicates(P)

# Start from one single-subset genome per candidate subset, best first.
population = [([a],fitness([a]))  for a in P]
population = sorted(population,key=lambda i:i[1],reverse=True)
# Use the configured rate rather than repeating the literal 0.5.
best = evolve(population, MUTATION_RATE)
logging.info(f"Results for N={n}")
# fitness[1] is 1 / total length, so invert it to report the length.
logging.info(f"\t\t total len: [{1/best[1][1]}]")


Results for N=20
		 total len: [24.0]


In [45]:
n = 100
P = problem(n,seed=42)
P = remove_duplicates(P)

# Start from one single-subset genome per candidate subset, best first.
population = [([a],fitness([a]))  for a in P]
population = sorted(population,key=lambda i:i[1],reverse=True)
# Use the configured rate rather than repeating the literal 0.5.
best = evolve(population, MUTATION_RATE)
logging.info(f"Results for N={n}")
# fitness[1] is 1 / total length, so invert it to report the length.
logging.info(f"\t\t total len: [{1/best[1][1]}]")


Results for N=100
		 total len: [192.0]


In [46]:


n = 1000
P = problem(n,seed=42)
P = remove_duplicates(P)

# Start from one single-subset genome per candidate subset, best first.
population = [([a],fitness([a]))  for a in P]
population = sorted(population,key=lambda i:i[1],reverse=True)
# Use the configured rate rather than repeating the literal 0.5.
best = evolve(population, MUTATION_RATE)
logging.info(f"Results for N={n}")
# fitness[1] is 1 / total length, so invert it to report the length.
logging.info(f"\t\t total len: [{1/best[1][1]}]")

Results for N=1000
		 total len: [2937.0]
