Inspired by https://www.kaggle.com/code/ghalebdweikat/genetic-algorithm-tutorial

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Machines table; displayed below with columns machine, inventory, time, productivity
df_machines = pd.read_csv('tsi_data/dataset_1.csv')
# Sites table; displayed below with columns scenario, cleaning_area, cleaning_time
df_sites = pd.read_csv('tsi_data/scenario_1.csv')

In [3]:
# Display the machines table
df_machines

Unnamed: 0,machine,inventory,time,productivity
0,A1,1,1.0,1225
1,A2,1,1.5,1575
2,A3,1,2.5,2475
3,A4,1,2.5,1750
4,A5,1,3.0,1750
5,A6,1,3.5,3150
6,A7,1,2.5,2700
7,A8,1,2.5,3150
8,A9,1,3.5,3150
9,A10,1,4.0,3825


In [4]:
# Display the sites table
df_sites

Unnamed: 0,scenario,cleaning_area,cleaning_time
0,S1,23900,7
1,S2,19200,4
2,S3,16400,6
3,S4,11000,6
4,S5,10000,3


### Representation

For a single site, a solution (chromosome) is a binary vector of length nb_machines: position i holds 1 if machine i is used at that site and 0 if it is not.

Example: a chromosome that cleans a site using only A1 and A2:
[ 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ] 

### Generating random chromosomes / solutions

In [5]:
def generateChromosome(chromosome_size):
    """
    Build one random chromosome.

    Draws `chromosome_size` random bits and returns them as a boolean NumPy
    array: True means the machine at that position is used, False means it
    is not.
    """
    random_bits = np.random.randint(low=0, high=2, size=chromosome_size)
    return random_bits.astype(bool)

In [6]:
# Example: select random machines (10 genes, printed as 0/1 rather than False/True)
print(generateChromosome(10).astype(int))

[1 1 0 0 1 1 1 1 1 1]


In [7]:
def generateParents(population_size, chromosome_size):
    """
    Create an initial population.

    Returns a list of `population_size` random chromosomes, each one produced
    by generateChromosome(chromosome_size).
    """
    population = []
    for _ in range(population_size):
        population.append(generateChromosome(chromosome_size))
    return population

In [8]:
# Example: a population of 3 random chromosomes of length 10,
# printed one per line as 0/1
for x in generateParents(3, 10):
    print(x.astype(int))

[0 0 1 1 1 0 0 0 0 0]
[0 1 1 0 1 1 1 1 1 1]
[0 1 0 0 1 1 1 0 1 1]


### Getting total cleaned area for one chromosome

In [9]:
def totalCleanedArea(chromosome, machines):
    """
    Sum the productivity of the machines selected by a chromosome.

    chromosome: sequence with one entry per row of `machines` (NumPy array or
        list, holding booleans or 0/1 integers). Truthy entries mark the
        machines that are used.
    machines: DataFrame with a "productivity" column.

    Returns the sum of "productivity" over the selected rows (0 when no
    machine is selected).
    """
    # Coerce to a boolean mask first: indexing a Series with a list of 0/1
    # integers would be interpreted as a label lookup, not as a selection.
    selected = np.asarray(chromosome, dtype=bool)
    return machines["productivity"][selected].sum()

In [10]:
# Example with a random solution: one gene per row of df_machines
chromosome = generateChromosome(len(df_machines))
print("Chromosome:", chromosome.astype(int))
# Summed productivity of the machines selected by that chromosome
print("Total cleaned area: {}m²".format(totalCleanedArea(chromosome, df_machines)))

Chromosome: [1 0 1 1 1 1 0 0 0 1 1 1 0 0 1 1 0 1 1 0 0 1 0 0]
Total cleaned area: 39545m²


### Mutations

In [11]:
def mutate(chromosome):
    """
    Flip one randomly chosen gene of the chromosome.

    The chromosome is modified in place (a True gene becomes False and a
    False gene becomes True at a single random position) and the same object
    is returned.
    """
    position = np.random.randint(0, len(chromosome))
    flipped = False if chromosome[position] else True
    chromosome[position] = flipped
    return chromosome

In [12]:
# Example: mutate() flips one gene in place, so `chromosome` itself changes
chromosome = generateChromosome(len(df_machines))
print("Chromosome:", chromosome.astype(int))
mutate(chromosome)
print("Mutated:   ", chromosome.astype(int))

Chromosome: [1 0 0 0 1 0 0 1 1 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1]
Mutated:    [1 0 0 0 1 0 0 1 1 1 0 1 0 1 1 1 0 1 0 1 0 1 0 1]


### Update population

In [13]:
def updatePopulation(generation, population_size, machines):
    """
    Select the fittest chromosomes of a generation.

    The fitness of a chromosome is its total cleaned area (totalCleanedArea).
    The generation is ranked from the largest to the smallest area and the
    first `population_size` chromosomes are returned as a list.
    """
    areas = np.array([totalCleanedArea(candidate, machines) for candidate in generation])

    # argsort ranks in ascending order, so rank the negated areas to get the
    # best chromosome first
    ranking = np.argsort(-1 * areas)

    survivors = []
    for position in ranking[:population_size]:
        survivors.append(generation[position])
    return survivors

### Crossover

In [14]:
def crossover(first_chromosome, second_chromosome):
    """
    Breed two children from two parent chromosomes.

    round(n/2) of the n gene positions are drawn at random without
    replacement. At those positions the first child copies the first parent
    and the second child copies the second parent; at every other position
    the roles are swapped. Each child is then passed through mutate().

    Returns the two mutated children as a tuple; each child is built as a
    Python list.
    """
    gene_count = len(first_chromosome)
    drawn = np.random.choice(gene_count, size=round(gene_count / 2), replace=False)
    from_first = set(drawn.tolist())

    first_child = []
    second_child = []
    for position in range(gene_count):
        own = first_chromosome[position]
        other = second_chromosome[position]
        if position in from_first:
            first_child.append(own)
            second_child.append(other)
        else:
            first_child.append(other)
            second_child.append(own)

    return mutate(first_child), mutate(second_child)

In [15]:
# Example: cross two 4-gene parents (crossover also mutates each child once)
crossover ([False, True, False, True], [False, True, True, True])

([False, True, True, True], [False, True, False, True])

### New Generation

In [16]:
def newGeneration(generation):
    """
    Build the next generation from one already sorted by fitness (best first).

    The top 2 chromosomes are carried over as they are. New children are then
    bred with crossover() on every ordered pair of distinct chromosomes among
    the top 4, and both children of each pair are added.

    generation: list of chromosomes, best first. It may hold fewer than 4
        chromosomes; only the ones available are used.

    Returns a flat list of chromosomes: the carried-over ones followed by
    the children (2 + 24 entries when at least 4 chromosomes are given).
    """
    top4 = generation[:4]

    # list(...) adds the two best chromosomes as separate entries; wrapping
    # the slice in [...] would insert a single nested list instead.
    new_gen = list(generation[:2])

    # Iterate over what is actually available so that a generation smaller
    # than 4 does not raise an IndexError.
    for i in range(len(top4)):
        for j in range(len(top4)):
            if i != j:
                first_child, second_child = crossover(top4[i], top4[j])
                new_gen.append(first_child)
                new_gen.append(second_child)

    return new_gen

### Train

In [17]:
def train(machines, population_size, generationsPerSite):
    """
    Assign machines to successive sites with a genetic algorithm.

    For each site a random population is created and ranked, then evolved for
    `generationsPerSite` rounds (newGeneration followed by updatePopulation).
    The machines selected by the best chromosome are assigned to the site,
    reported with print, and removed from the pool. This repeats until no
    machine is left.

    machines: DataFrame with "machine" and "productivity" columns. The
        caller's frame is not modified; the pool is narrowed by filtering.
    population_size: number of chromosomes kept after each selection step.
    generationsPerSite: number of evolution rounds run for each site.

    Returns None; the assignment is printed, one line per site.
    """
    current_site = 1
    while len(machines) > 0:
        parents = generateParents(population_size, len(machines))
        new_generation = updatePopulation(parents, population_size, machines)

        for _ in range(generationsPerSite):
            new_generation = newGeneration(new_generation)
            # Rank the freshly bred generation, not the initial parents,
            # otherwise the evolution step above is thrown away.
            new_generation = updatePopulation(new_generation, population_size, machines)

        # Chromosomes may be NumPy arrays or plain lists; use one boolean
        # mask for both the selection and its complement.
        best_mask = np.asarray(new_generation[0], dtype=bool)
        if not best_mask.any():
            # No machine selected: the pool would not shrink and an empty
            # site would be reported. Draw a new population instead.
            continue

        current_cleaned_area = totalCleanedArea(best_mask, machines)
        machines_cleaned = str(machines[best_mask]['machine'].values)
        print("Site{}: {} Cleaned area: {} m²".format(current_site, machines_cleaned, current_cleaned_area))

        # Keep only the machines that were not assigned to this site
        machines = machines[~best_mask]
        current_site += 1

In [18]:
# Parameters of the run
population_size = 10  # passed to train as the population size
generationsPerSite = 3  # passed to train as the number of generations per site
# Work on a copy of the machines table
machines = df_machines.copy()

# NOTE: no random seed is set in this cell, so the printed assignment may
# differ from one run to the next
train(machines, population_size, generationsPerSite)

Site1: ['A1' 'A2' 'A3' 'A4' 'A7' 'A8' 'A9' 'A10' 'B1' 'B2' 'B4' 'B5' 'C2' 'C4'
 'C5' 'D1' 'D3' 'D4'] Cleaned area: 67780 m²
Site2: ['A5' 'A6' 'B3' 'C1' 'D2'] Cleaned area: 16470 m²
Site3: ['C3'] Cleaned area: 2460 m²
