In [56]:

import pandas as pd

# Load the data from the Excel file
file_path = 'data/train_data_231108.xlsx'

# Read the Excel file
try:
    data = pd.read_excel(file_path)
except Exception as e:
    error_message = str(e)

# Check if the data is loaded correctly by displaying the first few rows
data.head() if 'data' in locals() else error_message



Unnamed: 0,job,instance,Processing Time,family,Setup Time
0,J1,in1,4482.966667,1,984
1,J2,in1,4344.916667,1,984
2,J3,in1,4470.7,1,984
3,J4,in1,1911.583333,1,984
4,J5,in1,553.85,1,984


In [57]:
import random
import numpy as np

# Assume we have the number of machines and instances from the data
num_machines = 10  # As mentioned in the problem statement
num_jobs = 20     # As mentioned in the problem statement

# Group the data by instance to get processing times for each instance
grouped_data = data.groupby('instance')['Processing Time'].apply(list)
grouped_data

instance
in1     [4482.966666666666, 4344.916666666667, 4470.7,...
in10    [2256.911203085747, 3717.1, 2401.764876047549,...
in11    [1471.216666666667, 5909.066666666667, 861.533...
in12    [3367.783333333333, 6571.15, 3975.966666666667...
in13    [1418.716666666667, 5690.633333333333, 3724.70...
                              ...                        
in79    [4824.25, 30782.61666666666, 15749.06666666667...
in8     [2401.766666666667, 4838.566666666667, 5689.7,...
in80    [38523.35, 37356.51666666667, 43006.5833333333...
in81    [12391.4, 13187.21666666667, 13745.78333333333...
in9     [1718.733619655377, 3229.382640905859, 12656.3...
Name: Processing Time, Length: 81, dtype: object

In [58]:
# Define a function to initialize a population of chromosomes
def init_population(population_size, num_jobs):
    population = []
    for _ in range(population_size):
        # Generate a random sequence of job numbers for each chromosome
        chromosome = np.random.permutation(num_jobs).tolist()
        population.append(chromosome)
    return population

# init_population(1,20)


In [59]:
# Adjust the makespan calculation function to use the specific setup times for the instance and family
def calculate_makespan(chromosome, processing_times, setup_times_instance, num_machines, job_family_mapping):
    # Initialize machine times
    machine_times = [0] * num_machines
    last_family = [-1] * num_machines  # To track the last family processed by each machine
    
    for job in chromosome:
        machine_index = machine_times.index(min(machine_times))
        current_family = job_family_mapping[f'J{job+1}']['family']  # Get the family of the current job
        # Check if setup time is needed
        setup_time = setup_times_instance[current_family] if last_family[machine_index] != current_family else 0
        # Update machine time with the processing time and setup time
        machine_times[machine_index] += processing_times[job] + setup_time
        # Update the last family processed on this machine
        last_family[machine_index] = current_family

    # The makespan is the maximum time among all machines
    makespan = max(machine_times)
    return makespan

In [60]:
# Adjust the fitness function to use the specific setup times for the instance and family
def fitness(chromosome, processing_times, setup_times_instance, num_machines, job_family_mapping):
    total_processing_time = sum(processing_times)
    makespan = calculate_makespan(chromosome, processing_times, setup_times_instance, num_machines, job_family_mapping)
    utilization = total_processing_time / (num_machines * makespan)
    return utilization


In [61]:
def tournament_selection(population, fitness_func, tournament_size, processing_times, setup_times_instance, num_machines, job_family_mapping):
    # Randomly select tournament_size individuals from the population
    participants = random.sample(population, tournament_size)
    
    # The 'key' parameter is expecting a function that it can call with one argument,
    # so we use a lambda that calls fitness_func with all the necessary arguments.
    # The lambda function is needed because fitness_func requires more than one argument.
    best_individual = max(participants, key=lambda ind: fitness_func(ind, processing_times, setup_times_instance, num_machines, job_family_mapping))
    
    return best_individual


In [62]:

# Define cycle crossover
def cycle_crossover(parent1, parent2):
    child = [-1] * len(parent1)  # Start with a child with no assigned jobs
    cycle_start = parent1[0]  # Start cycle with the first job of parent1
    index = 0
    while child[index] == -1:
        child[index] = parent1[index]  # Assign job from parent1 to the child
        index = parent2.index(child[index])  # Find the position of this job in parent2
    # Continue until we return to the cycle start position
    while parent1[index] != cycle_start:
        child[index] = parent1[index]  # Assign job from parent1 to the child
        index = parent2.index(child[index])  # Find the position of this job in parent2
    # Fill in remaining jobs from parent2
    for i in range(len(child)):
        if child[i] == -1:
            child[i] = parent2[i]
    return child

In [63]:

# Define swap mutation
def swap_mutation(chromosome):
    # Randomly select two indices to swap
    idx1, idx2 = random.sample(range(len(chromosome)), 2)
    # Swap the jobs at these indices
    chromosome[idx1], chromosome[idx2] = chromosome[idx2], chromosome[idx1]
    return chromosome

# Define survivor selection (fitness-based selection)
def survivor_selection(population, fitness_scores):
    # Sort the population based on the fitness scores in descending order
    sorted_population = [x for _, x in sorted(zip(fitness_scores, population), key=lambda pair: pair[0], reverse=True)]
    survivors = sorted_population[:len(population)]
    return survivors

In [64]:

# Adjust the run_generation function to use the updated tournament_selection
def run_generation(population, processing_times, setup_times_instance, num_machines, job_family_mapping, fitness, tournament_size):
    
    # children = []
    # while len(children) < len(population):
    # Selection
    parent1 = tournament_selection(
        population, 
        fitness, 
        tournament_size,
        processing_times, 
        setup_times_instance, 
        num_machines, 
        job_family_mapping
    )
    parent2 = tournament_selection(
        population, 
        fitness, 
        tournament_size,
        processing_times, 
        setup_times_instance, 
        num_machines, 
        job_family_mapping
    )
    # parent1, parent2 = random.sample(parents, 2)
    # Crossover
    child = cycle_crossover(parent1, parent2)
    
    # children.append(child)
    
    # Mutation
    if random.random() < 0.5:  # Assuming a mutation probability of 10%
        child = swap_mutation(child)
    
    # Calculate fitness for the new population
    # fitness_scores = [fitness_func(individual, processing_times, setup_times_instance, num_machines, job_family_mapping) for individual in children]
    
    # Survivor Selection
    new_population = survivor_selection(
        child + population, 
        fitness_scores + [fitness(ind, processing_times, setup_times_instance, num_machines, job_family_mapping) for ind in population]
    )
    min_parent = max( population, key=lambda ind: fitness(ind, processing_times, setup_times_instance, num_machines, job_family_mapping) )
    child_fitness = fitness(child, processing_times, setup_times_instance, num_machines, job_family_mapping)
    if child_fitness<min_parent:
        population.index()
    
    return new_population

In [65]:
# Assuming that the Excel file has the setup times for each family within each instance, 
# we will create a nested dictionary that stores this information.

# We will group the data by 'instance' and 'family' and then extract the 'Setup Time' for each group.
setup_times_by_instance = data.groupby(['instance', 'family'])['Setup Time'].first().unstack(fill_value=0).to_dict('index')

# Now we have a dictionary where the keys are instance identifiers, and the values are dictionaries
# with family identifiers as keys and setup times as values.

# Let's check the structure of the extracted setup times for the first few instances.
{instance: setup_times_by_instance[instance] for instance in list(setup_times_by_instance)[:5]}



{'in1': {1: 984, 2: 1063, 3: 577, 4: 372},
 'in10': {1: 1027, 2: 1335, 3: 1462, 4: 597},
 'in11': {1: 1176, 2: 499, 3: 641, 4: 1194},
 'in12': {1: 489, 2: 591, 3: 1104, 4: 369},
 'in13': {1: 1185, 2: 723, 3: 1267, 4: 629}}

In [66]:
job_family_mapping = data.set_index('job')[['family', 'Processing Time']].to_dict('index')

# Assume we have all required data and functions defined: fitness, run_generation, etc.

# Set the number of generations
num_generations = 100
population_size = 100
tournament_size = 5

fitness_values = []

# Iterate over each instance
for instance in setup_times_by_instance.keys():
    # Get the processing times for the current instance
    instance_processing_times = grouped_data[instance]
    
    # Get the setup times for the current instance
    setup_times_instance = setup_times_by_instance[instance]

    # Initialize the population
    population = init_population(population_size, num_jobs)

    # Run the genetic algorithm for a specified number of generations
    for generation in range(num_generations):
        
        population = run_generation(
            population,
            instance_processing_times,
            setup_times_instance,
            num_machines,
            job_family_mapping,
            fitness,
            tournament_size
        )

        # Optional: Output the generation number and best fitness in the population to track progress
        best_fitness = max([fitness(ind, instance_processing_times, setup_times_instance, num_machines, job_family_mapping) for ind in population])
        # print(f"Generation {generation+1}: Best Fitness = {best_fitness}")
    print(f" Best Fitness = {best_fitness}")
    
    # Append the fitness value to the list
    fitness_values.append(best_fitness)

# Calculate the average utilization across all instances
average_utilization = sum(fitness_values) / len(fitness_values)
print(f" Avg Fitness = {average_utilization}")



# The final population now contains the evolved chromosomes after 1000 generations



NameError: name 'children' is not defined

In [None]:
print(f" Avg Fitness = {average_utilization}")

 Avg Fitness = 0.7774136624276435
