In [4]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## flowshop

In [24]:
def read_flowshop(file_path:str) -> list[dict]:
    """Parse a flow-shop benchmark file into one dict per instance.

    The file is expected to hold one or more instances, each laid out as::

        number of jobs, number of machines, initial seed, upper bound and lower bound :
        <num_jobs> <num_machines> <seed> <upper_bound> <lower_bound>
        processing times :
        <rows of whitespace-separated integers>

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one dict per instance, in file order. Each dict holds the
        integers "num_jobs", "num_machines", "seed", "upper_bound" and
        "lower_bound", plus "processing_times": every integer that follows the
        "processing times" line, flattened row by row into a single list.

    Raises:
        ValueError: If the line after an instance header does not contain
            exactly five integers, or if a processing-time row contains a
            token that is not an integer.
    """
    header = "number of jobs, number of machines, initial seed, upper bound and lower bound :"

    with open(file_path, "r") as file:
        content = file.read()

    data = []
    # Whatever precedes the first header is preamble (normally empty): skip it.
    for instance in content.strip().split(header)[1:]:
        lines = instance.strip().split("\n")

        # The first line of an instance carries the five scalar parameters.
        header_values = [int(token) for token in re.findall(r"\d+", lines[0])]
        if len(header_values) != 5:
            raise ValueError(
                f"expected 5 integers in flow-shop instance header, "
                f"got {len(header_values)}: {lines[0]!r}"
            )
        num_jobs, num_machines, seed, upper_bound, lower_bound = header_values

        # Locate the "processing times" marker rather than assuming it is the
        # second line, so blank lines or stray whitespace around it do not
        # make the marker text reach int(). When no marker is found, fall back
        # to index 1, i.e. the position the marker normally occupies.
        marker_idx = next(
            (
                idx
                for idx, line in enumerate(lines[1:], start=1)
                if line.strip().lower().startswith("processing times")
            ),
            1,
        )

        # Flatten every remaining row; blank rows contribute nothing.
        processing_times = []
        for line in lines[marker_idx + 1:]:
            processing_times.extend(map(int, line.split()))

        data.append({
            "num_jobs": num_jobs,
            "num_machines": num_machines,
            "seed": seed,
            "upper_bound": upper_bound,
            "lower_bound": lower_bound,
            "processing_times": processing_times
        })

    return data

In [30]:
# Flow-shop benchmark file; the path is relative to the notebook's working directory.
file_path = "../data/flowshop_tai20_5.txt"

# Parse every instance found in the file into a list of dicts.
flowshop = read_flowshop(file_path)

# Display the parsed instance at index 5 (the sixth one in file order).
flowshop[5]

{'num_jobs': 20,
 'num_machines': 5,
 'seed': 402959317,
 'upper_bound': 1195,
 'lower_bound': 1180,
 'processing_times': [71,
  27,
  55,
  90,
  11,
  18,
  42,
  64,
  73,
  95,
  22,
  53,
  32,
  5,
  94,
  12,
  41,
  85,
  75,
  38,
  13,
  11,
  73,
  43,
  27,
  33,
  57,
  42,
  71,
  3,
  11,
  49,
  8,
  3,
  47,
  58,
  23,
  79,
  99,
  23,
  61,
  25,
  52,
  72,
  89,
  75,
  60,
  28,
  94,
  95,
  18,
  73,
  40,
  61,
  68,
  75,
  37,
  13,
  65,
  7,
  21,
  8,
  5,
  8,
  58,
  59,
  85,
  35,
  84,
  97,
  93,
  60,
  99,
  29,
  94,
  41,
  51,
  87,
  97,
  11,
  91,
  13,
  7,
  95,
  20,
  69,
  45,
  44,
  29,
  32,
  94,
  84,
  60,
  49,
  49,
  65,
  85,
  52,
  8,
  58]}

## jobshop

In [49]:
def read_jobshop(file_path:str) -> list[dict]:
    """Parse a job-shop benchmark file into one dict per instance.

    The file is expected to hold one or more instances, each laid out as::

        Nb of jobs, Nb of Machines, Time seed, Machine seed, Upper bound, Lower bound
        <nb_jobs> <nb_machines> <time_seed> <machine_seed> <upper_bound> <lower_bound>
        Times
        <rows of integers>
        Machines
        <rows of integers>

    The "Times" and "Machines" marker lines are matched after stripping
    surrounding whitespace and ignoring case.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one dict per instance, in file order. Each dict holds the
        integers "nb_jobs", "nb_machines", "time_seed", "machine_seed",
        "upper_bound" and "lower_bound", plus "times" and "machines": the
        integers of the respective section, flattened row by row.

    Raises:
        ValueError: If the line after an instance header does not contain
            exactly six integers, or if a "Times" or "Machines" marker line
            is missing from an instance.
    """
    with open(file_path, 'r') as file:
        content = file.read()

    # Each occurrence of the header line starts a new instance.
    raw_instances = re.split(r"Nb of jobs, Nb of Machines, Time seed, Machine seed, Upper bound, Lower bound", content)

    instances = []
    for raw_instance in raw_instances[1:]:  # text before the first header is skipped
        lines = raw_instance.strip().split("\n")

        # The first line of an instance carries the six scalar parameters.
        metadata = [int(token) for token in re.findall(r'\d+', lines[0])]
        if len(metadata) != 6:
            raise ValueError(
                f"expected 6 integers in job-shop instance header, "
                f"got {len(metadata)}: {lines[0]!r}"
            )
        nb_jobs, nb_machines, time_seed, machine_seed, upper_bound, lower_bound = metadata

        # Compare normalised copies of the lines so that trailing whitespace
        # or different capitalisation on a marker line does not hide it.
        normalised = [line.strip().lower() for line in lines]
        try:
            times_start_idx = normalised.index("times") + 1
            machines_start_idx = normalised.index("machines") + 1
        except ValueError:
            raise ValueError(
                "job-shop instance is missing its 'Times' or 'Machines' marker line"
            ) from None

        # Rows between the two markers are the times section; the slice stops
        # one line early to exclude the "Machines" marker itself.
        times = []
        for line in lines[times_start_idx:machines_start_idx - 1]:
            times.extend(int(token) for token in re.findall(r'\d+', line))

        # Everything after the "Machines" marker is the machines section.
        machines = []
        for line in lines[machines_start_idx:]:
            machines.extend(int(token) for token in re.findall(r'\d+', line))

        instances.append({
            "nb_jobs": nb_jobs,
            "nb_machines": nb_machines,
            "time_seed": time_seed,
            "machine_seed": machine_seed,
            "upper_bound": upper_bound,
            "lower_bound": lower_bound,
            "times": times,
            "machines": machines
        })

    return instances

In [None]:
# Job-shop benchmark file; the path is relative to the notebook's working directory.
file_path = "../data/jobshop_tai20_15.txt"

# Parse every instance found in the file into a list of dicts.
jobshop = read_jobshop(file_path)

# Display the parsed instance at index 2 (the third one in file order).
jobshop[2]

{'nb_jobs': 20,
 'nb_machines': 15,
 'time_seed': 874340513,
 'machine_seed': 509669280,
 'upper_bound': 1367,
 'lower_bound': 1243,
 'times': [91,
  17,
  4,
  63,
  67,
  30,
  87,
  80,
  95,
  14,
  17,
  22,
  1,
  85,
  41,
  77,
  77,
  9,
  77,
  24,
  8,
  64,
  6,
  12,
  13,
  71,
  76,
  95,
  8,
  6,
  92,
  3,
  12,
  27,
  58,
  66,
  99,
  33,
  7,
  78,
  96,
  30,
  54,
  23,
  88,
  19,
  45,
  65,
  24,
  30,
  30,
  49,
  32,
  78,
  31,
  3,
  25,
  9,
  2,
  22,
  84,
  61,
  35,
  44,
  37,
  16,
  97,
  85,
  51,
  26,
  13,
  76,
  41,
  2,
  96,
  85,
  55,
  2,
  65,
  52,
  97,
  81,
  8,
  22,
  59,
  95,
  52,
  85,
  64,
  13,
  64,
  94,
  4,
  13,
  98,
  26,
  32,
  20,
  97,
  28,
  63,
  2,
  23,
  14,
  62,
  56,
  98,
  56,
  28,
  1,
  96,
  27,
  38,
  41,
  94,
  77,
  63,
  63,
  81,
  6,
  63,
  98,
  64,
  37,
  89,
  96,
  88,
  13,
  72,
  28,
  57,
  99,
  11,
  8,
  96,
  17,
  71,
  80,
  33,
  87,
  82,
  44,
  14,
  85,
  2,
  60,
  7

## openshop

In [56]:
def read_openshop(file_path:str) -> list[dict]:
    """Parse an open-shop benchmark file into one dict per instance.

    The file is expected to hold one or more instances, each laid out as::

        number of jobs, number of machines, time seed, machine seed, upper bound, lower bound :
        <nb_jobs> <nb_machines> <time_seed> <machine_seed> <upper_bound> <lower_bound>
        processing times :
        <rows of integers>
        machines :
        <rows of integers>

    The header line is matched case-insensitively. The "processing times :"
    and "machines :" marker lines are matched the same way, after collapsing
    runs of whitespace and stripping both ends.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one dict per instance, in file order. Each dict holds the
        integers "nb_jobs", "nb_machines", "time_seed", "machine_seed",
        "upper_bound" and "lower_bound", plus "times" and "machines": the
        integers of the respective section, flattened row by row.

    Raises:
        ValueError: If the line after an instance header does not contain
            exactly six integers, or if a "processing times :" or
            "machines :" marker line is missing from an instance.
    """
    with open(file_path, 'r') as file:
        content = file.read()

    # Each occurrence of the "number of jobs, number of machines, ..." header
    # line (in any letter case) starts a new instance.
    raw_instances = re.split(r"number of jobs, number of machines, time seed, machine seed, upper bound, lower bound :", content, flags=re.IGNORECASE)

    instances = []
    for raw_instance in raw_instances[1:]:  # text before the first header is skipped
        lines = raw_instance.strip().split("\n")

        # The first line of an instance carries the six scalar parameters.
        metadata = [int(token) for token in re.findall(r'\d+', lines[0])]
        if len(metadata) != 6:
            raise ValueError(
                f"expected 6 integers in open-shop instance header, "
                f"got {len(metadata)}: {lines[0]!r}"
            )
        nb_jobs, nb_machines, time_seed, machine_seed, upper_bound, lower_bound = metadata

        # The header is split case-insensitively, so look the section markers
        # up the same way: compare whitespace-collapsed, lower-cased copies.
        normalised = [" ".join(line.split()).lower() for line in lines]
        try:
            times_start_idx = normalised.index("processing times :") + 1
            machines_start_idx = normalised.index("machines :") + 1
        except ValueError:
            raise ValueError(
                "open-shop instance is missing its 'processing times :' "
                "or 'machines :' marker line"
            ) from None

        # Rows between the two markers are the times section; the slice stops
        # one line early to exclude the "machines :" marker itself.
        times = []
        for line in lines[times_start_idx:machines_start_idx - 1]:
            times.extend(int(token) for token in re.findall(r'\d+', line))

        # Everything after the "machines :" marker is the machines section.
        machines = []
        for line in lines[machines_start_idx:]:
            machines.extend(int(token) for token in re.findall(r'\d+', line))

        instances.append({
            "nb_jobs": nb_jobs,
            "nb_machines": nb_machines,
            "time_seed": time_seed,
            "machine_seed": machine_seed,
            "upper_bound": upper_bound,
            "lower_bound": lower_bound,
            "times": times,
            "machines": machines
        })

    return instances

In [57]:
# Open-shop benchmark file; the path is relative to the notebook's working directory.
file_path = "../data/openshop_tai4_4.txt"

# Parse every instance found in the file into a list of dicts.
openshop = read_openshop(file_path)

# Display the parsed instance at index 2 (the third one in file order).
openshop[2]

{'nb_jobs': 4,
 'nb_machines': 4,
 'time_seed': 1116611914,
 'machine_seed': 1729673136,
 'upper_bound': 271,
 'lower_bound': 262,
 'times': [2, 38, 5, 66, 68, 14, 95, 85, 89, 54, 77, 38, 59, 51, 63, 72],
 'machines': [1, 3, 2, 4, 2, 3, 4, 1, 3, 1, 2, 4, 1, 4, 2, 3]}