# Data Preparation

In [1]:
import json
import re

# utils
from src.utils.converter import *

# data access
from configs.path_manager import get_path

## I. Data Import

In [2]:
# Base location for all files read and written in this notebook, resolved via the project's path manager.
# It is combined with file names through `/` in later cells, so it is presumably a pathlib.Path — confirm.
basic_data_path = get_path("data", "basic")

In [3]:
file_path = basic_data_path / "jobshop.txt"

# Read the whole file as text; the context manager closes the handle
# even if read() raises (e.g. on a decoding error).
with open(file_path, encoding="utf-8") as file:
    content = file.read()

# Output for control purposes (first 100 lines)
print("\n".join(content.split("\n")[:100]))

This file contains a set of 82 JSP test instances.

These instances are contributed to the OR-Library by
Dirk C. Mattfeld (email dirk@uni-bremen.de) and 
Rob J.M. Vaessens (email robv@win.tue.nl).
 
o abz5-abz9 are from
   J. Adams, E. Balas and D. Zawack (1988),
   The shifting bottleneck procedure for job shop scheduling,
   Management Science 34, 391-401.
o ft06, ft10, and ft20 are from 
   H. Fisher, G.L. Thompson (1963), 
   Probabilistic learning combinations of local job-shop scheduling rules, 
     J.F. Muth, G.L. Thompson (eds.), 
     Industrial Scheduling, 
     Prentice Hall, Englewood Cliffs, New Jersey, 
   225-251.
o la01-la40 are from 
   S. Lawrence (1984),
   Resource constrained project scheduling: an experimental investigation of 
     heuristic scheduling techniques (Supplement), 
   Graduate School of Industrial Administration,
   Carnegie-Mellon University, Pittsburgh, Pennsylvania.
o orb01-orb10 are from 
   D. Applegate, W. Cook (1991),
   A computational study

## II. Exclusion of the initial text

In [4]:
# Drop the introductory text that precedes the instance definitions
content_without_introduction = exclude_initial_text(content)

# Preview: split the remaining text on the separator lines (three or more "+")
# and show the first six chunks, i.e. the first three instances with their matrices
print(
    "\n".join(
        re.split(r"\n.*\+{3,}.*\n", content_without_introduction)[:6]
    )
)

 
 instance abz5
 
 Adams, Balas, and Zawack 10x10 instance (Table 1, instance 5)
 10 10
 4 88 8 68 6 94 5 99 1 67 2 89 9 77 7 99 0 86 3 92
 5 72 3 50 6 69 4 75 2 94 8 66 0 92 1 82 7 94 9 63
 9 83 8 61 0 83 1 65 6 64 5 85 7 78 4 85 2 55 3 77
 7 94 2 68 1 61 4 99 3 54 6 75 5 66 0 76 9 63 8 67
 3 69 4 88 9 82 8 95 0 99 2 67 6 95 5 68 7 67 1 86
 1 99 4 81 5 64 6 66 8 80 2 80 7 69 9 62 3 79 0 88
 7 50 1 86 4 97 3 96 0 95 8 97 2 66 5 99 6 52 9 71
 4 98 6 73 3 82 2 51 1 71 5 94 7 85 0 62 8 95 9 79
 0 94 6 71 3 81 7 85 1 66 2 90 4 76 5 58 8 93 9 97
 3 50 0 59 1 82 8 67 7 56 9 96 6 58 4 81 5 59 2 96
 
 instance abz6
 
 Adams, and Zawack 10x10 instance (Table 1, instance 6)
 10 10
 7 62 8 24 5 25 3 84 4 47 6 38 2 82 0 93 9 24 1 66
 5 47 2 97 8 92 9 22 1 93 4 29 7 56 3 80 0 78 6 67
 1 45 7 46 6 22 2 26 9 38 0 69 4 40 3 33 8 75 5 96
 4 85 8 76 5 68 9 88 3 36 6 75 2 56 1 35 0 77 7 85
 8 60 9 20 7 25 3 63 4 81 0 52 1 30 5 98 6 54 2 86
 3 87 9 73 5 51 2 95 4 65 1 86 6 22 8 58 0 80 7 65
 5 81 2 53 7 

## II. Conversion to dictionary

In [5]:
# Map each instance name (e.g. "instance ft10") to its matrix, still as raw text
instances_string_dict = parse_text_with_instances_to_dict(
    content_without_introduction,
    verbose=False,
)

# Spot check: raw matrix text of the ft10 instance
print(instances_string_dict["instance ft10"])

 0 29 1 78 2  9 3 36 4 49 5 11 6 62 7 56 8 44 9 21
 0 43 2 90 4 75 9 11 3 69 1 28 6 46 5 46 7 72 8 30
 1 91 0 85 3 39 2 74 8 90 5 10 7 12 6 89 9 45 4 33
 1 81 2 95 0 71 4 99 6  9 8 52 7 85 3 98 9 22 5 43
 2 14 0  6 1 22 5 61 3 26 4 69 8 21 7 49 9 72 6 53
 2 84 1  2 5 52 3 95 8 48 9 72 0 47 6 65 4  6 7 25
 1 46 0 37 3 61 2 13 6 32 5 21 9 32 8 89 7 30 4 55
 2 31 0 86 1 46 5 74 4 32 6 88 8 19 9 48 7 36 3 79
 0 76 1 69 3 76 5 51 2 85 9 11 6 40 7 89 4 26 8 74
 1 85 0 13 2 61 6  7 8 64 9 76 5 47 3 52 4 90 7 45


In [6]:
# Turn every matrix string into a structured, JSON-serializable routing dictionary
instances_dict = structure_dict(instances_string_dict)

# Spot check: list the routings of the ft10 instance, one routing per line
instance_ft10 = instances_dict["instance ft10"]
for routing_id in instance_ft10:
    operations = instance_ft10[routing_id]
    print(f"{routing_id}: {operations}")

0: [[0, 29], [1, 78], [2, 9], [3, 36], [4, 49], [5, 11], [6, 62], [7, 56], [8, 44], [9, 21]]
1: [[0, 43], [2, 90], [4, 75], [9, 11], [3, 69], [1, 28], [6, 46], [5, 46], [7, 72], [8, 30]]
2: [[1, 91], [0, 85], [3, 39], [2, 74], [8, 90], [5, 10], [7, 12], [6, 89], [9, 45], [4, 33]]
3: [[1, 81], [2, 95], [0, 71], [4, 99], [6, 9], [8, 52], [7, 85], [3, 98], [9, 22], [5, 43]]
4: [[2, 14], [0, 6], [1, 22], [5, 61], [3, 26], [4, 69], [8, 21], [7, 49], [9, 72], [6, 53]]
5: [[2, 84], [1, 2], [5, 52], [3, 95], [8, 48], [9, 72], [0, 47], [6, 65], [4, 6], [7, 25]]
6: [[1, 46], [0, 37], [3, 61], [2, 13], [6, 32], [5, 21], [9, 32], [8, 89], [7, 30], [4, 55]]
7: [[2, 31], [0, 86], [1, 46], [5, 74], [4, 32], [6, 88], [8, 19], [9, 48], [7, 36], [3, 79]]
8: [[0, 76], [1, 69], [3, 76], [5, 51], [2, 85], [9, 11], [6, 40], [7, 89], [4, 26], [8, 74]]
9: [[1, 85], [0, 13], [2, 61], [6, 7], [8, 64], [9, 76], [5, 47], [3, 52], [4, 90], [7, 45]]


### JSON Export

In [7]:
# Persist all structured instances as a single, indented JSON document
file_path = basic_data_path / "jobshop_instances.json"
with open(file_path, mode="w", encoding="utf-8") as f:
    f.write(json.dumps(instances_dict, indent=2))

## III. Export of 10x10 Fisher-Thompson

### CSV Export

In [8]:
# Pick the ft10 instance (the 10x10 Fisher-Thompson problem) from the structured instances
instance = instances_dict["instance ft10"]
# Convert its routing dictionary into a table; judging by the displayed output this yields
# one row per operation with routing id, operation index, machine and processing time
df_routings_ft10 = routing_dict_to_df(instance)
# Last expression of the cell, so the table is rendered as the cell output
df_routings_ft10

Unnamed: 0,Routing_ID,Operation,Machine,Processing Time
0,0,0,M00,29
1,0,1,M01,78
2,0,2,M02,9
3,0,3,M03,36
4,0,4,M04,49
...,...,...,...,...
95,9,5,M09,76
96,9,6,M05,47
97,9,7,M03,52
98,9,8,M04,90


In [9]:
# Write the ft10 routing table to CSV without the positional index column
df_routings_ft10.to_csv(
    basic_data_path / "ft10_routings.csv",
    index=False,
)