## 110-2 Operations Research Case 2

Algorithm 0: Revised based on https://digitalcommons.odu.edu/cgi/viewcontent.cgi?article=1024&context=cee_fac_pubs

## Loading data

In [47]:
import pandas as pd 
import numpy as np

In [48]:
datadir = './data'
instances = []
for i in range(5):
    name = f'instance_{i+1}.csv'
    fullpath = datadir+'/'+name
    instances.append(pd.read_csv(fullpath))

In [49]:
instances[0]

Unnamed: 0,Job ID,Stage-1 Processing Time,Stage-2 Processing Time,Stage-1 Machines,Stage-2 Machines,Due Time
0,1,2.7,1.3,12345,2345.0,5
1,2,1.6,1.4,2345,12345.0,5
2,3,0.7,1.9,12345,2345.0,5
3,4,0.5,0.7,2345,12345.0,5
4,5,0.8,1.0,12345,2345.0,5
5,6,2.5,0.0,12345,,5
6,7,1.4,2.0,2345,2345.0,5
7,8,1.1,1.1,2345,2345.0,10
8,9,0.8,0.7,12345,2345.0,10
9,10,1.0,0.5,2345,12345.0,10


In [50]:
df = instances[0]
due_dates = df['Due Time'].to_numpy()
# Related Relative Due Dates
RRDD = due_dates - np.min(due_dates)
RRDD

array([0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5])

In [186]:
# job structure 
class Job:
    '''structure for 1 job '''
    def __init__(self, row):
        '''input := df.iloc[idx, :]'''
        self.id = row['Job ID']
        self.due = row['Due Time']
        self.stage_states = [False for _ in range(2)] # True as complete, False as not yet processed
        self.stage_pt = [row['Stage-1 Processing Time'], row['Stage-2 Processing Time']]
        mfor1 = list(map(int, row['Stage-1 Machines'].split(',')))
        if row['Stage-2 Machines'] is not np.nan:
            mfor2 = list(map(int, row['Stage-2 Machines'].split(',')))
        else: mfor2 = [] 
        self.stage_mach = [mfor1, mfor2]
        self.assign_mach = [None for _ in range(2)]
        self.start_time = [-1 for _ in range(2)]
        self.end_time = [-1 for _ in range(2)]
    
    def __repr__(self):
        return f'\
          * Job id: {self.id}\n\
          * Due time:{self.due}\n\
          stage 1: {self.stage_states[0]}, {self.assign_mach[0]}\n\
                   {self.stage_pt[0]}, {self.stage_mach[0]}\n\
          stage 2: {self.stage_states[1]}, {self.assign_mach[1]}\n\
                   {self.stage_pt[1]}, {self.stage_mach[1]}'
    __str__ = __repr__
        

In [344]:
class Jobs:
    '''structure for multiple jobs' management'''
    def __init__(self, n):
        self.completion_times = np.zeros(n)
        self.tardiness = np.zeros(n)
    def get_RRDD(self):
        if getattr(self, 'RRDD', None) is None:
            self.RRDD = self.due_dates - np.min(self.due_dates)
        return self.RRDD # static
    
    def add_jobs(self, datas):
        self.due_dates = df['Due Time'].to_numpy()
        self.jobs = []
        for i in range(len(datas)):
            row = datas.iloc[i, :]
            jobi = Job(row)
            self.jobs.append(jobi)
    
    def assign(self, job_name, mach, st):
        '''job_name = (2, 0) means job 3 and op 1
        note that job and op is 0-indexed as well as machines
        op
        '''
        
        i = 0 
        jobidx, op = job_name 
        job = self.jobs[jobidx]
        while i < op:
            if job.stage_states[i] is not True:
                print(f'Error scheduling operation: previous operation {i} hasn\'t been scheduled.')
                return 
            i += 1
        J.completion_times[jobidx] = st + job.stage_pt[op]
        job.assign_mach[op] = mach
        job.start_time[op] = st
        job.end_time[op] = J.completion_times[jobidx]
        job.stage_states[op] = True 

In [345]:
class Machines:
    def __init__(self, df):
        '''pass the stage1, stage2 machine lists'''
        mfor1 = df['Stage-1 Machines'].values.tolist()
        mfor2 = df['Stage-2 Machines'].values.tolist()
        mfor1 = [list(map(int, x.split(','))) for x in mfor1]
        mfor2 = [list(map(int, x.split(','))) for x in mfor2 if x is not np.nan]
        mfor1 = sum(mfor1, [])
        mfor2 = sum(mfor2, [])
        self.number = max(max(mfor1), max(mfor2))
        self.versatile = [mfor1.count(i+1) + mfor2.count(i+1) for i in range(self.number)]
        self.schedule = [[] for _ in range(self.number)]
        self.span = [[] for _ in range(self.number)]
        self.fintime = [0 for _ in range(self.number)]
        
    def is_available(self):
        '''pass machine 編號(1-indexed) to get the state'''
        pass
    def _schedule(self, mach, job_name, proc_time):
        '''mach is 0-indexed'''
        display_name = tuple([x+1 for x in job_name])
        self.schedule[mach].append(display_name) 
        self.span[mach].append(proc_time)
        self.fintime[mach] += proc_time

## Preprocessing 

In [346]:
M = Machines(df)
M.versatile
J = Jobs(len(df))
J.add_jobs(df)
J.get_RRDD()

array([0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5])

## Scheduling the first operation

Re-run the code from **preprcoessing section** otherwise the data stuctures
keep accumulating repetitive datas. 

In [347]:
# for the first operation
temp_completion_times = np.array([job.stage_pt[0] for i, job in enumerate(J.jobs)])
temp_completion_times
tardiness = np.subtract(temp_completion_times, J.get_RRDD())

order = np.argsort(tardiness, kind='mergesort')# gives stable sort
# the job index order (0-indexed) to be assigned 
order

array([ 8,  9,  7, 11, 10,  3,  2,  4,  6,  1,  5,  0])

In [348]:
# schedule the first operation in accordance to tardiness

# which index
batch_index = 0

for jidx in order:
    job = J.jobs[jidx]
    job_name = (jidx, batch_index)
    job_proc_time = job.stage_pt[batch_index]
    if job_proc_time <= 0:
        # update job
        J.assign(job_name = job_name, 
                mach = -1,
                st = job.end_time[batch_index-1]) 
        # note that it's only possible for second operation to have proc time = 0
        # so this doesn't trigger index error
        continue
    # note that the available machines here is 1-indexed, change them to 0-indexed
    avail_machines_idx = [x-1 for x in job.stage_mach[batch_index]]
    # The less versatile, the less fintime, the better. Break ties with smallest-index rule.
    curr_machine = min(avail_machines_idx, key = lambda x: (M.fintime[x],M.versatile[x], x))
    # schedule the operation on curr_machine
    M._schedule(job_name = job_name, 
               mach = curr_machine, 
               proc_time = job_proc_time)
    # update the Jobs objects
    # start time is either concatenated to the machines's last scheduled operation's end
    # or the end of its previous operation 
    J.assign(job_name = job_name, 
            mach = curr_machine, 
             st = max(M.fintime[curr_machine], J.completion_times[jidx])
            )
print(*M.schedule, sep = '\n')

[(9, 1), (3, 1), (6, 1)]
[(10, 1), (4, 1), (7, 1)]
[(8, 1), (5, 1), (2, 1)]
[(12, 1), (1, 1)]
[(11, 1)]


In [349]:
M.fintime

[4.0, 2.9, 3.5, 4.7, 3.0]

In [350]:
M.span

[[0.8, 0.7, 2.5], [1.0, 0.5, 1.4], [1.1, 0.8, 1.6], [2.0, 2.7], [3.0]]

## Scheduling the second operation

In [351]:
batch_index = 1
for jidx in order:
    job = J.jobs[jidx]
    job_name = (jidx, batch_index)
    job_proc_time = job.stage_pt[batch_index]
    if job_proc_time <= 0:
        # update job
        J.assign(job_name = job_name, 
                mach = -1,
                st = job.end_time[batch_index-1]) 
        # note that it's only possible for second operation to have proc time = 0
        # so this doesn't trigger index error
        continue
    # note that the available machines here is 1-indexed, change them to 0-indexed
    avail_machines_idx = [x-1 for x in job.stage_mach[batch_index]]
    # The less versatile, the less fintime, the better. Break ties with smallest-index rule.
    curr_machine = min(avail_machines_idx, key = lambda x: (M.fintime[x],M.versatile[x], x))
    # schedule the operation on curr_machine
    M._schedule(job_name = job_name, 
               mach = curr_machine, 
               proc_time = job_proc_time)
    # update the Jobs objects
    # start time is either concatenated to the machines's last scheduled operation's end
    # or the end of its previous operation 
    J.assign(job_name = job_name, 
            mach = curr_machine, 
             st = max(M.fintime[curr_machine], J.completion_times[jidx])
            )
print(*M.schedule, sep = '\n')

[(9, 1), (3, 1), (6, 1), (2, 2)]
[(10, 1), (4, 1), (7, 1), (9, 2), (4, 2), (3, 2)]
[(8, 1), (5, 1), (2, 1), (8, 2), (5, 2)]
[(12, 1), (1, 1), (7, 2)]
[(11, 1), (10, 2), (11, 2), (1, 2)]


In [352]:
M.fintime

[5.4, 6.199999999999999, 5.6, 6.7, 6.2]

In [353]:
M.span

[[0.8, 0.7, 2.5, 1.4],
 [1.0, 0.5, 1.4, 0.7, 0.7, 1.9],
 [1.1, 0.8, 1.6, 1.1, 1.0],
 [2.0, 2.7, 2.0],
 [3.0, 0.5, 1.4, 1.3]]

In [354]:
J.completion_times

array([8.7, 6.8, 8.1, 5. , 6.6, 6.5, 8.7, 5.7, 4.3, 4. , 7.4, 4. ])

In [363]:
Tardy_number = len(np.where(J.completion_times > J.due_dates)[0])
Makespan = max(M.fintime)
print('First objective (# tardy):', Tardy_number)
print('Second objective: (makespan)', Makespan)

First objective (# tardy): 6
Second objective: (makespan) 6.7


### Scheduling by operations真的看起來太慘了...