## Loading data and functions

In [14]:
import os
import pandas as pd
import numpy as np
from heapq import heappush, heappop, heapify
from or_checker import checker, get_machine_number

from structs import * # classes 
from utils import *   # functions 

## Loading Paul's testcases

In [15]:
testcase_dir = '/Users/yangqingwen/Desktop/Github/OR_Case2/testcase/testcases'
files = [f for f in os.listdir(testcase_dir)]
len(files)
files[0]

'instance 184.csv'

In [16]:
# 喔乾job id要從1開始XDD 不然make_result會有錯

In [17]:
instances = {}
for i in range(len(files)):
    filename = files[i]
    fullpath = f'{testcase_dir}/{filename}'
    df = pd.read_csv(fullpath)
    df['Job ID'] = df['Job ID'].apply(lambda x : x+1)
    instances[filename] = df
df

Unnamed: 0,Job ID,Due Time,Stage-1 Processing Time,Stage-2 Processing Time,Stage-1 Machines,Stage-2 Machines
0,1,10.3,4.3,0.5,1234567891011121314,234568910121314.0
1,2,13.1,5.0,4.1,1234567891011121314,234568910121314.0
2,3,17.7,3.1,2.7,1234567891011121314,234568910121314.0
3,4,18.8,2.6,7.9,1234567891011121314,234568910121314.0
4,5,18.7,9.8,0.0,1234567891011121314,
5,6,22.7,6.1,7.7,1234567891011121314,234568910121314.0
6,7,18.6,4.9,8.6,1234567891011121314,234568910121314.0
7,8,15.3,8.0,0.0,1234567891011121314,
8,9,24.0,8.1,5.6,1234567891011121314,234568910121314.0
9,10,7.3,0.2,3.7,1234567891011121314,234568910121314.0


### Reloading functions

In [18]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
refs = list(instances.items())

## Heuristic Algorithm with find-hole

In [20]:
def find_hole(job_name, 
              currjob, 
              J,
              M, 
              Mach_Q, 
              AvailMachTable):
    curr_jindex, curr_op = job_name 
    proc_time = currjob.stage_pt[curr_op]
    def find_hole_helper():
        for idx, curritem in enumerate(Mach_Q):
            _, m_id = curritem
            if not AvailMachTable[curr_jindex][curr_op][m_id]:
                continue
            # find legal holes
            if not M.holes[m_id]:
                continue
            for hole_id, hole in enumerate(M.holes[m_id]):
                hole_start, hole_end = hole
                hole_length = hole_end - hole_start 
                # enough length 
                if hole_length - proc_time >= -GAP:
                    # check legal precedence 
                    curr_hole_id = hole_id
                    if curr_op == 1 and hole_start >= currjob.end_time[0]:
                        return idx, m_id, curr_hole_id, hole_start + proc_time
                    elif curr_op == 0: 
                        return idx, m_id, curr_hole_id, hole_start + proc_time
                # 一律從hole_start開始schedule，沒辦法的話就跳過（不然更新holes那邊變超麻煩）
    res = find_hole_helper()
    if not res:
        # print(f'No result in finding a hole for {[x+1 for x in job_name]}')
        return False
    if res:
        idx, m_id, hole_id, fill_end = res
        #print(f'Schduling {m_id+1}, {M.holes[m_id][hole_id]} for {[x+1 for x in job_name]}')
        #print(f'Original: {M.holes[m_id]}')
        # idx是Queue中machine的位置
        hole = M.holes[m_id][hole_id]
        # print(hole)
        hole_start, hole_end = hole
        J.assign(job_name = job_name, 
                mach = m_id, 
                st = hole_start) 
        # update hole length and replace avg_hole_length 
        new_avg_hl = M.schedule_hole(
                job_name = job_name, 
                mach = m_id, 
                hole_id = hole_id, 
                fill_end = fill_end)  
        # print(f'Updated: {M.holes[m_id]}')
        return True 

In [21]:
# while not all operations in all jobs are scheduled
# https://stackoverflow.com/questions/59903948/how-to-iterate-heapq-without-losing-data

def heuristic(J, M): 
    # best_makepsan = sum(job_processing_time) for all jobs / |M|
    # heperparameters 
    TOLRATIO = 0.3
    Fail_Tolerance = 2
    best_makespan = sum(job.stage_pt[0]+job.stage_pt[1] for job in J.jobs)/M.number
    tolerance = best_makespan * TOLRATIO  # tolerance for idle time, if idle > tolerance, do not schedule the curr op in the current epoch. 
    print(f'[INFO] {len(J.jobs)} jobs, {M.number} machines')
    print(f'[INFO] Tolerance: {tolerance:.2f}')

    # Job_Q (lst_ratio, job_index, job_op) 
    Job_Q = make_Q(J)
    # print(f'Job Queue: {Job_Q}')
    # Mach_Q (versatility, avg_hole_length, m)
    Mach_Q = make_mQ(M)
    AvailMachs = getAvailMachs(J = J, M = M)
    
    
    fails = [0 for _ in range(len(J.jobs))]
    epoch = 0
    while Job_Q:
        epoch += 1
        PERMIT = True
        # step 3. extract_min() to get the job with minimal LST and its other attributes
        _, curr_job_index, curr_op = heappop(Job_Q)
        curr_job = J.jobs[curr_job_index]
        op_proc_time = curr_job.stage_pt[curr_op]
        job_name = (curr_job_index, curr_op)
         
        
        # if curr_job has no second operation 
        if op_proc_time <= GAP and curr_op == 1: 
            J.assign(job_name = job_name, 
                    mach = None,
                    st = curr_job.end_time[curr_op-1]) 
            # note that it's only possible for second operation to have proc time = 0
            # so this doesn't trigger index error
            continue 
        # step 4-1. calculate the best machine: find-hole
        # 'job_name', 'currjob', and 'Mach_Q'
        if find_hole(J = J, M = M,
                     job_name = job_name, currjob = curr_job, Mach_Q = Mach_Q,
                  AvailMachTable = AvailMachs):

            continue
            
    
        # step 4-2. if find-hole fails, calculate the best machine and schedule at the end 
        avail_machines_idx = [x-1 for x in curr_job.stage_mach[curr_op]]
        curr_machine = min(avail_machines_idx, key = lambda x: (M.fintime[x], M.versatile[x], x))
        # ARE THERE REASONS TO POSTPONE THE CURR OP?
        if J.completion_times[curr_job_index] + op_proc_time > J.due_dates[curr_job_index] and curr_op == 1 and fails[curr_job_index] < Fail_Tolerance:
            #print(f'[INFO] Job {curr_job_index+1} op {curr_op+1} will be tardy even if scheduled, queue last.')
            curr_new_value = float('inf')
            PERMIT = False
        
        # ARE THERE REASONS TO POSTPONE THE CURR OP (if curr_op is second op)?
        elif M.fintime[curr_machine] < J.completion_times[curr_job_index]:
            
            idle = J.completion_times[curr_job_index] - M.fintime[curr_machine]
            if idle > tolerance and curr_op == 1 and fails[curr_job_index] < Fail_Tolerance:
                #print(f'[INFO] Job {curr_job_index+1} op {curr_op+1} has idle {idle:.2f}, postpone it.')
                PERMIT = False
                if Job_Q:
                    curr_new_value = Job_Q[0][0] + 3
                else:
                    curr_new_value = 0 # the last one 
            else:
                #print(f'[INFO] Job {curr_job_index+1} op {curr_op+1} has idle {idle:.2f}.')
                
                M.add_idle( 
                hole_start = M.fintime[curr_machine],
                hole_end =  J.completion_times[curr_job_index], 
                mach = curr_machine, 
                idle_time = idle)
         
        if PERMIT:
            # print(f'Scheduling {[x+1 for x in job_name]} on machine {curr_machine+1}\'s end at {M.fintime[curr_machine]}')
            J.assign(job_name = job_name, 
                mach = curr_machine, 
                 st = M.fintime[curr_machine]
                ) 
            M._schedule(job_name = job_name, 
               mach = curr_machine, 
                proc_time = op_proc_time,
               st = M.fintime[curr_machine])
            curr_new_value = J.get_LST()[curr_job_index]
        else: 
            fails[curr_job_index] += 1
        # print(f'{epoch} Fails Count:', fails)
        # update the LST value and push it back to Q if the job has its second operation that hasn't been done
        if not PERMIT:
            heappush(Job_Q, (curr_new_value, curr_job_index, curr_op))
            # it maintains the heap invariant, no need to heapify
    return J, M

## Running tests and Checking feasibility

In [22]:
#Results = {}

testcase_num = len(refs)
GAp = 1e-7
passed, success = 0, 0
dumps = []
for inst in refs:
    filename, data = inst
    print(f'** Summary {filename}:')
    try:
        
        M = Machines(data)
        J = Jobs(len(data))
        J.add_jobs(data)
        heuristic(J = J, M = M)
        Tardy_jobs = list(np.where(J.completion_times - J.due_dates > -GAP)[0])
        Tardy_jobs = [x+1 for x in Tardy_jobs]
        success += 1
        # print(J.completion_times)
        Makespan = max(M.fintime)
        ans = make_result(J)
        res = checker(ans, instances[filename])
        if res:
            passed += 1
            tardy, makespan, sch = res
            print(f'Testcase {filename} passed.')
            print(f'tardy number: {len(tardy)}, tardies: {tardy}, makespan: {makespan}')
            # print(*sch, sep = '\n')
        else: print(f'Testcase {filename} failed.')
    
        # Results[filename] = {'J':J, 'M':M}
        
    except Exception as e:
        dumps.append(f'{filename} has error {e}.')
        
print(f'* {success} testcases executed, {len(refs) - success} dumped') 
print(*dumps, sep = '\n')
print(f'* {passed} testcases passed.') 

** Summary instance 184.csv:
[INFO] 18 jobs, 9 machines
[INFO] Tolerance: 5.15
Testcase instance 184.csv passed.
tardy number: 5, tardies: [0, 11, 14, 16, 17], makespan: 21.3
** Summary instance 190.csv:
[INFO] 6 jobs, 18 machines
[INFO] Tolerance: 0.74
Testcase instance 190.csv passed.
tardy number: 1, tardies: [3], makespan: 12.7
** Summary instance 14.csv:
[INFO] 15 jobs, 14 machines
[INFO] Tolerance: 3.31
Testcase instance 14.csv passed.
tardy number: 3, tardies: [0, 1, 13], makespan: 19.6
** Summary instance 28.csv:
[INFO] 23 jobs, 10 machines
[INFO] Tolerance: 6.55
Testcase instance 28.csv passed.
tardy number: 12, tardies: [0, 1, 4, 6, 7, 9, 11, 13, 17, 19, 20, 22], makespan: 26.3
** Summary instance 147.csv:
[INFO] 8 jobs, 8 machines
[INFO] Tolerance: 3.18
Testcase instance 147.csv passed.
tardy number: 0, tardies: [], makespan: 16.8
** Summary instance 153.csv:
[INFO] 1 jobs, 15 machines
[INFO] Tolerance: 0.17
Testcase instance 153.csv passed.
tardy number: 0, tardies: [], mak

Testcase instance 74.csv passed.
tardy number: 46, tardies: [0, 3, 6, 7, 9, 10, 11, 13, 14, 15, 16, 17, 18, 20, 21, 23, 24, 25, 27, 28, 29, 31, 32, 33, 34, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 52, 53, 56, 57, 58, 60, 61, 62, 63], makespan: 41.3
** Summary instance 133.csv:
[INFO] 54 jobs, 18 machines
[INFO] Tolerance: 7.55
Testcase instance 133.csv passed.
tardy number: 32, tardies: [0, 1, 4, 5, 7, 9, 11, 14, 17, 18, 20, 23, 26, 27, 28, 29, 31, 32, 33, 34, 37, 38, 39, 40, 42, 46, 47, 48, 49, 50, 51, 52], makespan: 30.9
** Summary instance 48.csv:
[INFO] 19 jobs, 7 machines
[INFO] Tolerance: 6.08
Testcase instance 48.csv passed.
tardy number: 10, tardies: [3, 5, 9, 10, 12, 13, 14, 15, 16, 17], makespan: 27.9
** Summary instance 127.csv:
[INFO] 5 jobs, 10 machines
[INFO] Tolerance: 1.38
Testcase instance 127.csv passed.
tardy number: 0, tardies: [], makespan: 11.0
** Summary instance 49.csv:
[INFO] 20 jobs, 14 machines
[INFO] Tolerance: 3.71
Testcase instance 49.csv passed.
ta

Testcase instance 116.csv passed.
tardy number: 14, tardies: [2, 3, 8, 11, 13, 14, 15, 16, 18, 21, 23, 26, 27, 30], makespan: 25.7
** Summary instance 92.csv:
[INFO] 51 jobs, 14 machines
[INFO] Tolerance: 10.06
Testcase instance 92.csv passed.
tardy number: 39, tardies: [0, 1, 2, 3, 5, 6, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 23, 24, 27, 28, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 49], makespan: 41.4
** Summary instance 86.csv:
[INFO] 2 jobs, 15 machines
[INFO] Tolerance: 0.42
Testcase instance 86.csv passed.
tardy number: 0, tardies: [], makespan: 17.2
** Summary instance 90.csv:
[INFO] 7 jobs, 14 machines
[INFO] Tolerance: 1.30
Testcase instance 90.csv passed.
tardy number: 0, tardies: [], makespan: 14.1
** Summary instance 84.csv:
[INFO] 32 jobs, 19 machines
[INFO] Tolerance: 4.85
Testcase instance 84.csv passed.
tardy number: 8, tardies: [0, 2, 4, 8, 13, 19, 20, 28], makespan: 20.2
** Summary instance 100.csv:
[INFO] 34 jobs, 12 machines
[INFO] Tole

[INFO] 5 jobs, 6 machines
[INFO] Tolerance: 2.29
Testcase instance 162.csv passed.
tardy number: 0, tardies: [], makespan: 16.9
** Summary instance 189.csv:
[INFO] 18 jobs, 14 machines
[INFO] Tolerance: 3.75
Testcase instance 189.csv passed.
tardy number: 3, tardies: [11, 13, 14], makespan: 22.0
* 187 testcases executed, 5 dumped
instance 154.csv has error 'float' object has no attribute 'split'.
instance 54.csv has error 'int' object has no attribute 'split'.
instance 51.csv has error 'float' object has no attribute 'split'.
instance 22.csv has error 'int' object has no attribute 'split'.
instance 158.csv has error 'float' object has no attribute 'split'.
* 187 testcases passed.


## Running TA's testcases

In [23]:
datadir = '/Users/yangqingwen/Desktop/Github/OR_Case2/data'
instances = []
for i in range(5):
    name = f'instance_{i+1}.csv'
    fullpath = datadir+'/'+name
    instances.append(pd.read_csv(fullpath))

In [24]:
Answers = []
for inst in range(5):
    data = instances[inst]
    M = Machines(data)
    J = Jobs(len(data))
    J.add_jobs(data)
    heuristic(J = J, M = M)
    print(f'** Summary \ninstance {inst+1}:')
    Tardy_jobs = list(np.where(J.completion_times > J.due_dates)[0])
    Tardy_jobs = [x+1 for x in Tardy_jobs]
    Makespan = max(M.fintime)
    print('First objective (# tardy):', len(Tardy_jobs), Tardy_jobs)
    print('Second objective (makespan):', Makespan)
    print('==================================')
    Answers.append(make_result(J))

[INFO] 12 jobs, 5 machines
[INFO] Tolerance: 1.81
** Summary 
instance 1:
First objective (# tardy): 1 [1]
Second objective (makespan): 6.9
[INFO] 11 jobs, 5 machines
[INFO] Tolerance: 2.21
** Summary 
instance 2:
First objective (# tardy): 1 [7]
Second objective (makespan): 8.6
[INFO] 10 jobs, 5 machines
[INFO] Tolerance: 2.33
** Summary 
instance 3:
First objective (# tardy): 3 [1, 2, 3]
Second objective (makespan): 10.3
[INFO] 15 jobs, 7 machines
[INFO] Tolerance: 5.06
** Summary 
instance 4:
First objective (# tardy): 5 [2, 3, 4, 10, 11]
Second objective (makespan): 22.009999999999998
[INFO] 20 jobs, 9 machines
[INFO] Tolerance: 5.55
** Summary 
instance 5:
First objective (# tardy): 5 [3, 11, 17, 19, 20]
Second objective (makespan): 28.700000000000003


## Checking feasibility

In [12]:
for i in range(5):
    res = checker(Answers[i], instances[i])
    if res:
        tardy, makespan, sch = res
        print(f'Testcase {i+1} passed.')
        print(f'tardy number: {len(tardy)}, tardies: {tardy}, makespan: {makespan}')
        print(*sch, sep = '\n')
    else: print(f'Testcase {i+1} failed.')
    print('=================')

Testcase 1 passed.
tardy number: 1, tardies: [0], makespan: 6.9
[((1, 1), 2.7), ((5, 1), 3.5), ((9, 1), 4.3), ((10, 2), 6.7)]
[((7, 1), 1.4), ((3, 2), 3.3), ((12, 1), 5.3), ((1, 2), 6.6)]
[((3, 1), 0.7), ((4, 1), 1.2), ((7, 2), 3.4), ((4, 2), 4.1), ((8, 1), 5.2), ((10, 1), 6.2)]
[((6, 1), 2.5), ((11, 1), 5.5), ((11, 2), 6.9)]
[((2, 1), 1.6), ((2, 2), 3.0), ((5, 2), 4.5), ((9, 2), 5.2), ((8, 2), 6.3)]
Testcase 2 passed.
tardy number: 1, tardies: [6], makespan: 8.6
[((2, 1), 1.6), ((1, 1), 4.3), ((9, 1), 5.1), ((11, 2), 7.9)]
[((3, 1), 1.0), ((2, 2), 3.9), ((11, 1), 6.4), ((7, 2), 7.9)]
[((5, 1), 0.8), ((3, 2), 3.7), ((4, 2), 4.5), ((1, 2), 6.0), ((10, 2), 8.6)]
[((4, 1), 2.8), ((7, 1), 4.2), ((10, 1), 6.4), ((9, 2), 8.2)]
[((6, 1), 2.7), ((5, 2), 4.6), ((8, 1), 6.8)]
Testcase 3 passed.
tardy number: 3, tardies: [0, 1, 2], makespan: 10.3
[((6, 1), 3.3), ((3, 1), 6.3), ((2, 2), 10.3)]
[((1, 1), 3.0), ((10, 1), 5.6), ((1, 2), 7.1)]
[((4, 1), 3.0), ((9, 2), 5.2), ((10, 2), 6.9)]
[((5, 1), 1