In [1]:
from gurobipy import *
import pandas as pd
import numpy as np
import os

# display settings
from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.set_option('display.float_format', lambda x: '%.4f' % x)

## Set mode

In [2]:
# mode = "train"
mode = "testcase"

## Set working directory

In [3]:
print(os.getcwd())

if mode == "train":
    if (os.getcwd() == "/Users/wangyanpu/Desktop/台灣大學/大三下/作業研究/case/Case2/to students/data"):
        pass
    else:
        os.chdir("/Users/wangyanpu/Desktop/台灣大學/大三下/作業研究/case/Case2/to students/data")
else:
    if (os.getcwd() == "/Users/wangyanpu/Desktop/台灣大學/大三下/作業研究/case/Case2/testcase/testcases"):
        pass
    else:
        os.chdir("/Users/wangyanpu/Desktop/台灣大學/大三下/作業研究/case/Case2/testcase/testcases")

print(os.getcwd())
os.listdir()

/Users/wangyanpu/Desktop/台灣大學/大三下/作業研究/case/Case2/baseline
/Users/wangyanpu/Desktop/台灣大學/大三下/作業研究/case/Case2/testcase/testcases


['instance 345.csv',
 'instance 351.csv',
 'instance 184.csv',
 'instance 190.csv',
 'instance 14.csv',
 'instance 28.csv',
 'instance 147.csv',
 'instance 153.csv',
 'instance 9.csv',
 'instance 219.csv',
 'instance 231.csv',
 'instance 225.csv',
 'instance 224.csv',
 'instance 230.csv',
 'instance 218.csv',
 'instance 8.csv',
 'instance 152.csv',
 'instance 29.csv',
 'instance 146.csv',
 'instance 15.csv',
 'instance 191.csv',
 'instance 185.csv',
 'instance 350.csv',
 'instance 344.csv',
 'instance 352.csv',
 'instance 346.csv',
 'instance 193.csv',
 'instance 187.csv',
 'instance 178.csv',
 'instance 17.csv',
 'instance 150.csv',
 'instance 144.csv',
 'instance 226.csv',
 'instance 232.csv',
 'instance 233.csv',
 'instance 227.csv',
 'instance 145.csv',
 'instance 151.csv',
 'instance 179.csv',
 'instance 16.csv',
 'instance 186.csv',
 'instance 192.csv',
 'instance 347.csv',
 'instance 353.csv',
 'instance 343.csv',
 'instance 196.csv',
 'instance 182.csv',
 'instance 155.csv',
 '

## Defining Functions

In [4]:
def split_(row, machine_set):
    if row is np.nan:
        return list(machine_set)
    else:
        return list(map(int, row.split(",")))

In [5]:
def file_preprocess(file_path):
    ''' read csv and create usable dataframe'''
    
    # get machine set
    df = pd.read_csv(file_path, index_col=0)
    mfor1 = df['Stage-1 Machines'].values
    mfor2 = df['Stage-2 Machines'].values
    mfor1 = [list(map(int, x.split(','))) for x in mfor1]
    mfor2 = [list(map(int, x.split(','))) for x in mfor2 if x is not np.nan]
    mfor1 = [item for sublist in mfor1 for item in sublist]
    mfor2 = [item for sublist in mfor2 for item in sublist]
    machine_set_ = list(set(mfor1 + mfor2))
    
    # turn strings into list in dataframe
    df["Stage-1 Machines"] = df["Stage-1 Machines"].apply(split_, machine_set=machine_set_)
    df["Stage-2 Machines"] = df["Stage-2 Machines"].apply(split_, machine_set=machine_set_)
    
    from sklearn.preprocessing import MultiLabelBinarizer
    stage1 = df["Stage-1 Machines"]
    stage2 = df["Stage-2 Machines"]

    # create stage machine dummy variables from list
    mlb = MultiLabelBinarizer()
    mlb2 = MultiLabelBinarizer()
    dummyM1_df = pd.DataFrame(mlb.fit_transform(stage1), columns=mlb.classes_, index=df.index)
    dummyM1_df = dummyM1_df.add_prefix("stage1_m")
    dummyM2_df = pd.DataFrame(mlb2.fit_transform(stage2), columns=mlb2.classes_, index=df.index)
    dummyM2_df = dummyM2_df.add_prefix("stage2_m")
    dummyM1_df, dummyM2_df
    
    # dummy column names
    dummyM1_col = dummyM1_df.columns
    dummyM2_col = dummyM2_df.columns

    df = df.join(dummyM1_df, on="Job ID")
    df = df.join(dummyM2_df, on="Job ID")
    df = df.drop(["Stage-1 Machines", "Stage-2 Machines"], axis=1)
    
    return df, machine_set_, dummyM1_col, dummyM2_col

In [12]:
def IP(instance, machine_set, dummyM1_col, dummyM2_col, time_limit1, time_limit2):
    ''' two stage optimization'''
    
    # turn data into corresponding list
    pt_stage1 = instance["Stage-1 Processing Time"].values
    pt_stage2 = instance["Stage-2 Processing Time"].values
    due_time = instance["Due Time"].values

    job_id = list(instance.index)
    m_dummy_stage1 = instance[dummyM1_col].values
    m_dummy_stage2 = instance[dummyM2_col].values
    
    # L: big number
    L = (sum(pt_stage1) + sum(pt_stage2)) * 100
    
    ''' stage 1 '''
    p1 = Model("p1")
    p1.setParam('TimeLimit', time_limit1)
    j_num = int(len(job_id))
    m_len = len(machine_set)

    # Variables
    s_jkm = []
    c_jkm = []
    x_jkm = []
    for j in range(j_num):
        tmp_s = []
        tmp_c = []
        tmp_x = []
        for k in range(2):
            tmp_s.append([])
            tmp_c.append([])
            tmp_x.append([])
            for m in machine_set:
                tmp_s[k].append(p1.addVar(lb = 0, vtype = GRB.CONTINUOUS, name = "s_" + str(j + 1) + "," + str(k + 1) + "," + str(m)))
                tmp_c[k].append(p1.addVar(lb = 0, vtype = GRB.CONTINUOUS, name = "c_" + str(j + 1) + "," + str(k + 1) + "," + str(m)))
                tmp_x[k].append(p1.addVar(lb = 0, vtype = GRB.BINARY, name = "x_" + str(j + 1) + "," + str(k) + ","+ str(m)))  
        s_jkm.append(tmp_s)
        c_jkm.append(tmp_c)
        x_jkm.append(tmp_x)

    y_jkjkm = []
    for j1 in range(j_num):
        y_jkjkm.append([])
        for k1 in range(2):
            y_jkjkm[j1].append([])
            for j2 in range(j1 + 1, j_num):
                y_jkjkm[j1][k1].append([])
                for k2 in range(2):
                    y_jkjkm[j1][k1][j2 - j1 - 1].append([])
                    for m in machine_set:
                        y_jkjkm[j1][k1][j2 - j1 - 1][k2].append(p1.addVar(lb = 0, vtype = GRB.BINARY, 
                                                       name = "y_" + str(j1 + 1) + "," + str(k1 + 1) + "," + str(j2 + 1) + "," + str(k2 + 1)+ "," + str(m)))

    c_j = []
    for j in range(j_num):
        c_j.append(p1.addVar(lb = 0, vtype = GRB.CONTINUOUS, name = "c_" + str(j + 1)))
    t_j = []
    for j in range(j_num):
        t_j.append(p1.addVar(lb = 0, vtype = GRB.BINARY, name = "t_" + str(j + 1)))
        
    # setting the objective function 
    p1.setObjective(quicksum(t_j[j] for j in range(j_num)), GRB.MINIMIZE) 
    
    # add constraints
    # job stage 1 machine limit
    p1.addConstrs((x_jkm[j][0][m] <= m_dummy_stage1[j][m] for j in range(j_num) for m in range(m_len)))
    # job stage 2 machine limit
    p1.addConstrs((x_jkm[j][1][m] <= m_dummy_stage2[j][m] for j in range(j_num) for m in range(m_len)))

    # tardy variable
    p1.addConstrs((t_j[j] * L >= c_j[j] - due_time[j] 
                    for j in range(j_num)), "tardy count")

    # job complete time
    p1.addConstrs((c_j[j] >= quicksum(c_jkm[j][1][m] for m in range(m_len))
                    for j in range(j_num)), "job completion time")

    # each job assigned once
    p1.addConstrs((quicksum(x_jkm[j][k][m] for m in range(m_len)) == 1 for j in range(j_num) for k in range(2)), "job assignment constraint")

    p1.addConstrs((s_jkm[j][k][m] + c_jkm[j][k][m] <= x_jkm[j][k][m] * L for j in range(j_num) for k in range(2) for m in range(m_len)))

    # process time
    p1.addConstrs((c_jkm[j][0][m] >= s_jkm[j][0][m] + pt_stage1[j] - (1 - x_jkm[j][0][m]) * L for j in range(j_num) for m in range(m_len)))
    p1.addConstrs((c_jkm[j][1][m] >= s_jkm[j][1][m] + pt_stage2[j] - (1 - x_jkm[j][1][m]) * L for j in range(j_num) for m in range(m_len)))

    for m in range(m_len):
        for j1 in range(j_num):
            for k1 in range(2):
                for j2 in range(j_num - j1 - 1):
                    for k2 in range(2):
                    # 注意ijm的indexing方法不同
                        p1.addConstr(s_jkm[j1][k1][m] >= c_jkm[j1 + j2 + 1][k2][m] - y_jkjkm[j1][k1][j2][k2][m] * L)
                        p1.addConstr(s_jkm[j1 + j2 + 1][k2][m] >= c_jkm[j1][k1][m] - (1 - y_jkjkm[j1][k1][j2][k2][m]) * L)

    # subjob 2 starts only after subjob 1
    p1.addConstrs((quicksum(s_jkm[j][1][m] for m in range(m_len)) >= quicksum(c_jkm[j][0][m] for m in range(m_len)) for j in range(j_num)), "subjob 1 ends before subjob2 starts")
    
    r1 = p1.relax()
    r1.optimize()
    
    
    ''' stage 2 '''
    p1_2 = Model("p1_2")
    
    # set time limit
    p1_2.setParam('TimeLimit', time_limit2)
    j_num = int(len(job_id))
    m_len = len(machine_set)

    # tardy job num result from part 1
    T = r1.objVal

    # Variables
    u = p1_2.addVar(lb = 0, vtype = GRB.CONTINUOUS, name = "u")
    s_jkm = []
    c_jkm = []
    x_jkm = []
    for j in range(j_num):
        tmp_s = []
        tmp_c = []
        tmp_x = []
        for k in range(2):
            tmp_s.append([])
            tmp_c.append([])
            tmp_x.append([])
            for m in machine_set:
                tmp_s[k].append(p1_2.addVar(lb = 0, vtype = GRB.CONTINUOUS, name = "s_" + str(j + 1) + "," + str(k + 1) + "," + str(m)))
                tmp_c[k].append(p1_2.addVar(lb = 0, vtype = GRB.CONTINUOUS, name = "c_" + str(j + 1) + "," + str(k + 1) + "," + str(m)))
                tmp_x[k].append(p1_2.addVar(lb = 0, vtype = GRB.BINARY, name = "x_" + str(j + 1) + "," + str(k) + ","+ str(m)))  
        s_jkm.append(tmp_s)
        c_jkm.append(tmp_c)
        x_jkm.append(tmp_x)
    print(len(x_jkm))
    print(len(x_jkm[0]))
    print(len(x_jkm[0][0]))

    y_jkjkm = []
    for j1 in range(j_num):
        y_jkjkm.append([])
        for k1 in range(2):
            y_jkjkm[j1].append([])
            for j2 in range(j1 + 1, j_num):
                y_jkjkm[j1][k1].append([])
                for k2 in range(2):
                    y_jkjkm[j1][k1][j2 - j1 - 1].append([])
                    for m in machine_set:
                        y_jkjkm[j1][k1][j2 - j1 - 1][k2].append(p1_2.addVar(lb = 0, vtype = GRB.BINARY, 
                                                       name = "y_" + str(j1 + 1) + "," + str(k1 + 1) + "," + str(j2 + 1) + "," + str(k2 + 1)+ "," + str(m)))

    c_j = []
    for j in range(j_num):
        c_j.append(p1_2.addVar(lb = 0, vtype = GRB.CONTINUOUS, name = "c_" + str(j + 1)))
    t_j = []
    for j in range(j_num):
        t_j.append(p1_2.addVar(lb = 0, vtype = GRB.BINARY, name = "t_" + str(j + 1)))
        
        
    # setting the objective function 
    p1_2.setObjective(u, GRB.MINIMIZE) 
    
    
    # add constraints and name them
    # makespan
    p1_2.addConstrs((u >= c_j[j] for j in range(j_num)))
    
    # tardy jobs num set to "lesser than or equal to" T
    p1_2.addConstr((quicksum(t_j[j] for j in range(j_num)) <= T))

    # stage 1 machine limit
    p1_2.addConstrs((x_jkm[j][0][m] <= m_dummy_stage1[j][m] for j in range(j_num) for m in range(m_len)))
    # stage 2 machine limit
    p1_2.addConstrs((x_jkm[j][1][m] <= m_dummy_stage2[j][m] for j in range(j_num) for m in range(m_len)))

    # tardy variable
    p1_2.addConstrs((t_j[j] * L >= c_j[j] - due_time[j] 
                    for j in range(j_num)), "tardy count")

    # job complete time
    p1_2.addConstrs((c_j[j] >= quicksum(c_jkm[j][1][m] for m in range(m_len))
                    for j in range(j_num)), "job completion time")

    # each job assigned once
    p1_2.addConstrs((quicksum(x_jkm[j][k][m] for m in range(m_len)) == 1 for j in range(j_num) for k in range(2)), "job assignment constraint")

    p1_2.addConstrs((s_jkm[j][k][m] + c_jkm[j][k][m] <= x_jkm[j][k][m] * L for j in range(j_num) for k in range(2) for m in range(m_len)))

    # process time
    p1_2.addConstrs((c_jkm[j][0][m] >= s_jkm[j][0][m] + pt_stage1[j] - (1 - x_jkm[j][0][m]) * L for j in range(j_num) for m in range(m_len)))
    p1_2.addConstrs((c_jkm[j][1][m] >= s_jkm[j][1][m] + pt_stage2[j] - (1 - x_jkm[j][1][m]) * L for j in range(j_num) for m in range(m_len)))

    for m in range(m_len):
        for j1 in range(j_num):
            for k1 in range(2):
                for j2 in range(j_num - j1 - 1):
                    for k2 in range(2):
                    # 注意ijm的indexing方法不同
                        p1_2.addConstr(s_jkm[j1][k1][m] >= c_jkm[j1 + j2 + 1][k2][m] - y_jkjkm[j1][k1][j2][k2][m] * L)
                        p1_2.addConstr(s_jkm[j1 + j2 + 1][k2][m] >= c_jkm[j1][k1][m] - (1 - y_jkjkm[j1][k1][j2][k2][m]) * L)

    # subjob 2 starts only after subjob 1
    p1_2.addConstrs((quicksum(s_jkm[j][1][m] for m in range(m_len)) >= quicksum(c_jkm[j][0][m] for m in range(m_len)) for j in range(j_num)), "subjob 1 ends before subjob2 starts")
    
    r1_2 = p1_2.relax()
    r1_2.optimize()
    
    
    return r1.objVal, r1_2.objVal

## Set instances and test

In [13]:
# set instances to test
path_list = ["instance 1.csv", "instance 2.csv", "instance 3.csv", "instance 4.csv", "instance 5.csv"]
# path_list = ["instance 5.csv"]

In [14]:
tardy_list = []
makespan_list = []

for path in path_list:
    df, machine_set, dummyM1_col, dummyM2_col = file_preprocess(path)
    tardy, makespan = IP(df, machine_set, dummyM1_col, dummyM2_col, 60, 180)
    tardy_list.append(tardy)
    makespan_list.append(makespan)

for i in range(len(path_list)):
    print("----------------")
    print("file:", path_list[i])
    print("number of tardy jobs:", tardy_list[i])
    print("makespan:", makespan_list[i])
    print("----------------")

Set parameter TimeLimit to value 60
Derived model does not contain these changes.
Gurobi Optimizer version 9.5.1 build v9.5.1rc2 (mac64[arm])
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads
Optimize a model with 0 rows, 0 columns and 0 nonzeros
Model fingerprint: 0xf9715da1
Coefficient statistics:
  Matrix range     [0e+00, 0e+00]
  Objective range  [0e+00, 0e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [0e+00, 0e+00]
Presolve time: 0.00s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    0.0000000e+00   0.000000e+00   0.000000e+00      0s

Solved in 0 iterations and 0.00 seconds (0.00 work units)
Optimal objective  0.000000000e+00
Set parameter TimeLimit to value 180
15
2
10
Derived model does not contain these changes.
Gurobi Optimizer version 9.5.1 build v9.5.1rc2 (mac64[arm])
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads
Optimize a model with 

In [None]:
tardy_list = []
makespan_list = []

for path in path_list:
    df, machine_set, dummyM1_col, dummyM2_col = file_preprocess(path)
    tardy, makespan = IP(df, machine_set, dummyM1_col, dummyM2_col, 90, 270)
    tardy_list.append(tardy)
    makespan_list.append(makespan)
    print("----------------")
    print("file:", path)
    print("number of tardy jobs:", tardy)
    print("makespan:", makespan)
    print("----------------")

Set parameter TimeLimit to value 90
Gurobi Optimizer version 9.5.1 build v9.5.1rc2 (mac64[arm])
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads
Optimize a model with 9375 rows, 5130 columns and 28095 nonzeros
Model fingerprint: 0x6a9a2da8
Variable types: 615 continuous, 4515 integer (4515 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+04]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+04]
Presolve removed 2729 rows and 1337 columns
Presolve time: 0.05s
Presolved: 6646 rows, 3793 columns, 20121 nonzeros
Variable types: 476 continuous, 3317 integer (3317 binary)
Found heuristic solution: objective 14.0000000

Root relaxation: objective 5.664488e-04, 850 iterations, 0.01 seconds (0.01 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.00057    0   24   14.00000    

Showing first log only...

Root relaxation presolve removed 248 rows and 237 columns
Root relaxation presolved: 204886 rows, 105912 columns, 617041 nonzeros


Use crossover to convert LP symmetric solution to basic solution...
Concurrent spin time: 0.04s

Solved with dual simplex (dual model)

Root relaxation: objective 1.840000e+01, 7623 iterations, 1.04 seconds (1.27 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0   18.40000    0 15206  318.40000   18.40000  94.2%     -    9s
H    0     0                     158.1000000   18.40000  88.4%     -   10s
     0     0   18.40000    0 2535  158.10000   18.40000  88.4%     -   25s
     0     0   18.40000    0 1807  158.10000   18.40000  88.4%     -   32s
     0     0   18.40000    0 2674  158.10000   18.40000  88.4%     -   49s
     0     0   18.40000    0 1916  158.10000   18.40000  88.4%     -   60s
