In [1]:
from pathlib import Path
from itertools import product

import numpy as np
import pandas as pd
from scipy import stats

In [2]:
DATA_DIR = Path("../data/")

In [3]:
def read(path: Path, line_identifier="_10_"):
    with open(path, "r") as fh:
        lines = fh.readlines()
    
    data = [line.split() for line in lines if line_identifier in line]
    columns = ["instance", "env_seed", "agent_seed", "cost", "time"]
    types = [str, int, int, int, float]
    
    df = pd.DataFrame(data, columns=columns)
    for col, dtype in zip(columns, types):
        df[col] = df[col].astype(dtype)
        
    return df

def get_category(instance: str):
    if instance.startswith("C"):
        return "C"
    elif instance.startswith("RC"):
        return "RC"
    elif instance.startswith("R"):
        return "R"
    else:
        raise ValueError("Unknown instance name.")

def res2df(dir: Path):
    dfs = [] 
    for path in dir.rglob("*.txt"):
        df = read(path)
        
        *_, tw, arrival, method, fname = path.parts
        df["tw"] = tw
        df["method"] = method
        df["arrival"] = arrival
        df["category"] = df['instance'].apply(get_category)
    
        dfs.append(df)
    
    return pd.concat(dfs)

## Benchmark

In [4]:
df = res2df(DATA_DIR / "benchmark")
df = df[df['method'] != 'icd-rolling-horizon']
df.to_csv("../data/benchmark.csv", index=False)

best = df.groupby(["instance", "env_seed", "tw", "arrival"])["cost"].min().reset_index()
algs = df.groupby(["instance", "env_seed", "tw", "arrival", "method"])["cost"].min().unstack().reset_index()

best = best.sort_values(["instance", "env_seed", "tw", "arrival"])
algs = algs.sort_values(["instance", "env_seed", "tw", "arrival"])
algs["best"] = best["cost"]

In [5]:
# Sanity check that all results are in
null_rows = df[df.isnull().any(axis=1)]
assert null_rows.empty

df.groupby(['method']).count()

Unnamed: 0_level_0,instance,env_seed,agent_seed,cost,time,tw,arrival,category
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
hindsight,7200,7200,7200,7200,7200,7200,7200,7200
icd-dispatch-only,7200,7200,7200,7200,7200,7200,7200,7200
icd-double-threshold,7200,7200,7200,7200,7200,7200,7200,7200
icd-hamming-distance,7200,7200,7200,7200,7200,7200,7200,7200
icd-postpone-only,7200,7200,7200,7200,7200,7200,7200,7200
icd-rolling-horizon1,7200,7200,7200,7200,7200,7200,7200,7200


In [6]:
df = algs

# Compute gap for each method.
methods = [
    "icd-rolling-horizon1",
    "icd-dispatch-only",
    "icd-postpone-only",
    "icd-hamming-distance",
    "icd-double-threshold",
]
for method in methods:
    df[f"{method}_gap"] = (df[method] - df["best"]) / df["best"] * 100

df["category"] = df["instance"].apply(get_category)

In [7]:
df.groupby(["category", "arrival", "tw"])[[method + "_gap" for method in methods]].mean().round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,method,icd-rolling-horizon1_gap,icd-dispatch-only_gap,icd-postpone-only_gap,icd-hamming-distance_gap,icd-double-threshold_gap
category,arrival,tw,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
C,hom,DL2,1.8,0.12,0.16,0.15,0.12
C,hom,DL4,3.0,1.41,1.91,1.29,1.38
C,hom,DL8,4.28,3.12,4.45,2.88,3.08
C,hom,TW2,7.68,6.39,7.99,6.48,5.99
C,hom,TW4,7.29,6.38,8.11,6.39,5.7
C,hom,TW8,6.67,5.92,7.38,5.97,5.29
C,uni,DL2,2.2,0.12,0.16,0.17,0.12
C,uni,DL4,4.65,2.63,3.16,2.63,2.55
C,uni,DL8,6.77,5.1,6.39,5.12,5.02
C,uni,TW2,9.95,9.91,11.15,10.25,9.21


In [8]:
# Groupby instance variation
res = df.groupby(["category", "arrival", "tw"])[[method + "_gap" for method in methods]].mean()
means = res.mean()

# Change column names
res.columns = ["RH", "DSHH", "Postpone", "Hamming", "Double"]

# Order as R, C and RC
res.sort_index(level=0, inplace=True, key=lambda x: x.map({"R": 0, "C":1, "RC":2}))

# Add dummy column for spacing and alignment
res.insert(0, "", "")

text = res.to_latex(float_format="%.2f")
text = text.replace("hom", "HOM")
text = text.replace("uni", "UNI")
print(text)

\begin{tabular}{llllrrrrr}
\toprule
   &     &     &    RH &  DSHH &  Postpone &  Hamming &  Double \\
category & arrival & tw &   &       &       &           &          &         \\
\midrule
R & HOM & DL2 &   &  4.02 &  2.66 &      3.07 &     2.46 &    2.57 \\
   &     & DL4 &   &  8.66 &  6.84 &      8.00 &     6.38 &    6.35 \\
   &     & DL8 &   & 11.74 & 10.20 &     12.35 &     9.86 &    9.73 \\
   &     & TW2 &   & 13.38 & 12.40 &     13.47 &    12.64 &   11.02 \\
   &     & TW4 &   & 13.84 & 13.07 &     14.29 &    13.49 &   11.76 \\
   &     & TW8 &   & 13.52 & 13.17 &     14.10 &    13.54 &   11.66 \\
   & UNI & DL2 &   &  4.74 &  2.73 &      3.01 &     2.78 &    2.69 \\
   &     & DL4 &   & 10.20 &  8.03 &      8.87 &     7.79 &    7.56 \\
   &     & DL8 &   & 13.55 & 12.13 &     13.17 &    12.19 &   11.41 \\
   &     & TW2 &   & 14.15 & 15.12 &     14.91 &    15.39 &   13.40 \\
   &     & TW4 &   & 14.77 & 15.86 &     15.91 &    16.48 &   14.46 \\
   &     & TW8 &   & 14.63 &

  text = res.to_latex(float_format="%.2f")


In [9]:
means.round(2)

method
icd-rolling-horizon1_gap    9.58
icd-dispatch-only_gap       8.76
icd-postpone-only_gap       9.66
icd-hamming-distance_gap    8.83
icd-double-threshold_gap    8.06
dtype: float64

### Statistical tests

In [10]:
SIGNIFICANCE_LEVEL = 0.0125

In [11]:
groups = df.groupby(["category", "arrival", "tw", "env_seed"])[[method + "_gap" for method in methods]].mean()
variations = list(product(
    ["R", "C" ,"RC"],
    df["arrival"].unique(),
    df["tw"].unique(),
))
table_data = []

for category, arrival, tw in variations:
    data = groups.xs((category, arrival, tw)).dropna()
    baseline_method = data.mean().idxmin().removesuffix("_gap")
    baseline_data = data[baseline_method + "_gap"]
    p_vals = []

    for other_method in methods:
        if other_method != baseline_method:
            other_data = data[other_method + "_gap"]
            p_val = stats.ttest_ind(baseline_data, other_data).pvalue
            p_vals.append(p_val)
        else:
            p_vals.append(None)
            
    if all(p_val < SIGNIFICANCE_LEVEL for p_val in p_vals if p_val is not None):
        print(category, arrival, tw, baseline_method)

    table_data.append(p_vals)

R hom TW2 icd-double-threshold
R hom TW4 icd-double-threshold
R hom TW8 icd-double-threshold
R uni DL8 icd-double-threshold
R uni TW2 icd-double-threshold
C hom TW2 icd-double-threshold
C hom TW4 icd-double-threshold
C hom TW8 icd-double-threshold
C uni TW2 icd-double-threshold
C uni TW4 icd-double-threshold
RC hom TW2 icd-double-threshold
RC hom TW4 icd-double-threshold
RC hom TW8 icd-double-threshold
RC uni TW2 icd-double-threshold


In [12]:
# Groupby instance variation
res = df.groupby(["category", "arrival", "tw"])[[method + "_gap" for method in methods]].mean()
means = res.mean()

# Change column names
res.columns = ["RH", "DSHH", "Postpone", "Hamming", "Double"]

for idx, p_vals in zip(res.index, table_data):
    res.loc[idx] = p_vals

# Order as R, C and RC
res.sort_index(level=0, inplace=True, key=lambda x: x.map({"R": 0, "C":1, "RC":2}))

# Add dummy column for spacing and alignment
res.insert(0, "", "")

text = res.to_latex(float_format="%.3f")
text = text.replace("hom", "HOM")
text = text.replace("uni", "UNI")
text = text.replace("0.000", "\\textless 0.001")
text = text.replace("NaN", "-")
text = text.replace("\\cline{1-9} \\cline{2-9}", "\\midrule")
text = text.replace("\\cline{2-9}", "\\cmidrule(lr){2-9}")

print(text)

\begin{tabular}{llllrrrrr}
\toprule
   &     &     &    RH &  DSHH &  Postpone &  Hamming &  Double \\
category & arrival & tw &   &       &       &           &          &         \\
\midrule
R & HOM & DL2 &   & \textless 0.001 &   - &     0.014 &    0.044 &   0.911 \\
   &     & DL4 &   & \textless 0.001 & 0.132 &     \textless 0.001 &      - &   0.260 \\
   &     & DL8 &   & \textless 0.001 & 0.019 &     \textless 0.001 &      - &   0.075 \\
   &     & TW2 &   & \textless 0.001 & 0.001 &     \textless 0.001 &    \textless 0.001 &     - \\
   &     & TW4 &   & \textless 0.001 & \textless 0.001 &     \textless 0.001 &    \textless 0.001 &     - \\
   &     & TW8 &   & \textless 0.001 & \textless 0.001 &     \textless 0.001 &    \textless 0.001 &     - \\
   & UNI & DL2 &   & \textless 0.001 & 0.936 &     0.006 &    0.006 &     - \\
   &     & DL4 &   & \textless 0.001 & 0.452 &     \textless 0.001 &    0.497 &     - \\
   &     & DL8 &   & \textless 0.001 & 0.526 &     \textless 0.001 

  text = res.to_latex(float_format="%.3f")


In [13]:
res.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RH,DSHH,Postpone,Hamming,Double
category,arrival,tw,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
R,hom,DL2,,0.0,,0.014,0.044,0.911
R,hom,DL4,,0.0,0.132,0.0,,0.26
R,hom,DL8,,0.0,0.019,0.0,,0.075
R,hom,TW2,,0.0,0.001,0.0,0.0,
R,hom,TW4,,0.0,0.0,0.0,0.0,
R,hom,TW8,,0.0,0.0,0.0,0.0,
R,uni,DL2,,0.0,0.936,0.006,0.006,
R,uni,DL4,,0.0,0.452,0.0,0.497,
R,uni,DL8,,0.0,0.526,0.0,0.429,
R,uni,TW2,,0.0,0.0,0.0,0.0,
