In [None]:
import glob
import json
import os

import matplotlib as mpl
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np

from matplotlib.transforms import Affine2D, Bbox, TransformedBbox

from plotting import autolabel_bars, COLOR_DICT, latexify, SNS_COLORS

## Data Preparation

In [None]:
# Every JSON log under ../logs/h?/<run>/, leaving out any path that contains "h5".
files = [path for path in glob.glob("../logs/h?/*/*.json") if "h5" not in path]

# Experiment names in load order, followed by one container per kind of parsed result.
experiments = []
data, acc_1, acc_5 = {}, {}, {}
best_acc_1, best_acc_5 = {}, {}
epoch_times = {}

# Image counts used as numerators when epoch durations are converted to img/s.
# NOTE(review): the two differ by exactly 50,000, presumably the validation split; confirm.
N_IMAGENET = 1_331_167
N_IMAGENET_TRAIN = 1_281_167

In [None]:
def parse_exp_report(file, data, acc_1, acc_5, best_acc_1, best_acc_5, best_acc_key, epoch_times):
    """Parse one training log and fill the caller-supplied containers in place.

    Every non-blank line of ``file`` is expected to be a JSON object preceded
    by a five-character prefix. Only records whose ``"type"`` is ``"LOG"`` are
    kept; they are routed by the first element of ``"step"``.

    Args:
        file: Path of the log file.
        data: Dict with list values under the keys ``"train"``, ``"val"``,
            ``"epoch"``, ``"summary"`` and ``"params"``; matching records are
            appended.
        acc_1: List receiving ``(epoch, elapsed_hours, v.top1)`` tuples.
        acc_5: List receiving ``(epoch, elapsed_hours, v.top5)`` tuples.
        best_acc_1: Dict in which ``best_acc_key`` is set to the best top-1 value.
        best_acc_5: Dict in which ``best_acc_key`` is set to the best top-5 value.
        best_acc_key: Key written into ``best_acc_1`` / ``best_acc_5``.
        epoch_times: List receiving ``(epoch, duration)`` tuples.

    Raises:
        ValueError: If a ``LOG`` record has a step type other than T, V, E, S or P.
    """
    step_buckets = {"T": "train", "V": "val", "E": "epoch", "S": "summary", "P": "params"}

    with open(file, "r") as f:
        for line in f:
            # Drop the five-character prefix and the line terminator. Slicing with
            # [5:-1] would cut the closing brace off a last line without a newline.
            payload = line[5:].rstrip("\r\n")
            if not payload.strip():
                continue
            parsed_line = json.loads(payload)

            if parsed_line["type"] != "LOG":
                continue

            step_type = parsed_line["step"][0]
            bucket = step_buckets.get(step_type) if isinstance(step_type, str) else None
            if bucket is None:
                raise ValueError(f"Wrong step type: {step_type}")
            data[bucket].append(parsed_line)

    for log in data["epoch"]:
        metrics = log["data"]
        if "v.top1" in metrics:
            # Epochs are stored zero-based; elapsed time is converted to hours.
            epoch = log["step"][1] + 1
            elapsed_hours = float(log["elapsedtime"]) / 3600
            acc_1.append((epoch, elapsed_hours, metrics["v.top1"]))
            acc_5.append((epoch, elapsed_hours, metrics["v.top5"]))
        elif "e.total_time" in metrics:
            # The duration is a string whose last character is stripped before conversion.
            epoch_times.append((log["step"][1] + 1, float(metrics["e.total_time"][:-1])))

    if data["summary"]:
        for log in data["summary"]:
            if "v.top1" in log["data"]:
                best_acc_1[best_acc_key] = log["data"]["v.top1"]
                best_acc_5[best_acc_key] = log["data"]["v.top5"]
    else:
        # No summary record: fall back to the best per-epoch value. The lists passed
        # in are used directly (the previous code indexed them with a global name).
        if acc_1:
            best_acc_1[best_acc_key] = max(acc for _, _, acc in acc_1)
        if acc_5:
            best_acc_5[best_acc_key] = max(acc for _, _, acc in acc_5)

In [None]:
# Load every discovered log. The experiment name is "<group dir>-<run dir>".
for file in files:
    path_parts = file.split("/")
    exp_name = f"{path_parts[-3]}-{path_parts[-2]}"

    if "standalone" in exp_name:
        # Standalone data loader output: the file is a single JSON document
        experiments.append(exp_name)
        with open(file, "r") as f:
            data[exp_name] = json.load(f)
        continue

    if not exp_name.startswith("h3"):
        # Default case: one log file per experiment, flat containers
        experiments.append(exp_name)
        data[exp_name] = {
            section: [] for section in ("train", "val", "epoch", "summary", "params")
        }
        acc_1[exp_name] = []
        acc_5[exp_name] = []
        epoch_times[exp_name] = []

        parse_exp_report(
            file,
            data[exp_name],
            acc_1[exp_name],
            acc_5[exp_name],
            best_acc_1,
            best_acc_5,
            exp_name,
            epoch_times[exp_name],
        )
        continue

    # Training regimen with multiple stages: containers are keyed by stage
    if exp_name not in data:
        experiments.append(exp_name)
        for container in (data, acc_1, acc_5, best_acc_1, best_acc_5, epoch_times):
            container[exp_name] = {}

    base_name = os.path.splitext(os.path.basename(file))[0]
    if base_name == "regimen_log":
        # The regimen-level log is not parsed
        continue

    # The stage id is whatever follows the last underscore of the file name
    stage = base_name.split("_")[-1]
    data[exp_name][stage] = {
        section: [] for section in ("train", "val", "epoch", "summary", "params")
    }
    acc_1[exp_name][stage] = []
    acc_5[exp_name][stage] = []
    epoch_times[exp_name][stage] = []

    parse_exp_report(
        file,
        data[exp_name][stage],
        acc_1[exp_name][stage],
        acc_5[exp_name][stage],
        best_acc_1[exp_name],
        best_acc_5[exp_name],
        stage,
        epoch_times[exp_name][stage],
    )

In [None]:
# Collapse the per-stage results of every h3 experiment into flat, epoch-sorted
# lists and a single best accuracy, so they look like the other experiments.
for exp in experiments:
    if not exp.startswith("h3"):
        continue

    for per_stage in (acc_1, acc_5, epoch_times):
        merged = [
            record
            for stage_records in per_stage[exp].values()
            for record in stage_records
        ]
        per_stage[exp] = sorted(merged, key=lambda record: record[0])

    for best in (best_acc_1, best_acc_5):
        # 0 is the floor, exactly as when a running maximum starts at 0
        best[exp] = max([0, *best[exp].values()])

In [None]:
# Taken from `misc.ipynb`
# Representative duration of one epoch per configuration. plot_time_experiment
# looks entries up with segments 2-5 of the experiment name and multiplies the
# value by the epoch number, then divides by 3600 for its "Time (h)" axis, so
# the values are treated as seconds per epoch.
# NOTE(review): keys look like "<dataset>-<model>-<variant>-<storage>"; confirm against misc.ipynb.
repr_epoch_times = {
    "inet-r50-raw-hdd": 6585,
    "inet-r50-jpeg85-hdd": 2584,
    "inet-r50-jpeg75-hdd": 1251,
    "inet-r50-jpeg50-hdd": 1250,
    "inet-r50-jpeg25-hdd": 1248,
    "inet-r50-jpeg10-hdd": 1248,
    "inet-r18-raw-hdd": 6585,
    "inet-r18-jpeg85-hdd": 2584,
    "inet-r18-jpeg75-hdd": 452,
    "inet-r18-jpeg50-hdd": 449,
    "inet-r18-jpeg25-hdd": 449,
    "inet-r18-jpeg10-hdd": 448,
    "inet-alex-raw-hdd": 6585,
    "inet-alex-jpeg85-hdd": 2584,
    "inet-alex-jpeg75-hdd": 215,
    "inet-alex-jpeg50-hdd": 200,
    "inet-alex-jpeg25-hdd": 192,
    "inet-alex-jpeg10-hdd": 185,
    "inet-alex-raw-ssd": 516,
    "inet-alex-jpeg85-ssd": 265,
    "inet-alex-jpeg75-ssd": 214,
    "inet-alex-jpeg50-ssd": 201,
    "inet-alex-jpeg25-ssd": 190,
    "inet-alex-jpeg10-ssd": 186
}

## Plotting Code

In [None]:
def plot_intro_acc_comparison(exps, filename, tight_layout=True, figsize=(10,10), bbox_padding=0.05):
    """Draw a two-bar chart of the best top-1 accuracy of two experiments.

    Args:
        exps: Sequence whose first two entries are keys of ``best_acc_1``; the
            first is labelled "Orig.", the second "Compr.".
        filename: Stem of the SVG written to ``../talk/`` (used as the title
            instead when ``DEBUG`` is set).
        tight_layout: Whether to call ``plt.tight_layout``.
        figsize: Figure size in centimetres (converted to inches).
        bbox_padding: ``pad_inches`` passed to ``savefig``.
    """
    size_inches = (figsize[0] / 2.54, figsize[1] / 2.54)
    f, ax = plt.subplots(1, 1, figsize=size_inches)

    positions = [1, 2]
    original_bar = ax.bar(positions[0], best_acc_1[exps[0]], color=SNS_COLORS[3])
    compressed_bar = ax.bar(positions[1], best_acc_1[exps[1]], color=SNS_COLORS[4])
    bars = [*original_bar, *compressed_bar]

    ax.set_xticks(positions)
    ax.set_xticklabels(["Orig.", "Compr."])
    ax.set_ylabel("Accuracy (%)")
    ax.set_ylim(0, 105)

    autolabel_bars(ax, bars, precision=0)

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        # Debug mode: show the figure, tagged with its target file name
        ax.set_title(filename)
        plt.show()
        return

    plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
    plt.close()

In [None]:
def plot_throughput_comparison(exps, filename, tight_layout=True, figsize=(10,10), legend_loc=None, print_ylabel=False, width=0.75, ylim_max=None, bbox_padding=0.05):
    """Draw one throughput bar per experiment: HDD, SSD, memory runs, then synthetic.

    The storage type is the fourth dash-separated segment of each experiment
    name; the tick label is the fifth segment without its first character.
    Throughput is ``N_IMAGENET`` divided by the mean epoch time, with the first
    recorded epoch left out.

    Args:
        exps: Experiment names (keys of ``epoch_times``). Must contain one
            ``hdd``, one ``ssd`` and one ``synth`` run plus any number of
            ``mem`` runs.
        filename: Stem of the SVG written to ``../talk/``.
        tight_layout: Whether to call ``plt.tight_layout``.
        figsize: Figure size in centimetres.
        legend_loc: Legend location; no legend is drawn when falsy.
        print_ylabel: Whether to label the y axis.
        width: Bar width.
        ylim_max: Upper y limit; left automatic when falsy.
        bbox_padding: ``pad_inches`` passed to ``savefig``.

    Returns:
        ``(y_min, y_max)`` of the padded tight bounding box in inches when the
        figure is saved; ``None`` when ``DEBUG`` is set.

    Raises:
        ValueError: On an unknown storage type or a missing hdd/ssd/synth run.
    """
    mems = []
    single_runs = {"hdd": None, "ssd": None, "synth": None}
    for exp in exps:
        config = exp.split("-")
        # Skip the first recorded epoch when collecting durations
        times = np.array([i[1] for i in epoch_times[exp][1:]])
        storage = config[3]
        if storage == "mem":
            mems.append((exp, times))
        elif storage in single_runs:
            single_runs[storage] = (exp, times)
        else:
            raise ValueError(f"Unknown storage type {storage!r} in experiment {exp!r}")

    missing = [storage for storage, run in single_runs.items() if run is None]
    if missing:
        raise ValueError(f"No experiment found for storage type(s): {', '.join(missing)}")
    hdd, ssd, synth = single_runs["hdd"], single_runs["ssd"], single_runs["synth"]

    # Memory runs are ordered by their numeric worker count
    mems = sorted(mems, key=lambda x: int(x[0].split("-")[4][1:]))

    f, ax = plt.subplots(1, 1, figsize=(figsize[0]/2.54, figsize[1]/2.54))
    bars = []
    labels = []

    x = np.arange(len(exps))

    # Each bar is labelled from its own experiment (HDD and SSD used to be swapped)
    bars += ax.bar(x[0], N_IMAGENET / hdd[1].mean(), width, color=SNS_COLORS[0])
    labels.append(hdd[0].split("-")[4][1:])
    bars += ax.bar(x[1], N_IMAGENET / ssd[1].mean(), width, color=SNS_COLORS[1])
    labels.append(ssd[0].split("-")[4][1:])
    for i, mem in enumerate(mems):
        bars += ax.bar(x[i+2], N_IMAGENET / mem[1].mean(), width, color=SNS_COLORS[2])
        labels.append(mem[0].split("-")[4][1:])
    bars += ax.bar(x[-1], N_IMAGENET / synth[1].mean(), width, color=SNS_COLORS[3])
    labels.append("-")

    handles = [
        mpatches.Patch(color=SNS_COLORS[0], label="HDD"),
        mpatches.Patch(color=SNS_COLORS[1], label="SSD"),
        mpatches.Patch(color=SNS_COLORS[2], label="Memory"),
        mpatches.Patch(color=SNS_COLORS[3], label="GPU"),
    ]

    if legend_loc:
        ax.legend(handles=handles, loc=legend_loc, ncol=3)
    ax.set_xlabel("Number of data loader workers")
    ax.set_xticks(ticks=x)
    ax.set_xticklabels(labels=labels)
    if print_ylabel:
        ax.set_ylabel("Throughput (img/s)")
    if ylim_max:
        ax.set_ylim(0, ylim_max)

    autolabel_bars(ax, bars, precision=0)

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        ax.set_title(filename)
        plt.show()
    else:
        # Tight bbox comes back in pixels; dividing by the dpi gives inches
        bbox = ax.get_tightbbox(f.canvas.get_renderer())
        bbox = TransformedBbox(bbox, Affine2D().scale(1./f.dpi))
        bbox = bbox.get_points()

        plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
        plt.close()

        return (bbox[0][1] - bbox_padding, bbox[1][1] + bbox_padding)

In [None]:
def plot_standalone_dl_comparison(exps, filename, tick_labels, tight_layout=True, figsize=(15,10), legend_loc=None, print_xlabel=True, print_ylabel=False, width=0.75, ylim_max=None, bbox_padding=0.05):
    """Draw grouped HDD / SSD / memory throughput bars for standalone loader runs.

    Experiments are visited in reverse-sorted name order and assigned to the
    first of "hdd", "ssd", "mem" found in the name. Throughput is
    ``N_IMAGENET`` divided by the mean of ``data[exp]["epoch_times"]`` without
    its first entry.

    Args:
        exps: Experiment names (keys of ``data``).
        filename: Stem of the SVG written to ``../talk/``.
        tick_labels: One label per bar group.
        tight_layout: Whether to call ``plt.tight_layout``.
        figsize: Figure size in centimetres.
        legend_loc: Legend location; no legend is drawn when falsy.
        print_xlabel: Whether to label the x axis.
        print_ylabel: Whether to label the y axis.
        width: Bar width.
        ylim_max: Upper y limit; left automatic when falsy.
        bbox_padding: ``pad_inches`` passed to ``savefig``.
    """
    storages = ("hdd", "ssd", "mem")
    throughput = {storage: [] for storage in storages}

    for exp in sorted(exps, reverse=True):
        for storage in storages:
            if storage in exp:
                mean_epoch_time = np.array(data[exp]["epoch_times"][1:]).mean()
                throughput[storage].append(N_IMAGENET / mean_epoch_time)
                break

    fig, ax = plt.subplots(1, 1, figsize=(figsize[0]/2.54, figsize[1]/2.54))
    spacing = 0.05 * width
    x = np.arange(3 * len(tick_labels), step=3)

    # One bar per storage type in every group, shifted left / centred / right
    bars = []
    for color_idx, (storage, shift) in enumerate(zip(storages, (-1, 0, 1))):
        bars += ax.bar(x + shift * width + shift * spacing, throughput[storage], width, color=SNS_COLORS[color_idx])

    handles = [
        mpatches.Patch(color=SNS_COLORS[color_idx], label=label)
        for color_idx, label in enumerate(("HDD", "SSD", "Memory"))
    ]

    if legend_loc:
        ax.legend(handles=handles, loc=legend_loc, ncol=3)
    if print_xlabel:
        ax.set_xlabel("Dataset Variant")
    ax.set_xticks(ticks=x)
    ax.set_xticklabels(labels=tick_labels)
    if print_ylabel:
        ax.set_ylabel("Throughput (img/s)")
    if ylim_max:
        ax.set_ylim(0, ylim_max)

    autolabel_bars(ax, bars, precision=0)

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        ax.set_title(filename)
        plt.show()
        return

    plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
    plt.close()
In [None]:
def plot_codec_comparison(exps, filename, tight_layout=True, figsize=(10,10), legend_loc="upper left", width=0.75, ylim_max=None, bbox_padding=0.05):
    """Draw paired JPEG / WebP throughput bars, one pair per worker count.

    The worker count is the fifth dash-separated segment of the experiment name
    without its first character. Throughput is ``N_IMAGENET`` divided by the
    mean epoch time, with the first recorded epoch left out.

    Args:
        exps: Experiment names (keys of ``epoch_times``); each must contain
            "jpeg" or "webp", and both codecs need the same number of runs.
        filename: Stem of the SVG written to ``../talk/``.
        tight_layout: Whether to call ``plt.tight_layout``.
        figsize: Figure size in centimetres.
        legend_loc: Legend location.
        width: Bar width.
        ylim_max: Upper y limit; left automatic when falsy.
        bbox_padding: ``pad_inches`` passed to ``savefig``.

    Raises:
        ValueError: If an experiment name contains neither "jpeg" nor "webp".
        AssertionError: If the number of JPEG and WebP runs differs.
    """
    def workers(exp_name):
        # "<...>-w8" style segment: drop the leading letter, keep the number
        return int(exp_name.split("-")[4][1:])

    jpeg = []
    webp = []
    labels = []

    for exp in exps:
        times = np.array(epoch_times[exp])[1:,1]
        labels.append(workers(exp))
        if "jpeg" in exp:
            jpeg.append((exp, times))
        elif "webp" in exp:
            webp.append((exp, times))
        else:
            # Previously `raise Error`, an undefined name that produced a NameError
            raise ValueError(f"Experiment {exp!r} is neither a JPEG nor a WebP run")

    jpeg = [N_IMAGENET / run[1].mean() for run in sorted(jpeg, key=lambda run: workers(run[0]))]
    webp = [N_IMAGENET / run[1].mean() for run in sorted(webp, key=lambda run: workers(run[0]))]
    labels = sorted(set(labels))
    assert len(jpeg) == len(webp), "JPEG and WebP need the same number of runs"

    f, ax = plt.subplots(1, 1, figsize=(figsize[0]/2.54, figsize[1]/2.54))
    bars = []

    spacing = 0.05 * width
    x = np.arange(2 * len(jpeg), step=2)

    bars += ax.bar(x - 0.5 * width - 0.5 * spacing, jpeg, width, color=SNS_COLORS[0])
    bars += ax.bar(x + 0.5 * width + 0.5 * spacing, webp, width, color=SNS_COLORS[1])

    handles = [
        mpatches.Patch(color=SNS_COLORS[0], label="JPEG"),
        mpatches.Patch(color=SNS_COLORS[1], label="WebP"),
    ]

    ax.legend(handles=handles, loc=legend_loc, ncol=2)
    ax.set_xlabel("Number of data loader workers")
    ax.set_xticks(ticks=x)
    ax.set_xticklabels(labels=labels)
    ax.set_ylabel("Throughput (img/s)")
    if ylim_max:
        ax.set_ylim(0, ylim_max)

    autolabel_bars(ax, bars, precision=0)

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        ax.set_title(filename)
        plt.show()
    else:
        plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
        plt.close()

In [None]:
def plot_acc_experiment(exps, filename, figsize=(10,10), tight_layout=True, epochs=90, legend=None, print_ylabel=False, bbox_padding=0.05):
    """Plot top-1 (solid) and top-5 (dashed) accuracy per epoch for each experiment.

    The dataset variant is the fifth dash-separated segment of the experiment
    name and selects the line colour from ``COLOR_DICT``.

    Args:
        exps: Experiment names (keys of ``acc_1`` / ``acc_5``); drawn in
            reverse-sorted order.
        filename: Stem of the SVG written to ``../talk/``.
        figsize: Figure size in centimetres.
        tight_layout: Whether to call ``plt.tight_layout``.
        epochs: Upper x limit.
        legend: Legend location; no legend is drawn when falsy.
        print_ylabel: Whether to label the y axis.
        bbox_padding: ``pad_inches`` passed to ``savefig``.
    """
    size_inches = (figsize[0] / 2.54, figsize[1] / 2.54)
    f, ax = plt.subplots(1, 1, figsize=size_inches)

    handles = []
    for exp in sorted(exps, reverse=True):
        variant = exp.split("-")[4]
        if variant == "raw":
            label = "Original"
        else:
            # The quality factor is the last two characters of the variant
            label = f"QF {int(variant[-2:])}"
        handles.append(mpatches.Patch(color=COLOR_DICT[variant], label=label))

        for curves, linestyle in ((acc_1, "-"), (acc_5, "--")):
            xs, _, ys = map(list, zip(*curves[exp]))
            ax.plot(xs, ys, aa=True, linestyle=linestyle, color=COLOR_DICT[variant])

    ax.set_xlabel("Epoch")
    ax.set_xlim(0, epochs)
    if print_ylabel:
        ax.set_ylabel("Accuracy (%)")
    ax.set_ylim(0, 100)

    if legend:
        for linestyle, label in (("-", "Top-1 accuracy"), ("--", "Top-5 accuracy")):
            handles.append(mlines.Line2D([], [], color="black", alpha=0.6, linestyle=linestyle, label=label))
        ax.legend(handles=handles, loc=legend, ncol=3)

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        ax.set_title(filename)
        plt.show()
        return

    plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
    plt.close()

In [None]:
def plot_acc_experiment_zoom(exps, filename, figsize=(10,10), tight_layout=True, epochs=90, zoom_min=60, zoom_max=80, print_ylabel=False, bbox_padding=0.05):
    """Plot accuracy curves restricted to the last tenth of training.

    Top-1 is drawn solid and top-5 dashed; the x axis spans
    ``[0.9 * epochs, epochs]`` and the y axis ``[zoom_min, zoom_max]``. For
    file names containing "r50", a dotted horizontal line is added at 75.9
    ("inet") or 54.74 ("p365").
    NOTE(review): these look like reference accuracies; confirm their source.

    Args:
        exps: Experiment names (keys of ``acc_1`` / ``acc_5``).
        filename: Stem of the SVG written to ``../talk/``.
        figsize: Figure size in centimetres.
        tight_layout: Whether to call ``plt.tight_layout``.
        epochs: Upper x limit.
        zoom_min: Lower y limit.
        zoom_max: Upper y limit.
        print_ylabel: Whether to label the y axis.
        bbox_padding: ``pad_inches`` passed to ``savefig``.

    Returns:
        ``(y_min, y_max)`` of the padded tight bounding box in inches when the
        figure is saved; ``None`` when ``DEBUG`` is set.
    """
    size_inches = (figsize[0] / 2.54, figsize[1] / 2.54)
    f, ax = plt.subplots(1, 1, figsize=size_inches)

    for exp in exps:
        variant = exp.split("-")[4]
        for curves, linestyle in ((acc_1, "-"), (acc_5, "--")):
            xs, _, ys = map(list, zip(*curves[exp]))
            ax.plot(xs, ys, aa=True, linestyle=linestyle, color=COLOR_DICT[variant])

    ax.set_xlabel("Epoch")
    ax.set_xlim(epochs * 0.9, epochs)
    if print_ylabel:
        ax.set_ylabel("Top-1 accuracy (%)")
    ax.set_ylim(zoom_min, zoom_max)
    ax.set_yticks([zoom_min, (zoom_min + zoom_max) / 2, zoom_max])

    reference = None
    if "r50" in filename:
        if "inet" in filename:
            reference = 75.9
        elif "p365" in filename:
            reference = 54.74
    if reference is not None:
        ax.axhline(reference, color="gray", alpha=0.8, linestyle=":", linewidth=1.5)

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        ax.set_title(filename)
        plt.show()
        return None

    # Tight bbox comes back in pixels; dividing by the dpi gives inches
    pixel_bbox = ax.get_tightbbox(f.canvas.get_renderer())
    corners = TransformedBbox(pixel_bbox, Affine2D().scale(1./f.dpi)).get_points()

    plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
    plt.close()

    return (corners[0][1] - bbox_padding, corners[1][1] + bbox_padding)

In [None]:
def plot_time_experiment(exps, filename, time_limit, figsize=(10,10), tight_layout=True, print_ylabel=False, bbox_padding=0.05):
    """Plot accuracy against training time, using a fixed epoch duration per run.

    The epoch duration comes from ``repr_epoch_times``, looked up with segments
    2-5 of the experiment name; epoch numbers are multiplied by it and divided
    by 3600 to obtain hours. Top-1 is drawn solid, top-5 dashed.

    Args:
        exps: Experiment names (keys of ``acc_1`` / ``acc_5``).
        filename: Stem of the SVG written to ``../talk/``.
        time_limit: Upper x limit in hours.
        figsize: Figure size in centimetres.
        tight_layout: Whether to call ``plt.tight_layout``.
        print_ylabel: Whether to label the y axis.
        bbox_padding: ``pad_inches`` passed to ``savefig``.

    Returns:
        ``(y_min, y_max)`` of the padded tight bounding box in inches when the
        figure is saved; ``None`` when ``DEBUG`` is set.
    """
    size_inches = (figsize[0] / 2.54, figsize[1] / 2.54)
    f, ax = plt.subplots(1, 1, figsize=size_inches)

    for exp in exps:
        name_parts = exp.split("-")
        variant = name_parts[4]
        epoch_time = repr_epoch_times["-".join(name_parts[2:6])]

        for curves, linestyle in ((acc_1, "-"), (acc_5, "--")):
            xs, _, ys = map(list, zip(*curves[exp]))
            hours = np.array(xs) * epoch_time / 3600
            ax.plot(hours, ys, aa=True, linestyle=linestyle, color=COLOR_DICT[variant])

    ax.set_xlabel("Time (h)")
    ax.set_xlim(0, time_limit)
    if print_ylabel:
        ax.set_ylabel("Accuracy (%)")
    ax.set_ylim(0, 100)
    # The 5-hour plot gets one more tick bin than the others
    ax.locator_params(axis='x', nbins=6 if time_limit == 5 else 5)

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        ax.set_title(filename)
        plt.show()
        return None

    # Tight bbox comes back in pixels; dividing by the dpi gives inches
    pixel_bbox = ax.get_tightbbox(f.canvas.get_renderer())
    corners = TransformedBbox(pixel_bbox, Affine2D().scale(1./f.dpi)).get_points()

    plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
    plt.close()

    return (corners[0][1] - bbox_padding, corners[1][1] + bbox_padding)

In [None]:
def plot_space_experiment(exp_group, filename, figsize=(10,10), tight_layout=True, epochs=90, print_ylabel=False, bbox_padding=0.05):
    """Plot one figure per storage limit, pairing a compressed run with a "raw<limit>" run.

    For every limit, the ``h2-acc-<group>-<variant>-mem`` run is drawn in the
    variant's colour and the ``h2-space-<group>-raw<limit>-mem`` run in the
    "raw" colour; top-1 solid, top-5 dashed. Each figure is saved as
    ``../talk/<filename>_<limit>.svg``.

    Args:
        exp_group: Group name containing "inet" or "p365"; selects the
            limit/variant pairing.
        filename: Stem of the written SVG files.
        figsize: Figure size in centimetres.
        tight_layout: Whether to call ``plt.tight_layout``.
        epochs: Upper x limit.
        print_ylabel: Whether to label the y axis.
        bbox_padding: ``pad_inches`` passed to ``savefig``.

    Returns:
        ``(y_min, y_max)`` of the padded tight bounding box (inches) of the
        last figure when figures are saved; ``None`` when ``DEBUG`` is set.

    Raises:
        ValueError: If ``exp_group`` contains neither "inet" nor "p365".
    """
    # (limit, compressed variant) pairs compared at each storage limit
    if "inet" in exp_group:
        pairing = (("50", "jpeg85"), ("40", "jpeg75"), ("30", "jpeg50"), ("20", "jpeg25"), ("10", "jpeg10"))
    elif "p365" in exp_group:
        pairing = (("50", "jpeg25"), ("40", "jpeg10"), ("30", "jpeg10"), ("20", "jpeg05"))
    else:
        raise ValueError

    for limit, variant in pairing:
        compressed_exp = f"h2-acc-{exp_group}-{variant}-mem"
        subsampled_exp = f"h2-space-{exp_group}-raw{limit}-mem"

        f, ax = plt.subplots(1, 1, figsize=(figsize[0]/2.54, figsize[1]/2.54))

        for curves, linestyle in ((acc_1, "-"), (acc_5, "--")):
            for exp, color_key in ((compressed_exp, variant), (subsampled_exp, "raw")):
                xs, _, ys = map(list, zip(*curves[exp]))
                ax.plot(xs, ys, aa=True, linestyle=linestyle, color=COLOR_DICT[color_key])

        ax.set_xlabel("Epoch")
        ax.set_xlim(0, epochs)
        if print_ylabel:
            ax.set_ylabel("Accuracy (%)")
        ax.set_ylim(0, 100)
        ax.locator_params(axis='x', nbins=4)

        if tight_layout:
            plt.tight_layout(pad=1.02)

        if DEBUG:
            plt.show()
            continue

        # Tight bbox comes back in pixels; dividing by the dpi gives inches
        pixel_bbox = ax.get_tightbbox(f.canvas.get_renderer())
        bbox = TransformedBbox(pixel_bbox, Affine2D().scale(1./f.dpi)).get_points()

        plt.savefig(f"../talk/{filename}_{limit}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
        plt.close()

    if DEBUG:
        return None
    # Extent of the last figure drawn
    return (bbox[0][1] - bbox_padding, bbox[1][1] + bbox_padding)

In [None]:
def plot_legend(ids, filename, ncol=2, figsize=(10,10), tight_layout=True, bbox_inches="tight", bbox_padding=0.05):
    """Render a figure that contains nothing but a legend.

    Each id becomes one legend entry:
      * contains "Top-1" / "Top-5": black solid / dashed line labelled with the id
      * starts with "b_": dashed line in the colour of the id without that prefix
      * "raw", "subsample": patch in the "raw" colour ("Original" / "Subsample")
      * "hdd", "ssd", "memory", "gpu": patch in ``SNS_COLORS[0..3]``
      * anything else: patch in ``COLOR_DICT[id]``, labelled "QF <last two digits>"

    Args:
        ids: Legend entry ids, in display order.
        filename: Stem of the SVG written to ``../talk/``.
        ncol: Number of legend columns.
        figsize: Figure size in centimetres.
        tight_layout: Whether to call ``plt.tight_layout``.
        bbox_inches: Passed through to ``savefig``.
        bbox_padding: ``pad_inches`` passed to ``savefig``.
    """
    # id -> (index into SNS_COLORS, label) for the storage / device patches
    device_patches = {"hdd": (0, "HDD"), "ssd": (1, "SSD"), "memory": (2, "Memory"), "gpu": (3, "GPU")}

    f = plt.figure(figsize=(figsize[0]/2.54, figsize[1]/2.54))

    handles = []
    for i in ids:
        if "Top-1" in i:
            handle = mlines.Line2D([], [], color="black", alpha=0.6, linestyle="-", label=i)
        elif "Top-5" in i:
            handle = mlines.Line2D([], [], color="black", alpha=0.6, linestyle="--", label=i)
        elif i.startswith("b_"):
            baseline_label = "Original" if "raw" in i else f"QF {int(i[-2:])}"
            handle = mlines.Line2D([], [], color=COLOR_DICT[i[2:]], linestyle="--", label=baseline_label)
        elif i == "raw":
            handle = mpatches.Patch(color=COLOR_DICT[i], label="Original")
        elif i == "subsample":
            handle = mpatches.Patch(color=COLOR_DICT["raw"], label="Subsample")
        elif i in device_patches:
            color_idx, device_label = device_patches[i]
            handle = mpatches.Patch(color=SNS_COLORS[color_idx], label=device_label)
        else:
            handle = mpatches.Patch(color=COLOR_DICT[i], label=f"QF {int(i[-2:])}")
        handles.append(handle)

    f.legend(
        handles=handles,
        loc="center",
        ncol=ncol,
        frameon=False,
        columnspacing=2,
        borderaxespad=0.5,
        labelspacing=0.5,
        fontsize=mpl.rcParams["font.size"],
        handlelength=1.5,
    )

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        plt.show()
        return

    plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches=bbox_inches, pad_inches=bbox_padding)
    plt.close()

In [None]:
def plot_regimen_experiment(exps, baselines, filename, tight_layout=True, figsize=(10,10), legend_loc="lower right", epochs=90, zoom_min=60, zoom_max=80, print_ylabel=False, bbox_padding=0.05):
    """Plot top-1 accuracy of training regimens over thin dashed baseline curves.

    The regimen is the fourth dash-separated segment of the experiment name;
    the part before its first underscore reads "<quality>:<value>". A value of
    "45" is labelled "SR", anything else "TLR". The view is limited to the last
    tenth of training and to ``[zoom_min, zoom_max]``.

    Args:
        exps: Regimen experiment names (keys of ``acc_1``).
        baselines: Baseline experiment names (keys of ``acc_1``).
        filename: Stem of the SVG written to ``../talk/``.
        tight_layout: Whether to call ``plt.tight_layout``.
        figsize: Figure size in centimetres.
        legend_loc: Legend location.
        epochs: Upper x limit.
        zoom_min: Lower y limit.
        zoom_max: Upper y limit.
        print_ylabel: Whether to label the y axis.
        bbox_padding: ``pad_inches`` passed to ``savefig``.

    Returns:
        ``(y_min, y_max)`` of the padded tight bounding box in inches when the
        figure is saved; ``None`` when ``DEBUG`` is set.
    """
    f, ax = plt.subplots(1, 1, figsize=(figsize[0]/2.54, figsize[1]/2.54))

    # Print baselines first so that their z-order is below regimens
    for exp in baselines:
        baseline_color = COLOR_DICT[exp.split("-")[4]]
        xs, _, ys = map(list, zip(*acc_1[exp]))
        ax.plot(xs, ys, aa=True, linestyle="--", color=baseline_color, alpha=0.9, linewidth=0.9)

    handles = []
    for exp in exps:
        first_stage = exp.split("-")[3].split("_")[0].split(":")
        regimen_type = "SR" if first_stage[1] == "45" else "TLR"
        variant = first_stage[0]
        color_key = f"jpeg{variant}"
        handles.append(mlines.Line2D([], [], color=COLOR_DICT[color_key], label=f"{regimen_type} QF {variant}"))

        xs, _, ys = map(list, zip(*acc_1[exp]))
        ax.plot(xs, ys, aa=True, linestyle="-", color=COLOR_DICT[color_key], linewidth=1.5)

    # Two legend rows: half of the entries (rounded up) per row
    ax.legend(handles=handles, loc=legend_loc, ncol=(len(handles) + 1) // 2)
    ax.set_xlabel("Epoch")
    ax.set_xlim(epochs * 0.9, epochs)
    if print_ylabel:
        ax.set_ylabel("Top-1 accuracy (%)")
    ax.set_ylim(zoom_min, zoom_max)
    ax.set_yticks([zoom_min, (zoom_min + zoom_max) / 2, zoom_max])

    reference = None
    if "r50" in filename:
        if "inet" in filename:
            reference = 75.9
        elif "p365" in filename:
            reference = 54.74
    if reference is not None:
        ax.axhline(reference, color="gray", alpha=0.8, linestyle=":")

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        ax.set_title(filename)
        plt.show()
        return None

    # Tight bbox comes back in pixels; dividing by the dpi gives inches
    pixel_bbox = ax.get_tightbbox(f.canvas.get_renderer())
    corners = TransformedBbox(pixel_bbox, Affine2D().scale(1./f.dpi)).get_points()

    plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
    plt.close()

    return (corners[0][1] - bbox_padding, corners[1][1] + bbox_padding)

In [None]:
def plot_orthogonality_experiment(exp_groups, tick_labels, filename, tight_layout=True, figsize=(15,10), legend_loc="upper left", bbox_padding=0.05):
    """Draw four throughput bars per group: default, MinIO, compression, combined.

    For each group the runs ``<group>-raw-pytorch``, ``<group>-raw-minio``,
    ``<group>-jpeg85-pytorch`` and ``<group>-jpeg85-minio`` are read from
    ``epoch_times``. Throughput is ``N_IMAGENET_TRAIN`` divided by the mean
    epoch time, with the first recorded epoch left out.

    Args:
        exp_groups: Group name prefixes.
        tick_labels: One x tick label per group.
        filename: Stem of the SVG written to ``../talk/``.
        tight_layout: Whether to call ``plt.tight_layout``.
        figsize: Figure size in centimetres.
        legend_loc: Legend location.
        bbox_padding: ``pad_inches`` passed to ``savefig``.
    """
    # (experiment suffix, COLOR_DICT key, legend label, width multiple, spacing multiple)
    series = (
        ("raw-pytorch", "default", "Default", -1.5, -3),
        ("raw-minio", "minio", "MinIO", -0.5, -1),
        ("jpeg85-pytorch", "compressed", "Compression", 0.5, 1),
        ("jpeg85-minio", "combined", "Combined", 1.5, 3),
    )

    fig, ax = plt.subplots(1, 1, figsize=(figsize[0]/2.54, figsize[1]/2.54))

    width = 0.2
    spacing = 0.05 * width
    x_ticks = np.arange(len(exp_groups))

    bars = []
    for i, group in enumerate(exp_groups):
        # Read all four runs of the group before drawing any of its bars
        mean_times = [
            np.array(epoch_times[f"{group}-{suffix}"])[1:,1].mean()
            for suffix, _, _, _, _ in series
        ]
        for (_, color_key, _, width_mult, spacing_mult), mean_time in zip(series, mean_times):
            position = i + width_mult * width + spacing_mult * spacing
            bars += ax.bar(position, N_IMAGENET_TRAIN / mean_time, width, color=COLOR_DICT[color_key])

    handles = [
        mpatches.Patch(color=COLOR_DICT[color_key], label=label)
        for _, color_key, label, _, _ in series
    ]

    ax.legend(handles=handles, loc=legend_loc, ncol=4)
    ax.set_xticks(ticks=x_ticks)
    ax.set_xticklabels(labels=tick_labels)
    ax.set_ylabel("Throughput (img/s)")
    # Leave 10 % headroom above the tallest bar
    ax.set_ylim(0, ax.get_ylim()[1] * 1.1)

    autolabel_bars(ax, bars, precision=0)

    if tight_layout:
        plt.tight_layout(pad=1.02)

    if DEBUG:
        ax.set_title(filename)
        plt.show()
        return

    plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches="tight", pad_inches=bbox_padding)
    plt.close()

In [None]:
def plot_label(label, filename, figsize, width, height):
    """Save a figure that shows only a vertical axis label, cropped to a fixed box.

    Args:
        label: Text passed to ``Figure.supylabel``.
        filename: Stem of the SVG written to ``../talk/``.
        figsize: Figure size in centimetres.
        width: ``(x0, x1)`` of the crop box handed to ``savefig``.
        height: ``(y0, y1)`` of the crop box handed to ``savefig``.
    """
    size_inches = (figsize[0] / 2.54, figsize[1] / 2.54)
    f, ax = plt.subplots(1, 1, figsize=size_inches)
    f.supylabel(label)
    # The axes only existed to create the figure; remove them so just the label is left
    ax.remove()

    lower_left = [width[0], height[0]]
    upper_right = [width[1], height[1]]
    crop_box = Bbox([lower_left, upper_right])
    plt.savefig(f"../talk/{filename}.svg", format="svg", bbox_inches=crop_box)
    plt.close()

In [None]:
# When DEBUG is true the plot functions show figures (with the target file name
# as title) instead of writing SVGs to ../talk/.
DEBUG = False
latexify(base_size=11)

# Change some plotting parameters compared to the paper.
# The agg backend is necessary to get the Bbox size with
# ax.get_tightbbox(f.canvas.get_renderer()) since the ps and pdf backends do
# not implement canvas.get_renderer().
talk_rc_overrides = {
    "backend": "agg",
    "font.family": "sans-serif",
    "svg.fonttype": "none",
    "text.usetex": False,
}
mpl.rcParams.update(talk_rc_overrides)

## Introduction Teaser

In [None]:
# Teaser, left: standalone loader throughput for every "dalic" run except jpeg75
teaser_throughput_exps = [
    exp
    for exp in sorted(experiments)
    if "standalone" in exp and "dalic" in exp and "jpeg75" not in exp
]
plot_standalone_dl_comparison(
    teaser_throughput_exps,
    "intro_teaser_throughput",
    tick_labels=["Original", "Compressed"],
    figsize=(9, 6),
    legend_loc="upper center",
    print_xlabel=False,
    print_ylabel=True,
    ylim_max=5500,
)

# Teaser, right: best top-1 accuracy of the raw run next to the jpeg10 run
plot_intro_acc_comparison(
    ["h2-acc-inet-r18-raw-mem", "h2-acc-inet-r18-jpeg10-mem"],
    "intro_teaser_accuracy",
    figsize=(4.5, 6),
)

## H1: Fetch and Decoding Bottlenecks

In [None]:
h1_sorted = sorted(experiments)
h1_r50_exps = [exp for exp in h1_sorted if "h1-r50-raw" in exp and "w12" not in exp]
h1_alex_exps = [exp for exp in h1_sorted if "h1-alex-raw" in exp]
h1_dalic_exps = [exp for exp in h1_sorted if "standalone" in exp and "dalic" in exp]

# The vertical extent of the first plot is reused to crop the shared y label
ybbox = plot_throughput_comparison(h1_r50_exps, "h1a_r50", figsize=(10, 7), width=0.85, ylim_max=9200)
plot_throughput_comparison(h1_alex_exps, "h1a_alex", figsize=(10, 7), width=0.85, ylim_max=9200)
plot_standalone_dl_comparison(
    h1_dalic_exps,
    "h1b_dalic",
    width=0.85,
    tick_labels=["Original", "JPEG 75", "JPEG 10"],
    figsize=(11, 5),
    ylim_max=4500,
)

plot_legend(["hdd", "ssd", "memory", "gpu"], "h1_legend_1row", figsize=(8, 3), ncol=4)
plot_label("Throughput (img/s)", "label_throughput_h1", figsize=(10, 7), width=(0, 0.32), height=ybbox)

In [None]:
# JPEG vs WebP comparison: every h1 alex run whose name contains "85"
codec_exps = [
    exp
    for exp in experiments
    if "h1" in exp and "alex" in exp and "85" in exp
]
plot_codec_comparison(
    codec_exps,
    "h1c_codec_comparison",
    figsize=(14, 7),
    ylim_max=7200,
    width=0.8,
)

## H2: Image Compression as a Drop-in Replacement

### H2a: Achievable accuracy

In [None]:
# Full-range accuracy curves for ResNet50 on ImageNet (raw + JPEG variants)
h2a_overview_exps = [
    exp
    for exp in sorted(experiments)
    if "h2-acc-inet-r50-raw" in exp or "h2-acc-inet-r50-jpeg" in exp
]
plot_acc_experiment(h2a_overview_exps, "h2a_inet_r50_jpeg", figsize=(12, 6), legend="lower right", print_ylabel=True)

# Zoomed plots: (dataset, model, codec, zoom_min, zoom_max)
zoom_specs = [
    ("inet", "r50", "jpeg", 71, 79),
    ("inet", "r50", "webp", 71, 79),
    ("inet", "r18", "jpeg", 66, 72),
    ("inet", "alex", "jpeg", 50, 56),
    ("p365", "r50", "jpeg", 50, 56),
    ("p365", "r50", "webp", 50, 56),
    ("p365", "r18", "jpeg", 48, 56),
    ("p365", "alex", "jpeg", 44, 50),
]
zoom_bboxes = []
for dataset, model, codec, zoom_min, zoom_max in zoom_specs:
    prefix = f"h2-acc-{dataset}-{model}"
    zoom_exps = [
        exp
        for exp in sorted(experiments)
        if f"{prefix}-raw" in exp or f"{prefix}-{codec}" in exp
    ]
    zoom_bboxes.append(
        plot_acc_experiment_zoom(
            zoom_exps,
            f"h2a_{dataset}_{model}_{codec}_zoom",
            figsize=(7.2, 4.5),
            zoom_min=zoom_min,
            zoom_max=zoom_max,
        )
    )
# The vertical extent of the first zoom plot is reused to crop the shared y label
ybbox = zoom_bboxes[0]

plot_legend(["raw", "jpeg85", "jpeg75", "jpeg50", "jpeg25", "jpeg10", "jpeg05", "jpeg01"], "h2a_legend_1row", figsize=(8, 3), ncol=8)
plot_label("Accuracy (%)", "label_top1_h2a", figsize=(7.2, 4.5), width=(0, 0.3), height=ybbox)

### H2b: Time limit experiments

In [None]:
# Time-limit plots: (model, storage, limit in hours)
time_specs = [("r50", "hdd", 10), ("r50", "hdd", 20), ("alex", "ssd", 5)]
time_bboxes = []
for model, storage, hours in time_specs:
    time_exps = [
        exp
        for exp in sorted(experiments)
        if f"h2-time-inet-{model}" in exp and f"{storage}-{hours}h" in exp
    ]
    time_bboxes.append(
        plot_time_experiment(
            time_exps,
            f"h2b_inet_{model}_{storage}_{hours}h_time",
            hours,
            figsize=(6.5, 4.7),
        )
    )
# The vertical extent of the first plot is reused to crop the shared y label
ybbox = time_bboxes[0]

plot_legend(["raw", "jpeg85", "jpeg75", "jpeg50", "jpeg25", "jpeg10", "Top-1 accuracy", "Top-5 accuracy"], "h2b_legend_1row", figsize=(10, 3), ncol=8)
plot_label("Accuracy (%)", "label_acc_h2b", figsize=(6.5, 4.7), width=(0, 0.28), height=ybbox)

### H2c: Storage limit experiments

In [None]:
# One figure per storage limit; the returned extent crops the shared y label
h2c_figsize = (6, 5)
ybbox = plot_space_experiment("inet-r50", "h2c_inet_r50", figsize=h2c_figsize)

h2c_legend_ids = [
    "subsample",
    "jpeg85",
    "jpeg75",
    "jpeg50",
    "jpeg25",
    "jpeg10",
    "Top-1 accuracy",
    "Top-5 accuracy",
]
plot_legend(h2c_legend_ids, "h2c_legend_1row", figsize=(10, 3), ncol=8)
plot_label("Accuracy (%)", "label_acc_h2c", figsize=h2c_figsize, width=(0, 0.27), height=ybbox)

## H3: Training Regimens

In [None]:
# Baseline curves drawn beneath both regimen plots
h3_baselines = [
    "h2-acc-inet-r50-jpeg10-mem",
    "h2-acc-inet-r50-jpeg25-mem",
    "h2-acc-inet-r50-jpeg50-mem",
    "h2-acc-inet-r50-jpeg75-mem",
    "h2-acc-inet-r50-jpeg85-mem",
    "h2-acc-inet-r50-raw-mem",
]

# (marker searched for after the first ":" of the regimen segment, output name)
regimen_specs = [("45", "h3_simple"), ("80", "h3_transfer_learning")]
regimen_bboxes = []
for marker, plot_name in regimen_specs:
    regimen_exps = [
        exp
        for exp in experiments
        if "h3" in exp and marker in exp.split("-")[3].split(":")[1]
    ]
    regimen_bboxes.append(
        plot_regimen_experiment(
            regimen_exps,
            list(h3_baselines),
            plot_name,
            figsize=(12, 5.85),
            legend_loc="lower right",
            zoom_min=71,
            zoom_max=77,
        )
    )
# The vertical extent of the first plot is reused to crop the shared y label
ybbox = regimen_bboxes[0]

plot_legend(["b_raw", "b_jpeg85", "b_jpeg75", "b_jpeg50", "b_jpeg25", "b_jpeg10"], "h3_legend_1row", figsize=(10, 3), ncol=6)
plot_label("Top-1 accuracy (%)", "label_top1_h3", figsize=(12, 5.85), width=(0, 0.36), height=ybbox)

## H4: Orthogonality Analysis

In [None]:
# (experiment group prefix, x tick label) for every bar group
h4_groups = [
    ("h4-inet-r50-hdd", "ResNet50 (HDD)"),
    ("h4-inet-r18-hdd", "ResNet18 (HDD)"),
    ("h4-inet-alex-hdd", "AlexNet (HDD)"),
    ("h4-inet-alex-ssd", "AlexNet (SSD)"),
]
plot_orthogonality_experiment(
    [group for group, _ in h4_groups],
    [label for _, label in h4_groups],
    "h4_orthogonality",
    figsize=(18, 7),
)