In [116]:
from collections import OrderedDict
from os import makedirs
from os.path import join
from json import load

from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu, ttest_ind, iqr, shapiro

In [117]:
# Input: root directory that the per-run log folders are looked up in.
logs_dir = "../logs/MarkedReconstructionModel"

# Output: the report folder and its two sub-folders.
report = "../report"
figures, tables = (join(report, subdir) for subdir in ("figures", "tables"))

# Threshold that the Mann-Whitney U p-value is compared against in compare_versions.
p = 0.05

# Make sure both output folders exist; already existing ones are left alone.
makedirs(figures, exist_ok=True)
makedirs(tables, exist_ok=True)

In [213]:
def get_values(filename):
    """Return the first data column of a CSV as an array.

    The CSV is read with its ``step`` column as the index, so the "first
    column" is the first one that remains after ``step`` is removed.
    """
    frame = pd.read_csv(filename, index_col="step")
    first_column = frame.iloc[:, 0]
    return first_column.values

def get_val_ious(version):
    """Load the values stored in ``<logs_dir>/<version>/tensorboard/val_iou_step.csv``."""
    path_parts = (logs_dir, version, "tensorboard", "val_iou_step.csv")
    return get_values(join(*path_parts))

def get_summary(version):
    """Parse and return ``<logs_dir>/<version>/tensorboard/summary.json``."""
    summary_path = join(logs_dir, version, "tensorboard", "summary.json")
    with open(summary_path, "r") as summary_file:
        return load(summary_file)

def format(value):
    """Render ``value`` as text for a table cell.

    Floating point values (Python ``float`` and any NumPy floating type) are
    rounded to three decimals; everything else is passed through ``str``.

    Note: this intentionally keeps its original name, which shadows the
    built-in ``format`` for the rest of the notebook.
    """
    # isinstance instead of an exact type match, so np.float32 and float
    # subclasses are formatted the same way as float / np.float64.
    if isinstance(value, (float, np.floating)):
        return "{:.3f}".format(value)
    return str(value)

def format_significance(value):
    """Wrap the formatted ``value`` in a LaTeX ``\\textbf{...}`` command."""
    return f"\\textbf{{{format(value)}}}"

def highligh_best(df, k, higher=True):
    """Bold the best value(s) of column ``k`` in place.

    Args:
        df: DataFrame whose column ``k`` is rewritten.
        k: Column label.
        higher: If true the maximum counts as best, otherwise the minimum.

    Every cell equal to the best value is replaced by its formatted text
    wrapped in ``\\textbf{...}``; all other cells are left unchanged.
    """
    column = df[k]
    # max()/min() instead of unpacking nlargest(2)/nsmallest(2): the runner-up
    # was never used, and the unpacking failed on columns with fewer than two
    # non-NaN values.
    best = column.max() if higher else column.min()
    df[k] = column.apply(lambda x: "\\textbf{" + format(x) + "}" if x == best else x)

# def highlight_significance(df, k, significance):
#     for i in df.index:
#         df.loc[i, k] = format_significance(df.loc[i, k]) if significance[i] else df.loc[i, k]

def highlight_significance(df, k, significance):
    """Bold the trailing group of rows of column ``k`` in place.

    Args:
        df: DataFrame whose index holds the row labels.
        k: Label of the column to rewrite.
        significance: Mapping from row label to a truthy/falsy flag, in row
            order.

    The group starts at the last label whose flag is truthy (or at the first
    label when no flag is truthy) and runs to the final label. Each row of
    ``df`` whose index is in that group has its cell replaced by
    ``format_significance(cell)``. An empty ``significance`` is a no-op.
    """
    labels = list(significance)
    if not labels:
        # Previously this raised StopIteration, because the fallback
        # next(iter(...)) was evaluated even when it was not needed.
        return

    # Walk backwards to find the last flagged label; default to the first one.
    start = 0
    for position in range(len(labels) - 1, -1, -1):
        if significance[labels[position]]:
            start = position
            break
    highlighted = set(labels[start:])

    rows = [i for i in df.index if i in highlighted]
    if not rows:
        return

    # Cast explicitly before writing strings: relying on the implicit
    # float -> object upcast on assignment is deprecated in recent pandas.
    df[k] = df[k].astype(object)
    for i in rows:
        df.loc[i, k] = format_significance(df.loc[i, k])


def compare_versions(versions, name):
    """Build a median/IQR table of validation IoU for several runs and save it as LaTeX.

    Args:
        versions: Mapping from run directory name (as understood by
            ``get_summary``) to the row label shown in the table. Iteration
            order is the row order.
        name: Title of the index column; with spaces replaced by dashes it
            also names the output file ``versions-<name>.tex`` in ``tables``.

    Each run after the first is compared with the run listed directly before
    it using a Mann-Whitney U test on the ``val_iou_step`` values; the outcome
    (p-value below ``p``) is handed to ``highlight_significance`` to decide
    which medians are set in bold.

    Returns:
        The DataFrame that was written to the LaTeX file.
    """
    latex_path = join(tables, "versions-{}.tex".format(name.replace(" ", "-")))

    metric = "Single curve \\acrshort{iou} $\\uparrow$"
    median_column = (metric, "median")
    iqr_column = (metric, "\\acrshort{iqr}")

    rows = {}
    significance = OrderedDict()
    previous_values = None
    for version, label in versions.items():
        iou_stats = get_summary(version)["val_iou_step"]
        current_values = iou_stats["values"]

        # The first run has no predecessor to be tested against.
        if previous_values is None:
            differs = False
        else:
            differs = mannwhitneyu(current_values, previous_values).pvalue < p
        previous_values = current_values

        row = {
            median_column: iou_stats["median"],
            iqr_column: iou_stats["iqr"],
        }
        significance[label] = differs
        rows[label] = row

    df = pd.DataFrame.from_dict(rows, orient="index")
    df.index.name = name
    # Only the first column (the median) receives the bold markup.
    highlight_significance(df, df.columns[0], significance)

    with open(latex_path, "w") as tex_file:
        df.to_latex(buf=tex_file, float_format="%.3f", multicolumn_format="c")
    return df

In [171]:
# Keys are run directories under logs_dir, values are the row labels.
# Order matters: each run is tested against the one listed before it.
versions = {
    "142-hilr": "0.005",
    "144-rotation": "0.0005",
    "141-lowlr": "0.00005",
}
compare_versions(versions, name="learning rate")

Unnamed: 0_level_0,Single curve \acrshort{iou} $\uparrow$,Single curve \acrshort{iou} $\uparrow$
Unnamed: 0_level_1,median,\acrshort{iqr}
learning rate,Unnamed: 1_level_2,Unnamed: 2_level_2
0.005,0.527305,0.000365
0.0005,\textbf{0.609},0.007746
5e-05,\textbf{0.610},0.001358


In [173]:
# Keys are run directories under logs_dir, values are the row labels.
# Order matters: the second run is tested against the first.
versions = {
    "version_141": "no global pooling",
    "142-globalpool": "global pooling",
}
compare_versions(versions, name="global pooling")

Unnamed: 0_level_0,Single curve \acrshort{iou} $\uparrow$,Single curve \acrshort{iou} $\uparrow$
Unnamed: 0_level_1,median,\acrshort{iqr}
global pooling,Unnamed: 1_level_2,Unnamed: 2_level_2
no global pooling,0.61012,0.00235
global pooling,\textbf{0.611},0.000825


In [201]:
# Keys are run directories under logs_dir, values are the row labels.
# Order matters: the second run is tested against the first.
versions = {
    "141-weightpathloss": "curve length loss weight",
    "version_141": "no curve length loss weight",
}
compare_versions(versions, name="curve length loss weight")

Unnamed: 0_level_0,Single curve \acrshort{iou} $\uparrow$,Single curve \acrshort{iou} $\uparrow$
Unnamed: 0_level_1,median,\acrshort{iqr}
curve length loss weight,Unnamed: 1_level_2,Unnamed: 2_level_2
curve length loss weight,0.601902,0.001638
no curve length loss weight,\textbf{0.610},0.00235


In [184]:
# Keys are run directories under logs_dir, values are the row labels.
# Order matters: the second run is tested against the first.
versions = {
    "142-combined-dataaug": "without TU Berlin",
    "142-combined-dataaug-tuberl": "with TU Berlin",
}
compare_versions(versions, name="TU Berlin subset")

Unnamed: 0_level_0,Single curve \acrshort{iou} $\uparrow$,Single curve \acrshort{iou} $\uparrow$
Unnamed: 0_level_1,median,\acrshort{iqr}
TU Berlin subset,Unnamed: 1_level_2,Unnamed: 2_level_2
without TU Berlin,0.501465,0.0003
with TU Berlin,\textbf{0.613},0.002087


In [207]:
# Keys are run directories under logs_dir, values are the row labels.
# Order matters: the second run is tested against the first.
versions = {
    "version_141": "no data augmentation",
    "141-dataaug": "data augmentation",
}
compare_versions(versions, name="data augmentation")

Unnamed: 0_level_0,Single curve \acrshort{iou} $\uparrow$,Single curve \acrshort{iou} $\uparrow$
Unnamed: 0_level_1,median,\acrshort{iqr}
data augmentation,Unnamed: 1_level_2,Unnamed: 2_level_2
no data augmentation,0.61012,0.00235
data augmentation,\textbf{0.617},0.002039


In [215]:
# Keys are run directories under logs_dir, values are the row labels.
# Order matters: the second run is tested against the first.
versions = {
    "144-rotation-tonari-binarized": "binarized",
    "144-rotation": "non-binarized",
}
compare_versions(versions, name="binarization")

Unnamed: 0_level_0,Single curve \acrshort{iou} $\uparrow$,Single curve \acrshort{iou} $\uparrow$
Unnamed: 0_level_1,median,\acrshort{iqr}
binarization,Unnamed: 1_level_2,Unnamed: 2_level_2
binarized,\textbf{0.606},0.004388
non-binarized,\textbf{0.608},0.022613


In [219]:
# Keys are run directories under logs_dir, values are the row labels.
# Order matters: the second run is tested against the first.
versions = {
    "143-combined": "1:1",
    "144-rotation": "1:5",
}
compare_versions(versions, name="synthetic data ratio")

Unnamed: 0_level_0,Single curve \acrshort{iou} $\uparrow$,Single curve \acrshort{iou} $\uparrow$
Unnamed: 0_level_1,median,\acrshort{iqr}
synthetic data ratio,Unnamed: 1_level_2,Unnamed: 2_level_2
1:1,0.549623,0.015417
1:5,\textbf{0.608},0.022613


In [223]:
# Keys are run directories under logs_dir, values are the row labels.
# Order matters: the second run is tested against the first.
versions = {
    "143-justvecloss-sketchbench": "vector loss",
    "144-rotation": "vector + raster loss",
}
compare_versions(versions, name="loss")

Unnamed: 0_level_0,Single curve \acrshort{iou} $\uparrow$,Single curve \acrshort{iou} $\uparrow$
Unnamed: 0_level_1,median,\acrshort{iqr}
loss,Unnamed: 1_level_2,Unnamed: 2_level_2
vector loss,0.559696,0.010238
vector + raster loss,\textbf{0.608},0.022613
