In [None]:
import pandas as pd
import numpy as np

# Columns removed by `read_df` right after a results file is loaded.
COLUMNS_TO_DROP = ["precision", "recall"]

# Value interpolated into the caption of the generated LaTeX tables (see `to_latex`).
train_perc: float = 1

# Dataset identifiers; `read_df` interpolates them into the results file name.
cifarcoarse_name: str = "cifar100-coarse"
cifarfine_name: str = "cifar100-fine"
imagenet_name: str = "imagenet-1k"


def rearrange_embedtype_as_column(mydf):
    """Turn the ``embed_type`` rows of a results table into side-by-side columns.

    Rows with ``embed_type == "relative"`` and ``embed_type == "absolute"`` are
    split apart, their metric columns are nested under a ``"Relative"`` /
    ``"Absolute"`` top-level label, and the two halves are joined on the
    identifier columns (``train_model``, ``test_model``, ``seed``, ``stitched``).

    Columns are relabelled by *name*, not by position, so headers stay attached
    to the right data whatever the column order of ``mydf`` is.

    Args:
        mydf: DataFrame with flat columns ``seed``, ``embed_type``,
            ``train_model``, ``test_model`` and ``stitched`` plus the metric
            column(s) (``fscore`` in this notebook).

    Returns:
        DataFrame with two-level columns: identifiers as ``(name, "")`` and
        metrics as ``("Relative", metric)`` / ``("Absolute", metric)``. Because
        ``pd.merge`` defaults to an inner join, only identifier combinations
        present for both embedding types are kept.

    Raises:
        KeyError: If ``embed_type`` or one of the join columns is missing.
    """
    join_keys = [
        ("train_model", ""),
        ("test_model", ""),
        ("seed", ""),
        ("stitched", ""),
    ]
    identifier_names = {"embed_type"} | {name for name, _ in join_keys}

    def _rows_for(embed_type, label):
        # One half of the table: rows of a single embedding type, metrics nested under `label`.
        # The explicit copy makes it obvious that relabelling cannot touch `mydf`.
        subset = mydf[mydf["embed_type"] == embed_type].copy()
        subset.columns = pd.MultiIndex.from_tuples(
            [(name, "") if name in identifier_names else (label, name) for name in subset.columns]
        )
        # The embedding type is now encoded in the column label, so the column itself is redundant.
        return subset.drop(columns=[("embed_type", "")])

    return pd.merge(
        _rows_for("relative", "Relative"),
        _rows_for("absolute", "Absolute"),
        on=join_keys,
    )


def read_df(dataset_name, train_perc):
    """Load one stitching-results TSV and reshape it for the tables in this notebook.

    Args:
        dataset_name: Dataset identifier interpolated into the file name.
        train_perc: Training-percentage tag interpolated into the file name.

    Returns:
        The frame produced by ``rearrange_embedtype_as_column`` after the
        columns listed in ``COLUMNS_TO_DROP`` are removed and ``fscore`` is
        multiplied by 100 (presumably converting a fraction to a percentage).
    """
    results_path = f"vision_transformer-stitching-{dataset_name}-{train_perc}.tsv"

    # The first column of the file is used as the row index.
    scores = pd.read_csv(results_path, sep="\t", index_col=0)
    scores = scores.drop(columns=COLUMNS_TO_DROP)
    scores["fscore"] = scores["fscore"].mul(100)

    return rearrange_embedtype_as_column(scores)


# Load the three result tables. The second argument is the training-percentage
# tag in the results file name: "0.2" for ImageNet, "1" for both CIFAR-100 variants.
cifarcoarse = read_df(dataset_name=cifarcoarse_name, train_perc="1")
cifarfine = read_df(dataset_name=cifarfine_name, train_perc="1")
imagenet = read_df(dataset_name=imagenet_name, train_perc="0.2")

# CIFAR Coarse + ImageNet

In [None]:
# Re-read both tables so this cell can be re-run safely: the relabelling below
# replaces their column index in place.
cifarcoarse = read_df(cifarcoarse_name, "1")
imagenet = read_df(imagenet_name, "0.2")


def _with_dataset_level(columns, dataset_label):
    """Lift two-level result columns to three levels, nesting metrics under a dataset.

    Identifier columns ``(name, "")`` become ``(name, "", "")``; metric columns
    ``(embed, metric)`` become ``(dataset_label, embed, metric)``. Mapping by
    label instead of by position keeps headers attached to the right data
    whatever the column order of the input is.
    """
    return pd.MultiIndex.from_tuples(
        [
            (top, "", "") if sub == "" else (dataset_label, top, sub)
            for top, sub in columns
        ]
    )


cifarcoarse.columns = _with_dataset_level(cifarcoarse.columns, "Cifar100 Coarse")
imagenet.columns = _with_dataset_level(imagenet.columns, "imagenet")

# The columns now have three levels, so the join keys are spelled out as full
# three-element labels instead of relying on partial-key matching.
# An outer join keeps combinations that were evaluated on only one of the datasets.
cifarimagenet = pd.merge(
    cifarcoarse,
    imagenet,
    how="outer",
    on=[
        ("seed", "", ""),
        ("train_model", "", ""),
        ("test_model", "", ""),
        ("stitched", "", ""),
    ],
)

In [None]:
def to_latex(df, label):
    """Render ``df`` as a LaTeX table.

    Args:
        df: DataFrame to render.
        label: Suffix used in both the caption and the LaTeX label.

    Returns:
        The LaTeX source returned by ``DataFrame.to_latex``. Cell contents are
        not escaped, so LaTeX markup inside cells is emitted verbatim.

    Note:
        The caption reads the module-level ``train_perc``, not a per-table value.
        NOTE(review): the label prefix is ``tab:multilingual-`` although this
        notebook loads vision-dataset results; confirm this is intended.
    """
    caption_text = f"Train perc: {train_perc} {label}"
    table_label = f"tab:multilingual-{label}"
    return df.to_latex(
        caption=caption_text,
        label=table_label,
        escape=False,
        multirow=True,
        sparsify=True,
        multicolumn_format="c",
    )

In [None]:
# Remove the row limit so displayed frames are never truncated.
pd.options.display.max_rows = None


def formatter(mean, std):
    r"""Render a mean/std pair as the content of one LaTeX table cell.

    Args:
        mean: Mean value, or a string placeholder when the value is missing.
        std: Standard deviation, or a string placeholder when it is missing.

    Returns:
        ``$<mean> \pm <std>$`` with both numbers formatted to two decimals, or
        a centred dash (``\multicolumn{1}{c}{-}``) when either argument is a
        string. The tables in this notebook replace NaN with ``"-"`` before
        calling this function, so a string marks a missing aggregate.
    """
    if any(isinstance(value, str) for value in (mean, std)):
        return r"\multicolumn{1}{c}{-}"
    # Raw f-string: "\p" is not a valid Python escape sequence, and a non-raw
    # literal triggers an invalid-escape warning on current interpreters.
    return rf"${mean:.2f} \pm {std:.2f}$"

In [None]:
# Show the three-level column index of the merged CIFAR-100 coarse / ImageNet table.
cifarimagenet.columns

In [None]:
from pathlib import Path

# Aggregate the merged CIFAR-100 coarse / ImageNet results over seeds and print
# them as a LaTeX table; the numeric table is displayed as the cell output.
train_col = ("train_model", "", "")
test_col = ("test_model", "", "")

# Exclude every row that involves cspdarknet53, as either train or test model.
# The explicit copy guarantees the assignments below never target a slice of `cifarimagenet`.
keep_rows = (cifarimagenet[train_col] != "cspdarknet53") & (cifarimagenet[test_col] != "cspdarknet53")
full_df = cifarimagenet[keep_rows].copy()

# Replace underscores in model names with hyphens; `to_latex` emits cell text
# unescaped, and a bare "_" is not valid in LaTeX text mode.
for model_col in (train_col, test_col):
    full_df[model_col] = [name.replace("_", "-") for name in full_df[model_col]]

full_df = full_df.drop(columns=[("stitched", "", ""), ("seed", "", "")])

# Aggregations are named by string: passing np.mean / np.std to groupby.agg is
# deprecated in pandas, and once the callables are used directly np.std would
# compute the population (ddof=0) instead of the sample (ddof=1) deviation.
df = (
    full_df.groupby([train_col, test_col])
    .agg(["mean", "std"])
    .round(2)
)
# Missing aggregates become "-", which `formatter` renders as a dash cell.
df = df.fillna("-")

# Keep the numeric table for display before its columns are replaced by LaTeX strings.
o = df.copy()

for dataset_name in ("Cifar100 Coarse", "imagenet"):
    for embed in ("Absolute", "Relative"):
        for metric, new_name in (("fscore", "FScore"),):
            mean_col = (dataset_name, embed, metric, "mean")
            std_col = (dataset_name, embed, metric, "std")
            # Collapse the mean/std pair into a single "mean ± std" LaTeX cell.
            df[(dataset_name, embed, new_name, "")] = [
                formatter(mean, std) for mean, std in zip(df[mean_col], df[std_col])
            ]
            df = df.drop(columns=[mean_col, std_col])

print(to_latex(df, "en"))
o

# CIFAR Fine

In [None]:
# Aggregate the CIFAR-100 fine results over seeds and print them as a LaTeX
# table; the numeric table is displayed as the cell output.
train_col = ("train_model", "")
test_col = ("test_model", "")

# Exclude every row that involves cspdarknet53, as either train or test model.
# The explicit copy guarantees the assignments below never target a slice of `cifarfine`.
keep_rows = (cifarfine[train_col] != "cspdarknet53") & (cifarfine[test_col] != "cspdarknet53")
full_df = cifarfine[keep_rows].copy()

# Replace underscores in model names with hyphens; `to_latex` emits cell text
# unescaped, and a bare "_" is not valid in LaTeX text mode.
for model_col in (train_col, test_col):
    full_df[model_col] = [name.replace("_", "-") for name in full_df[model_col]]

full_df = full_df.drop(columns=[("stitched", ""), ("seed", "")])

# Aggregations are named by string: passing np.mean / np.std to groupby.agg is
# deprecated in pandas, and once the callables are used directly np.std would
# compute the population (ddof=0) instead of the sample (ddof=1) deviation.
df = (
    full_df.groupby([train_col, test_col])
    .agg(["mean", "std"])
    .round(2)
)
# Missing aggregates become "-", which `formatter` renders as a dash cell.
df = df.fillna("-")

# Fix the column order: Absolute first, then Relative.
cols = [
    ("Absolute", "fscore", "mean"),
    ("Absolute", "fscore", "std"),
    ("Relative", "fscore", "mean"),
    ("Relative", "fscore", "std"),
]
df = df[cols]
# Keep the numeric table for display before its columns are replaced by LaTeX strings.
o = df.copy()

for embed in ("Absolute", "Relative"):
    for metric, new_name in (("fscore", "FScore"),):
        mean_col = (embed, metric, "mean")
        std_col = (embed, metric, "std")
        # Collapse the mean/std pair into a single "mean ± std" LaTeX cell.
        df[(embed, new_name, "")] = [
            formatter(mean, std) for mean, std in zip(df[mean_col], df[std_col])
        ]
        df = df.drop(columns=[mean_col, std_col])

print(to_latex(df, "en"))
o