In [None]:
import pandas as pd
import numpy as np

# Interpolated verbatim into the results filename (read_df) and the LaTeX caption
# (to_latex), so the literal's textual form matters: 1 renders as "1", not "1.0".
# Presumably the training-set percentage of the run being reported — TODO confirm.
train_perc: float = 1
# Metric columns removed from the loaded results right after reading them.
COLUMNS_TO_DROP = ["precision", "recall"]
# Dataset identifier; part of the results filename, the LaTeX caption and the LaTeX label.
dataset_name: str = "cifar100-coarse"


def read_df(train_perc):
    """Load the stitching results table for one training percentage.

    The file name is assembled from the module-level ``dataset_name`` and the
    given ``train_perc`` and is resolved relative to the current working
    directory.

    Args:
        train_perc: Value interpolated as-is into the file name.

    Returns:
        The tab-separated file parsed into a ``pandas.DataFrame``, with the
        file's first column used as the index.
    """
    results_file = f"vision_transformer-stitching-{dataset_name}-{train_perc}.tsv"
    return pd.read_csv(results_file, sep="\t", index_col=0)


def rearrange_embedtype_as_column(mydf, domain):
    """Pivot the ``embed_type`` rows into side-by-side score columns.

    Rows whose ``("embed_type", "")`` value is ``"relative"`` and rows whose
    value is ``"absolute"`` are separated, relabelled with three-level column
    names, and inner-joined on train model, test model, seed and stitched, so
    that each matching pair ends up on a single row with one
    ``("Relative", "", "fscore")`` and one ``("Absolute", "", "fscore")``
    column.

    Args:
        mydf: Frame with exactly six columns, in the order seed, embed_type,
            train_model, test_model, fscore, stitched. The relabelling is
            positional, so both the count and the order matter.
        domain: Accepted for the caller's convenience; not used by this
            function.

    Returns:
        The merged ``pandas.DataFrame`` without the ``embed_type`` column.
    """
    join_keys = [
        ("train_model", "", ""),
        ("test_model", "", ""),
        ("seed", "", ""),
        ("stitched", "", ""),
    ]

    def _scores_for(kind, score_label):
        # Boolean indexing hands back a new frame, so relabelling its columns
        # leaves the caller's frame untouched.
        subset = mydf[mydf[("embed_type", "")] == kind]
        subset.columns = pd.MultiIndex.from_tuples(
            [
                ("seed", "", ""),
                ("embed_type", "", ""),
                ("train_model", "", ""),
                ("test_model", "", ""),
                (score_label, "", "fscore"),
                ("stitched", "", ""),
            ],
        )
        # The embed type is now encoded in the score column's name.
        return subset.drop(columns=["embed_type"])

    relative_scores = _scores_for("relative", "Relative")
    absolute_scores = _scores_for("absolute", "Absolute")
    return pd.merge(relative_scores, absolute_scores, on=join_keys)


# Labels used by this cell and by the aggregation cell further down.
domain = "In Domain"
train_model = "Train Model"
test_model = "Test Model"

# Load the raw results, discard the metrics that are not reported and rescale
# the F-score by 100.
full_in_domain = read_df(train_perc=train_perc).drop(columns=COLUMNS_TO_DROP)
full_in_domain["fscore"] = full_in_domain["fscore"].mul(100)

# Positional relabelling into two-level names: this relies on the remaining
# columns being exactly seed, embed_type, train_model, test_model, fscore,
# stitched, in that order.
full_in_domain.columns = pd.MultiIndex.from_tuples(
    [
        ("seed", ""),
        ("embed_type", ""),
        ("train_model", ""),
        ("test_model", ""),
        ("", "fscore"),
        ("stitched", ""),
    ],
)
full_in_domain = rearrange_embedtype_as_column(full_in_domain, domain=domain)

# Give the model columns their display names and keep only the columns that
# are reported. Level 1 is the empty string for every selected column, so it
# is dropped to leave two-level labels.
full_df = full_in_domain.rename(
    columns={"train_model": train_model, "test_model": test_model}
)[
    [
        (train_model, "", ""),
        (test_model, "", ""),
        ("Absolute", "", "fscore"),
        ("Relative", "", "fscore"),
    ]
].droplevel(1, axis=1)
full_df

In [None]:
def to_latex(df, label):
    """Render ``df`` as a LaTeX table with a caption and a label.

    The caption and the label are built from the module-level ``train_perc``
    and ``dataset_name``. Cell contents are emitted without escaping, so any
    LaTeX markup inside the frame is passed through unchanged.

    NOTE(review): the label prefix says "multilingual" although the configured
    dataset name and results file point at a vision experiment; this looks
    like a leftover. Confirm before changing it, since documents may reference
    the generated label.

    Args:
        df: Frame to render.
        label: Middle part of the generated ``tab:multilingual-...`` label.

    Returns:
        The LaTeX source produced by ``DataFrame.to_latex``.
    """
    table_caption = f"Train perc: {train_perc} Dataset: {dataset_name}"
    table_label = f"tab:multilingual-{label}-{dataset_name}"
    latex_options = dict(
        escape=False,
        caption=table_caption,
        label=table_label,
        multirow=True,
        sparsify=True,
        multicolumn_format="c",
    )
    return df.to_latex(**latex_options)

In [None]:
# Lift pandas' row-display limit so displayed frames are never truncated.
pd.set_option("display.max_rows", None)
# LaTeX inline-math template "$a \pm b$" with two positional slots, each rendered
# with two decimals; the aggregation cell fills it with (mean, std).
MEAN_STD_FORMAT = r"${:.2f} \pm {:.2f}$"

In [None]:
# NOTE(review): Path, Latex and display are not used by the cells visible here;
# the imports are kept in case other cells rely on them.
from pathlib import Path

from IPython.display import Latex, display

# Swap underscores for hyphens in the model names. to_latex() is called with
# escape=False, so presumably a raw "_" would otherwise reach LaTeX unescaped.
full_df[train_model, ""] = [x.replace("_", "-") for x in full_df[train_model, ""]]
full_df[test_model, ""] = [x.replace("_", "-") for x in full_df[test_model, ""]]

# Aggregate the scores per (train model, test model) pair.
# The reductions are named by string rather than passed as np.mean / np.std:
# pandas currently aliases those callables to its own groupby reductions, but
# that aliasing is deprecated. Once the NumPy callable is used directly,
# np.std would switch from the sample standard deviation (ddof=1) to the
# population one (ddof=0). The string names pin today's results and produce the
# same "mean" / "std" / "count" column labels.
df = (
    full_df.groupby(
        [(train_model, ""), (test_model, "")],
    )
    .agg(["mean", "std", "count"])
    .round(2)
)

# Numeric snapshot taken before mean/std are collapsed into strings; shown as
# the cell's output.
o = df.copy()

for embed in (
    "Absolute",
    "Relative",
):
    for metric, new_name in (("fscore", "FScore"),):
        mean_key = (embed, metric, "mean")
        std_key = (embed, metric, "std")
        # Pair the two columns positionally instead of going through a per-row
        # DataFrame.apply; this yields the same strings and also works when
        # the frame has no rows.
        df[(embed, new_name, "")] = [
            MEAN_STD_FORMAT.format(mean_value, std_value)
            for mean_value, std_value in zip(df[mean_key], df[std_key])
        ]
        # The formatted column replaces the two numeric ones; "count" stays.
        df = df.drop(columns=[mean_key, std_key])

print(to_latex(df, "en"))
o