In [None]:
from pathlib import Path

import pandas as pd
from IPython.display import Markdown, display

In [None]:
# Raw metric column (as stored in the evaluation CSVs) -> short display name.
NEW_COLS = dict(
    [
        ("test/nn_class/f1", "1NN-F1"),
        ("test/psd/mean", "PSD"),
        ("test/spd/mean", "SPD"),
    ]
)
# Columns to summarise, in table order (dicts keep insertion order).
COLS = list(NEW_COLS)
# Scale factor applied to each entry of COLS, position by position.
MULTIPLIERS = [1, 100, 100]


def process_csv_file(csv_file: Path) -> dict:
    """Summarise the metric columns of one evaluation CSV.

    Every column listed in ``COLS`` is scaled by the matching entry of
    ``MULTIPLIERS`` and reduced to its mean and standard deviation over the
    rows of the file.

    Args:
        csv_file: Path of the CSV file to read with ``pd.read_csv``.

    Returns:
        A dict with two entries per metric, keyed by the display name taken
        from ``NEW_COLS``: ``<name>`` holds the formatted ``"mean ± std"``
        string and ``<name>_raw`` holds the numeric mean.

    Raises:
        KeyError: If the CSV lacks one of the columns in ``COLS``.
        ValueError: If ``COLS`` and ``MULTIPLIERS`` differ in length.
    """
    results = pd.read_csv(csv_file)
    out = {}
    for col, mul in zip(COLS, MULTIPLIERS, strict=True):
        values = results[col] * mul
        mean_val = values.mean()
        std_val = values.std()
        metric_name = NEW_COLS[col]
        # The plus-minus sign is written as an escape so that it cannot be
        # corrupted into mojibake ("Â±") when the file is re-encoded.
        out[metric_name] = f"{mean_val:.3f} \u00b1 {std_val:.4f}"
        # Keep the unformatted mean so callers can rank rows numerically.
        out[f"{metric_name}_raw"] = mean_val
    return out


def bold_best_results(df: pd.DataFrame, metrics=("1NN-F1", "PSD", "SPD"), maximize=("1NN-F1",)):
    """Wrap the best entry of each metric, per dataset, in markdown bold markers.

    Rows are grouped by the ``Dataset`` column. Within each group the row with
    the best ``<metric>_raw`` value (largest for metrics listed in
    ``maximize``, smallest otherwise) gets its ``<metric>`` string wrapped in
    ``**...**``. On ties only the first best row is marked.

    Args:
        df: Frame with a ``Dataset`` column and, for each metric, a display
            column ``<metric>`` plus a numeric column ``<metric>_raw``.
            It is modified in place.
        metrics: Display names of the metrics to process.
        maximize: Metrics for which larger is better; all others are minimised.

    Returns:
        The same ``df`` object, after modification.
    """
    # The grouping only depends on "Dataset", so compute it once for all metrics.
    groups = df.groupby("Dataset").groups
    for metric in metrics:
        raw_col = f"{metric}_raw"
        for group_idx in groups.values():
            # A group without any valid value has no "best" row; idxmax/idxmin
            # would raise (or yield NaN) on it, so skip it instead of crashing.
            raw_values = df.loc[group_idx, raw_col].dropna()
            if raw_values.empty:
                continue

            if metric in maximize:
                best_idx = raw_values.idxmax()
            else:
                best_idx = raw_values.idxmin()

            # Bold the corresponding markdown string
            df.at[best_idx, metric] = f"**{df.at[best_idx, metric]}**"
    return df

# Main Results

In [None]:
def load_main_eval(eval_path: Path) -> pd.DataFrame:
    """Collect the summarised metrics of every main-evaluation CSV in a directory.

    Each CSV stem must have the form ``<model>_<objective>_<dataset>``.

    Args:
        eval_path: Directory holding the evaluation CSV files.

    Returns:
        One row per CSV with ``Dataset``, ``Model`` and ``Objective`` columns
        plus every entry returned by ``process_csv_file``.

    Raises:
        ValueError: If a CSV stem does not split into exactly three
            ``_``-separated parts.
    """
    df_list = []
    # Only look at ``*.csv`` entries: ``iterdir`` would also yield stray files
    # and folders (e.g. ``.DS_Store``, ``.ipynb_checkpoints``) whose names
    # break the stem parsing below.
    for csv_file in sorted(eval_path.glob("*.csv")):
        model, objective, dataset = csv_file.stem.split("_")
        temp_results = process_csv_file(csv_file)
        df_list.append({"Dataset": dataset, "Model": model, "Objective": objective, **temp_results})
    return pd.DataFrame(df_list)

# Load the main evaluation results and mark the best value of each metric per dataset.
main_results_path = Path("../outputs/eval_main")
main_eval_df = bold_best_results(load_main_eval(main_results_path))

In [None]:
# One markdown table per dataset, rows ordered by model and objective.
for ds in ["MED", "ABD", "MBA"]:
    print(ds)
    ds_rows = main_eval_df.loc[
        main_eval_df["Dataset"] == ds,
        ["Model", "Objective", "1NN-F1", "PSD", "SPD"],
    ]
    ds_rows = ds_rows.sort_values(by=["Model", "Objective"])
    markdown_str = ds_rows.to_markdown(index=False)
    display(Markdown(markdown_str))

# Ablations

## $K$ Regions Ablation Study

In [None]:
def load_kregion_ablation(
    eval_path: Path,
    base_path: Path,
    base_model: str = "NicheFlow_GLVFM",
    base_k: int = 64,
) -> pd.DataFrame:
    """Collect the K-regions ablation results together with the baseline runs.

    Ablation CSV stems must have the form
    ``<model>_<objective>_<dataset>_KRegions=<K>``. Baseline CSV stems must
    have the form ``<model>_<objective>_<dataset>``; only those whose stem
    contains ``base_model`` are used, and they are recorded with ``K = base_k``.

    Args:
        eval_path: Directory holding the ablation CSV files.
        base_path: Directory holding the main-evaluation CSV files.
        base_model: Substring a baseline stem must contain to be included.
        base_k: ``K`` value assigned to the baseline rows.

    Returns:
        One row per CSV with ``Dataset`` and ``K`` columns plus every entry
        returned by ``process_csv_file``.

    Raises:
        ValueError: If a stem does not split into the expected number of
            ``_``-separated parts, or the ``K`` suffix is not an integer.
    """
    df_list = []
    # ``glob("*.csv")`` rather than ``iterdir``: stray non-CSV entries (e.g.
    # ``.DS_Store``, ``.ipynb_checkpoints``) would break the stem parsing.
    for csv_file in sorted(eval_path.glob("*.csv")):
        _, _, dataset, kregions = csv_file.stem.split("_")
        kregions = int(kregions.removeprefix("KRegions="))
        temp_results = process_csv_file(csv_file)
        df_list.append({"Dataset": dataset, "K": kregions, **temp_results})

    for csv_file in sorted(base_path.glob("*.csv")):
        if base_model not in csv_file.stem:
            continue
        _, _, dataset = csv_file.stem.split("_")
        temp_results = process_csv_file(csv_file)
        df_list.append({"Dataset": dataset, "K": base_k, **temp_results})

    return pd.DataFrame(df_list)


# Load the K-regions ablation (plus the main-run baselines) and mark the best rows.
kregions_results_path = Path("../outputs/eval_kregion_ablations")
kregion_df = bold_best_results(
    load_kregion_ablation(kregions_results_path, main_results_path)
)

In [None]:
# Single table for the K-regions ablation, ordered by dataset and then K.
kregion_table = kregion_df.loc[:, ["Dataset", "K", *NEW_COLS.values()]]
kregion_table = kregion_table.sort_values(by=["Dataset", "K"])
display(Markdown(kregion_table.to_markdown(index=False)))

## $\lambda$ OT Ablations

In [None]:
def load_lambda_ablation(
    eval_path: Path,
    base_path: Path,
    base_models=("NicheFlow", "RPCFlow"),
    base_lambda: float = 0.1,
) -> pd.DataFrame:
    """Collect the OT-lambda ablation results together with the baseline runs.

    Ablation CSV stems must have the form
    ``<model>_<objective>_<dataset>_OTLambda=<lambda>``. Baseline CSV stems
    must have the form ``<model>_<objective>_<dataset>``; only those whose
    stem contains one of ``base_models`` are used, and they are recorded with
    ``Lambda = base_lambda``.

    Args:
        eval_path: Directory holding the ablation CSV files.
        base_path: Directory holding the main-evaluation CSV files.
        base_models: Substrings, one of which a baseline stem must contain.
        base_lambda: ``Lambda`` value assigned to the baseline rows.

    Returns:
        One row per CSV with ``Dataset``, ``Model``, ``Objective`` and
        ``Lambda`` columns plus every entry returned by ``process_csv_file``,
        sorted by model, objective and lambda.

    Raises:
        ValueError: If a stem does not split into the expected number of
            ``_``-separated parts, or the lambda suffix is not a number.
    """
    df_list = []
    # ``glob("*.csv")`` rather than ``iterdir``: stray non-CSV entries (e.g.
    # ``.DS_Store``, ``.ipynb_checkpoints``) would break the stem parsing.
    for csv_file in sorted(eval_path.glob("*.csv")):
        model, objective, dataset, ot_lambda = csv_file.stem.split("_")
        ot_lambda = float(ot_lambda.removeprefix("OTLambda="))
        temp_results = process_csv_file(csv_file)
        df_list.append(
            {
                "Dataset": dataset,
                "Model": model,
                "Objective": objective,
                "Lambda": ot_lambda,
                **temp_results,
            }
        )

    for csv_file in sorted(base_path.glob("*.csv")):
        if not any(name in csv_file.stem for name in base_models):
            continue
        model, objective, dataset = csv_file.stem.split("_")
        temp_results = process_csv_file(csv_file)
        df_list.append(
            {
                "Dataset": dataset,
                "Model": model,
                "Objective": objective,
                "Lambda": base_lambda,
                **temp_results,
            }
        )

    return pd.DataFrame(df_list).sort_values(by=["Model", "Objective", "Lambda"])


# Load the OT-lambda ablation (plus the main-run baselines) and mark the best rows.
ot_results_path = Path("../outputs/eval_ot_ablations")
lambda_df = bold_best_results(
    load_lambda_ablation(ot_results_path, main_results_path)
)

In [None]:
# One markdown table per dataset, rows ordered by model, objective and lambda.
for ds in ["MED", "ABD", "MBA"]:
    print(ds)
    ds_rows = lambda_df.loc[
        lambda_df["Dataset"] == ds,
        ["Model", "Objective", "Lambda", *NEW_COLS.values()],
    ]
    ds_rows = ds_rows.sort_values(by=["Model", "Objective", "Lambda"])
    markdown_str = ds_rows.to_markdown(index=False)
    display(Markdown(markdown_str))