In [12]:
from source.source.table_utils import (
    collect_scores_into_dict_miss,
    extract_same_different_dataframes,
    aggregate_over_measures,
)
import pandas as pd
from source.source.path_config import REPOSITORY_ROOT
from source.losses.constants import LossName
from source.metrics.constants import GName
from source.datasets.constants import DatasetName

import numpy as np
import os
from IPython.display import display

# Disable row truncation so that displayed DataFrames show every row.
pd.set_option("display.max_rows", None)

In [2]:
# Load the pickled `full_mis_rocauc` table from the repository's central tables.
# NOTE(review): unpickling can execute arbitrary code — only load files you trust.
full_mis_rocauc = pd.read_pickle(
    f"{REPOSITORY_ROOT}/tables/central_tables/full_mis_rocauc.pkl"
)

In [3]:
def selector(
    df,
    ind_dataset,
    architecture,
    UQMetric,
):
    """Return the mean and standard deviation of the ROC AUC scores of one setting.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``UQMetric``, ``training_dataset``, ``architecture`` and
        ``RocAucScores_array`` columns.
    ind_dataset
        Value matched against ``df.training_dataset``.
    architecture
        Value matched against ``df.architecture``.
    UQMetric
        Value matched against ``df.UQMetric``.

    Returns
    -------
    tuple[float, float]
        ``(mean, std)`` of the ``RocAucScores_array`` entry of the first
        matching row. The standard deviation is NumPy's default
        (population, ``ddof=0``).

    Raises
    ------
    IndexError
        If no row matches the requested combination.
    """
    mask = (
        (df.UQMetric == UQMetric)
        & (df.training_dataset == ind_dataset)
        & (df.architecture == architecture)
    )
    matches = df[mask]

    # Fail with a message that names the missing combination instead of the
    # opaque "index 0 is out of bounds" raised by `.values[0]` on an empty frame.
    if matches.empty:
        raise IndexError(
            f"No rows found for UQMetric={UQMetric!r}, "
            f"training_dataset={ind_dataset!r}, architecture={architecture!r}"
        )

    # Only the first matching row is used, even if several rows match.
    scores = np.array(matches.RocAucScores_array.values[0])
    return float(scores.mean()), float(scores.std())

In [4]:
def get_specific_stats(
    ind_dataset_,
    architecture_,
    loss_function_,
    base_rule_,
):
    """Collect per-metric ROC AUC statistics for one base rule and loss function.

    Reads the module-level ``full_mis_rocauc`` table, keeps the rows whose
    ``base_rule`` and ``LossFunction`` equal the given values, and asks
    ``selector`` for the statistics of every metric whose name starts with
    ``base_rule_``.

    Returns
    -------
    dict
        ``{metric_name: {"mean": float, "std": float}}`` in the order the
        metric names first appear in ``full_mis_rocauc.UQMetric``.
    """
    rule_mask = full_mis_rocauc.base_rule == base_rule_
    loss_mask = full_mis_rocauc.LossFunction == loss_function_
    subset = full_mis_rocauc[rule_mask & loss_mask]

    # Metric names are enumerated from the unfiltered table, so a metric that
    # is absent from `subset` for this setting makes `selector` raise.
    metric_names = list(
        filter(
            lambda name: name.startswith(base_rule_),
            full_mis_rocauc.UQMetric.unique(),
        )
    )

    stats_by_metric = {}
    for metric in metric_names:
        mean, std = selector(
            df=subset,
            UQMetric=metric,
            ind_dataset=ind_dataset_,
            architecture=architecture_,
        )
        stats_by_metric[metric] = {"mean": mean, "std": std}

    return stats_by_metric

In [19]:
# full_mis_rocauc['training_dataset'].unique()

In [22]:
# Setting inspected in the per-rule tables below: the training dataset
# (matched against `training_dataset`) and the architecture.
ind_dataset = DatasetName.CIFAR10.value
architecture = "resnet18"

In [23]:
# Statistics of every metric built on the log-score base rule, restricted to
# rows whose loss function is cross-entropy.
ce_full_res = get_specific_stats(
    ind_dataset_=ind_dataset,
    architecture_=architecture,
    loss_function_=LossName.CROSS_ENTROPY.value,
    base_rule_=GName.LOG_SCORE.value,
)

# Transpose so that each metric is a row with "mean" and "std" columns.
pd.DataFrame.from_dict(ce_full_res).T

Unnamed: 0,mean,std
LogScore TotalRisk outer outer,0.944987,0.000637
LogScore TotalRisk outer inner,0.947026,0.000481
LogScore TotalRisk outer central,0.944045,0.000601
LogScore TotalRisk inner outer,0.944987,0.000637
LogScore TotalRisk inner inner,0.947026,0.000481
LogScore TotalRisk inner central,0.944045,0.000601
LogScore TotalRisk central outer,0.947387,0.000654
LogScore TotalRisk central inner,0.947749,0.000531
LogScore TotalRisk central central,0.947577,0.00092
LogScore ExcessRisk outer outer,0.94096,0.001368


In [24]:
# Same as the previous cell, but for the Brier-score base rule.
# Note: `ce_full_res` is rebound here, so the log-score results are no longer
# available under that name after this cell runs.
ce_full_res = get_specific_stats(
    ind_dataset_=ind_dataset,
    architecture_=architecture,
    loss_function_=LossName.CROSS_ENTROPY.value,
    base_rule_=GName.BRIER_SCORE.value,
)

# Transpose so that each metric is a row with "mean" and "std" columns.
pd.DataFrame.from_dict(ce_full_res).T

Unnamed: 0,mean,std
BrierScore TotalRisk outer outer,0.947254,0.000576
BrierScore TotalRisk outer inner,0.948151,0.000408
BrierScore TotalRisk outer central,0.948151,0.000408
BrierScore TotalRisk inner outer,0.947254,0.000576
BrierScore TotalRisk inner inner,0.948151,0.000408
BrierScore TotalRisk inner central,0.948151,0.000408
BrierScore TotalRisk central outer,0.947254,0.000576
BrierScore TotalRisk central inner,0.948151,0.000408
BrierScore TotalRisk central central,0.948151,0.000408
BrierScore ExcessRisk outer outer,0.943396,0.001331


In [None]:
# Drop the metrics whose name ends with "Inner Inner". The explicit `.copy()`
# makes the filtered table an independent frame, so the `.loc` assignments
# below do not write into a slice of the original (SettingWithCopyWarning).
#
# NOTE(review): the UQMetric names displayed earlier in this notebook end in
# lowercase "outer" / "inner" / "central" (e.g. "LogScore TotalRisk outer inner"),
# whereas the filters in this cell match capitalised suffixes — confirm against
# the current pickle that they still select the intended rows.
full_mis_rocauc = full_mis_rocauc[
    ~full_mis_rocauc.UQMetric.str.endswith("Inner Inner")
].copy()

# Split the "Bayes" and "Total" risk types by the suffix of the metric name,
# producing "Bayes Outer", "Bayes Inner", "Total Outer" and "Total Inner".
for risk_type in ("Bayes", "Total"):
    for approximation in ("Outer", "Inner"):
        full_mis_rocauc.loc[
            (full_mis_rocauc.RiskType == risk_type)
            & full_mis_rocauc.UQMetric.str.endswith(approximation),
            "RiskType",
        ] = f"{risk_type} {approximation}"

In [None]:
# trunc_df = full_mis_rocauc[
# ~full_mis_rocauc.RiskType.isin(['Bias', 'MV', 'MVBI', 'BiasBI', 'Bregman Information', 'Reverse Bregman Information'])
# # full_mis_rocauc.base_rule.isin(['Brier', 'Logscore', 'Spherical']) &
# # full_mis_rocauc.LossFunction.isin(['Brier', 'Logscore', 'Spherical']) &
# # ~(np.isclose(full_mis_rocauc.RocAucScore, np.float64(0.5)))
# ]

# # trunc_df.sort_values(by='RocAucScore')

# trunc_df.to_csv(os.path.join('tables', 'full_mis_rocauc_only_risks.csv'), index=False)

In [None]:
# Build the grouped view of the table; the attributes read in the following
# cells (e.g. `logscore_logscore`, `brier_not_brier`) come from this object.
# NOTE(review): the grouping criteria are defined in
# `extract_same_different_dataframes` (table_utils) and are not visible here.
grouped_df = extract_same_different_dataframes(
    dataframe_=full_mis_rocauc,
)

In [None]:
# "Same" groups: logscore/logscore, brier/brier and spherical/spherical.
# NOTE(review): presumably these are the rows where the base rule matches the
# training loss — confirm in table_utils.
# The second value returned by the helper is not used.
same_dict, _ = collect_scores_into_dict_miss(
    dataframes_list_=[
        grouped_df.logscore_logscore,
        grouped_df.brier_brier,
        grouped_df.spherical_spherical,
    ],
)
same_df = pd.DataFrame.from_dict(same_dict)

# Aggregate with "mean", grouped by the "InD" column.
same_agg_df = aggregate_over_measures(
    dataframe_=same_df,
    agg_func_="mean",
    by_=["InD"],
)

In [None]:
# "Different" groups: logscore/not-logscore, brier/not-brier and
# spherical/not-spherical.
# NOTE(review): presumably these are the rows where the base rule differs from
# the training loss — confirm in table_utils.
# The second value returned by the helper is not used.
different_dict, _ = collect_scores_into_dict_miss(
    dataframes_list_=[
        grouped_df.logscore_not_logscore,
        grouped_df.brier_not_brier,
        grouped_df.spherical_not_spherical,
    ],
)
different_df = pd.DataFrame.from_dict(different_dict)

# Aggregate with "mean", grouped by the "InD" column.
different_agg_df = aggregate_over_measures(
    dataframe_=different_df,
    agg_func_="mean",
    by_=["InD"],
)

In [None]:
# Reference case: the whole table is passed as a single frame, without the
# same/different split. The second value returned by the helper is not used.
all_dict, _ = collect_scores_into_dict_miss(
    dataframes_list_=[
        full_mis_rocauc,
    ],
)
all_df = pd.DataFrame.from_dict(all_dict)

# Aggregate with "mean", grouped by the "InD" column.
all_agg_df = aggregate_over_measures(
    dataframe_=all_df,
    agg_func_="mean",
    by_=["InD"],
)

In [None]:
# Show the three aggregated tables in order: all rows, "same" and "different".
display(all_agg_df, same_agg_df, different_agg_df)

In [None]:
# Inspect the row labels of the aggregated frame; `get_nice_df` later replaces
# them positionally with hard-coded display names, so their order matters.
same_agg_df.index

In [None]:
def enhance_latex_table(input_latex, n_index_cols=2, n_metric_cols=5):
    """Decorate a booktabs-style LaTeX table with a grouped header and row shading.

    The input is processed line by line:

    * after the line containing ``\\toprule`` a two-group header
      ("Dataset" / "Metrics") and the matching ``\\cmidrule`` lines are inserted;
    * after the line containing ``\\midrule`` a ``\\rowcolor{gray!10}`` line is
      inserted, which shades the first body row;
    * every other line, including ``\\bottomrule`` and ``\\end{tabular}``, is
      kept unchanged.

    Previously the ``\\bottomrule`` line was replaced by an extra
    ``\\end{tabular}``, so the result lost its bottom rule and closed the
    ``tabular`` environment twice.

    Parameters
    ----------
    input_latex : str
        LaTeX source of the table, e.g. the output of ``DataFrame.to_latex``.
    n_index_cols : int, default 2
        Number of columns spanned by the "Dataset" header group.
    n_metric_cols : int, default 5
        Number of columns spanned by the "Metrics" header group.

    Returns
    -------
    str
        The decorated LaTeX source. ``\\cmidrule`` needs the ``booktabs``
        package and ``\\rowcolor`` needs ``colortbl`` (or ``xcolor`` with the
        ``table`` option) in the document preamble.
    """
    # The spans are not derived from the table itself: callers must pass the
    # counts that match their table (the defaults reproduce the old 2 + 5 header).
    last_col = n_index_cols + n_metric_cols
    group_header = (
        rf"\multicolumn{{{n_index_cols}}}{{c}}{{Dataset}} & "
        rf"\multicolumn{{{n_metric_cols}}}{{c}}{{Metrics}} \\"
    )
    group_rules = (
        rf"\cmidrule(lr){{1-{n_index_cols}}} "
        rf"\cmidrule(lr){{{n_index_cols + 1}-{last_col}}}"
    )

    enhanced_lines = []
    for line in input_latex.split("\n"):
        enhanced_lines.append(line)
        if "\\toprule" in line:
            # Add multicolumn headers right below the top rule.
            enhanced_lines.append(group_header)
            enhanced_lines.append(group_rules)
        elif "\\midrule" in line:
            # Add row coloring for the row that follows the header rule.
            enhanced_lines.append(r"\rowcolor{gray!10}")

    return "\n".join(enhanced_lines)


def get_nice_df(df_):
    df_.index = pd.Index(
        data=[
            "CIFAR10",
            "CIFAR100",
            "Missed class CIFAR10",
            "Noisy CIFAR10",
            "Noisy CIFAR100",
        ],
        name="InD",
    )
    df_.columns = [
        # 'Bayes',
        # 'Excess',
        # 'Total',
        "Bayes(O)",
        "Bayes(I)",
        "Total(O)",
        "Total(I)",
        "BI",
        "RBI",
        "EPBI",
        # 'Bias',
        # 'MV',
        # 'MVBI',
        # 'BiasBI',
    ]
    # df_ = df_[['Bayes', 'Excess', 'Total', 'BI', 'RBI']]
    df_ = (100 * df_).round(2)

    display(df_)

    return df_, df_.to_latex(float_format="%.2f")

In [None]:
# Columns selected from the aggregated frames before formatting. The order
# matters: `get_nice_df` assigns its short labels ("Bayes(O)", ..., "EPBI")
# to these columns by position.
measures = [
    "Bayes Outer",
    "Bayes Inner",
    "Total Outer",
    "Total Inner",
    "Bregman Information",
    "Reverse Bregman Information",
    "Expected Pairwise Bregman Information",
]

# measures = ['Bayes', 'Excess', 'Total', 'Bregman Information', 'Reverse Bregman Information', 'Expected Pairwise Bregman Information']

In [None]:
# `nice_same` is a (formatted DataFrame, LaTeX string) tuple for the "same" table.
nice_same = get_nice_df(same_agg_df.copy()[measures])
# Print the decorated LaTeX source.
print(enhance_latex_table(nice_same[1]))

In [None]:
# `nice_different` is a (formatted DataFrame, LaTeX string) tuple for the
# "different" table.
nice_different = get_nice_df(different_agg_df.copy()[measures])
# Print the decorated LaTeX source.
print(enhance_latex_table(nice_different[1]))

In [None]:
# Combined table: the "same" and "different" frames placed side by side, so
# every column label appears twice.
# NOTE(review): `enhance_latex_table` injects a header that spans 7 columns by
# default, while this table has 14 value columns plus the index — confirm the
# generated header before using the output.
print(
    enhance_latex_table(
        pd.concat([nice_same[0], nice_different[0]], axis=1).to_latex(
            float_format="%.2f"
        )
    )
)

In [None]:
# Per-column mean of the formatted "same" table (averaged over its rows).
nice_same[0].mean()

In [None]:
# Per-column standard deviation of the formatted "same" table
# (pandas default, i.e. sample standard deviation with ddof=1).
nice_same[0].std()

In [None]:
# Per-column mean of the formatted "different" table (averaged over its rows).
nice_different[0].mean()

In [None]:
# Per-column standard deviation of the formatted "different" table
# (pandas default, i.e. sample standard deviation with ddof=1).
nice_different[0].std()

In [None]:
# Boolean mask: True where a cell equals the maximum of its row ("same" table).
same_agg_df.eq(same_agg_df.max(axis=1), axis=0)

In [None]:
# Boolean mask: True where a cell equals the maximum of its row ("different" table).
different_agg_df.eq(different_agg_df.max(axis=1), axis=0)

In [None]:
# Boolean mask: True where a cell equals the maximum of its row (all-rows table).
all_agg_df.eq(all_agg_df.max(axis=1), axis=0)

In [None]:
# Boolean mask: True where the "same" score is strictly larger than the
# "different" score for the same row and column.
(same_agg_df - different_agg_df) > 0