In [21]:
import os
from typing import Dict

import pandas as pd

In [22]:
def read_means(path: str, subdir: str, sf: str) -> pd.Series:
    """Return the ``mean`` column of a single scoring-function CSV.

    The file read is ``{path}/{subdir}/{sf}.csv``.

    Args:
        path: Root directory of the results.
        subdir: Sub-directory (under ``path``) holding the CSV.
        sf: Scoring-function name; also the CSV file stem.

    Returns:
        The ``mean`` column of the CSV as a ``pd.Series``.

    Raises:
        KeyError: If the CSV has no ``mean`` column.
    """
    csv_location = f"{path}/{subdir}/{sf}.csv"
    score_table = pd.read_csv(csv_location)
    return score_table["mean"]


def read_scoring_functions(path: str) -> Dict[str, pd.Series]:
    """Load the ``mean`` column of every scoring function stored under ``path``.

    Each entry is read via ``read_means`` from ``{path}/{subdir}/{name}.csv``.
    The ``C-Score`` entry (``Predefined`` sub-directory) is only loaded when
    the substring ``"cifar"`` occurs in ``path``.

    Args:
        path: Root directory containing one sub-directory per
            scoring-function family.

    Returns:
        Dictionary mapping scoring-function name to the Series returned by
        ``read_means``. Insertion order follows the listing below.

    Raises:
        Propagates any exception raised by ``read_means`` (for example when
        a CSV file is missing).
    """
    # (sub-directory, scoring-function name) pairs. The name doubles as the
    # dictionary key and the CSV file stem, so it is written only once.
    # Order is significant: it fixes the key order of the returned dict.
    sources = [
        ("CELoss", "CELoss-S1"),
        ("CELoss", "CELoss-Seed"),
        ("CVLoss", "CVLoss-S1"),
        ("CVLoss", "CVLoss-Seed"),
        ("CumulativeAccuracy", "CumAcc-S1"),
        ("CumulativeAccuracy", "CumAcc-Seed"),
        ("FirstIteration", "FIT-S1"),
        ("FirstIteration", "FIT-Seed"),
        ("PredictionDepth", "PD-S1"),
        ("PredictionDepth", "PD-Seed"),
        ("TransferTeacher", "TT-B0-T"),
        ("TransferTeacher", "TT-Model"),
    ]
    if "cifar" in path:
        sources.append(("Predefined", "C-Score"))
    return {sf: read_means(path, subdir, sf) for subdir, sf in sources}


def create_unique_max_bin_df(sfs: Dict[str, pd.Series]) -> pd.DataFrame:
    """Summarise each scoring function by distinct-value count and largest bin.

    Args:
        sfs: Mapping from scoring-function name to a Series of scores.

    Returns:
        DataFrame with one row per entry of ``sfs`` (in iteration order) and
        the columns ``"Scoring Function"``, ``"Unique"`` (``Series.nunique()``)
        and ``"Max Bin"`` (count of the most frequent value, i.e.
        ``Series.value_counts().max()``). An empty ``sfs`` yields an empty
        frame that still carries these three columns.
    """
    columns = ["Scoring Function", "Unique", "Max Bin"]
    records = [
        {
            "Scoring Function": sf,
            "Unique": scores.nunique(),
            "Max Bin": scores.value_counts().max(),
        }
        for sf, scores in sfs.items()
    ]
    # Passing ``columns`` keeps the schema stable even when ``records`` is
    # empty, so a CSV written from the result always has a header row.
    return pd.DataFrame(records, columns=columns)

In [23]:
# Load the scoring-function means for each dataset and summarise them as one
# row per scoring function (distinct-value count and largest bin size).
cifar_sfs = read_scoring_functions("results/cifar/curriculum")
cifar_df = create_unique_max_bin_df(cifar_sfs)
dcase_sfs = read_scoring_functions("results/dcase/curriculum")
dcase_df = create_unique_max_bin_df(dcase_sfs)

# Persist both summaries; the output directory is created if it is missing.
os.makedirs("results/tables", exist_ok=True)
cifar_df.to_csv("results/tables/4_b_3_difficulty_distributions_cifar.csv", index=False)
dcase_df.to_csv("results/tables/4_b_3_difficulty_distributions_dcase.csv", index=False)

In [24]:
# NOTE(review): the saved output of this cell lists 10 scoring functions, but
# read_scoring_functions defines 12 entries plus C-Score for paths containing
# "cifar" — the output looks stale; re-run on a fresh kernel to confirm.
cifar_df

Unnamed: 0,Scoring Function,Unique,Max Bin
0,CELoss-S1,17424,5164
1,CELoss-Seed,49844,10
2,CVLoss-S1,32872,768
3,CVLoss-Seed,50000,1
4,CumAcc-S1,33,18279
5,CumAcc-Seed,157,4033
6,FIT-S1,46,18279
7,FIT-Seed,34286,3824
8,PD-S1,20,21135
9,PD-Seed,196,10915


In [25]:
# NOTE(review): the saved output of this cell lists 10 scoring functions, but
# read_scoring_functions defines 12 entries (TT-B0-T and TT-Model are absent
# below) — the output looks stale; re-run on a fresh kernel to confirm.
dcase_df

Unnamed: 0,Scoring Function,Unique,Max Bin
0,CELoss-S1,8402,470
1,CELoss-Seed,13962,1
2,CVLoss-S1,13262,22
3,CVLoss-Seed,13962,1
4,CumAcc-S1,32,1889
5,CumAcc-Seed,151,670
6,FIT-S1,48,1889
7,FIT-Seed,12384,670
8,PD-S1,21,4119
9,PD-Seed,203,2881
