In [1]:
import os

import pandas as pd

In [6]:
# Source column name -> header shown in the exported performance table.
_column_mapping = {
    "curriculum.scoring.type": "SF",
    "curriculum.type": "Type",
    "test_accuracy": "Accuracy",
}
# Source columns kept for the table: the mapping's keys, in the same order.
_selector = list(_column_mapping)
# How many of the top-ranked baseline runs are averaged for each "B<n>" row.
_baseline_sizes = [1, 3, 5, 15]
# Scoring-function identifier -> short label used in the "SF" column.
_sf_mapping = dict(
    Predefined="C-Score",
    CELoss="CELoss",
    CumulativeAccuracy="CumAcc",
    PredictionDepth="PD",
    TransferTeacher="TT",
    Random="Random",
)


def read_pacing_metrics(path: str, subdir: str = "summary") -> pd.DataFrame:
    """Load the metrics and config tables of one result folder, joined per run.

    Args:
        path: Directory that contains the ``subdir`` folder.
        subdir: Folder below ``path`` holding ``metrics.csv`` and ``config.csv``.

    Returns:
        Both tables merged on their shared ``run_name`` column.
    """
    metrics_table = pd.read_csv(f"{path}/{subdir}/metrics.csv")
    config_table = pd.read_csv(f"{path}/{subdir}/config.csv")
    # The default merge is an inner join: runs missing from either file are dropped.
    return metrics_table.merge(config_table, on="run_name")


def select_baseline_runs(df: pd.DataFrame, dataset: str) -> pd.DataFrame:
    """Summarise the non-curriculum baseline runs as top-k mean accuracies.

    Baselines are the rows without a ``curriculum`` value that use the
    ``EfficientNet-B0`` model and the dataset's reference optimizer setting
    (``Adam`` / 0.001 for ``"cifar"``, ``SAM-SGD-M9`` / 0.01 for anything else).
    For every size ``k`` in ``_baseline_sizes`` the mean ``test_accuracy`` of
    the ``k`` most accurate baselines is reported as one row labelled ``B<k>``.

    Args:
        df: Merged metrics/config table with the columns ``curriculum``,
            ``model``, ``optimizer``, ``learning_rate`` and ``test_accuracy``.
        dataset: Dataset key; only ``"cifar"`` selects the Adam setting.

    Returns:
        One row per baseline size with the columns ``SF`` (always ``"--"``,
        baselines have no scoring function), ``Type`` (``B<k>``) and
        ``Accuracy``. ``Accuracy`` is NaN when no run matches the filter, so
        the column stays numeric and can still be sorted by callers.
    """
    is_cifar = dataset == "cifar"
    optim = "Adam" if is_cifar else "SAM-SGD-M9"
    lr = 0.001 if is_cifar else 0.01

    is_baseline = (
        df["curriculum"].isnull()
        & (df["model"] == "EfficientNet-B0")
        & (df["optimizer"] == optim)
        & (df["learning_rate"] == lr)
    )
    # Only the accuracy is averaged. Taking the mean of the whole frame would
    # also reduce the label columns, which fails once they hold strings.
    ranked = df.loc[is_baseline, "test_accuracy"].sort_values(ascending=False)

    records = [
        {
            "curriculum.scoring.type": "--",
            "curriculum.type": f"B{size}",
            # head() simply returns fewer rows when fewer than `size` runs exist.
            "test_accuracy": ranked.head(size).mean(),
        }
        for size in _baseline_sizes
    ]
    return pd.DataFrame(records, columns=_selector).rename(columns=_column_mapping)


def select_pacing_runs(df: pd.DataFrame) -> pd.DataFrame:
    """Pick the most accurate curriculum run for each ordering type.

    Rows are kept when ``curriculum`` is set, ``curriculum.scoring`` contains
    no ``+`` and ``run_name`` does not match ``-S<digit>``. Each remaining run
    is labelled ``ACL`` (``curriculum`` is ``AntiCurriculum``), ``RCL``
    (scoring type is ``Random``) or ``CL`` (everything else); ``ACL`` wins when
    both conditions hold. The run with the highest ``test_accuracy`` per label
    is returned.

    Args:
        df: Merged metrics/config table with the columns ``curriculum``,
            ``curriculum.scoring``, ``curriculum.scoring.type``, ``run_name``
            and ``test_accuracy``.

    Returns:
        At most one row per ordering type, sorted by descending accuracy, with
        the columns ``SF`` (abbreviated through ``_sf_mapping``; identifiers
        missing from the mapping become NaN), ``Type`` and ``Accuracy``. The
        original row index is preserved.
    """
    # Raw strings keep the regex escapes valid Python; na=False treats missing
    # text as "no match" so the negation below always works on booleans.
    keep = (
        df["curriculum"].notnull()
        & ~df["curriculum.scoring"].str.contains(r"\+", na=False)
        & ~df["run_name"].str.contains(r"-S\d", na=False)
    )
    paced = df.loc[keep]

    # Vectorised labelling; the AntiCurriculum mask is applied last so it takes
    # precedence over the Random check.
    ordering = (
        pd.Series("CL", index=paced.index)
        .mask(paced["curriculum.scoring.type"] == "Random", "RCL")
        .mask(paced["curriculum"] == "AntiCurriculum", "ACL")
    )

    # assign() builds new frames instead of writing into a filtered slice.
    return (
        paced.assign(**{"curriculum.type": ordering})[_selector]
        .rename(columns=_column_mapping)
        .sort_values("Accuracy", ascending=False)
        .groupby("Type")
        .head(1)
        .assign(SF=lambda best: best["SF"].map(_sf_mapping))
    )


def create_performance(dataset: str, results_dir: str = "results") -> pd.DataFrame:
    """Build the ranked baseline-vs-curriculum accuracy table for one dataset.

    Baseline rows come from the ``summary`` tables of the dataset folder and
    curriculum rows from its ``agg_seed`` tables (presumably aggregated over
    seeds -- confirm with whatever produces that folder).

    Args:
        dataset: Dataset key; used as the folder name below ``results_dir``
            and forwarded to ``select_baseline_runs``.
        results_dir: Root folder holding one sub-folder per dataset. Defaults
            to ``"results"``, the location that was previously hard-coded.

    Returns:
        Baseline and curriculum rows concatenated, sorted by descending
        ``Accuracy`` and re-indexed from 0.
    """
    dataset_dir = f"{results_dir}/{dataset}"

    summary_df = read_pacing_metrics(dataset_dir)
    baselines = select_baseline_runs(summary_df, dataset)

    agg_df = read_pacing_metrics(dataset_dir, "agg_seed")
    pacing = select_pacing_runs(agg_df)

    return (
        pd.concat([baselines, pacing])
        .sort_values("Accuracy", ascending=False)
        .reset_index(drop=True)
    )

In [7]:
# One ranked table per dataset, placed side by side under a dataset-level
# column header.
cifar_df, dcase_df = map(create_performance, ("cifar", "dcase"))
df = pd.concat(
    [cifar_df, dcase_df],
    keys=["CIFAR", "DCASE2020"],
    axis=1,
)

# Make sure the output folder exists, then write the table without its row index.
os.makedirs("results/tables", exist_ok=True)
df.to_csv("results/tables/4_c_3_scoring_performance.csv", index=False)

In [4]:
# Display the combined table with numeric values rounded to three decimals.
df.round(decimals=3)

Unnamed: 0_level_0,CIFAR,CIFAR,CIFAR,DCASE2020,DCASE2020,DCASE2020
Unnamed: 0_level_1,SF,Type,Accuracy,SF,Type,Accuracy
0,C-Score,CL,0.844,--,B1,0.583
1,--,B1,0.839,TT,CL,0.577
2,--,B3,0.839,--,B3,0.576
3,--,B5,0.838,--,B5,0.571
4,--,B15,0.834,CELoss,ACL,0.56
5,Random,RCL,0.829,Random,RCL,0.558
6,CELoss,ACL,0.829,--,B15,0.555
