In [1]:
import os
import pandas as pd

In [2]:
# Raw method identifiers (values of the results' "approach" column) paired
# with the short labels used as column headers in the summary tables.
# The order of the pairs is the left-to-right column order of those tables.
approach_map = dict(
    [
        ("target-target", "ORACLE"),
        ("MLP cat", "Cat-ERM"),
        ("MLP uniform", "Avg-ERM"),
        ("Multisource SA", "SA"),
        ("Multisource SVM", "MK"),
        ("Multisource WCSC", "WCSC"),
        ("adaptation", "Proposed"),
    ]
)

In [3]:
def load_results(path):
    """Read the baseline and proposed-method result CSVs found in `path`.

    The proposed-method file ("multienv_classification.csv") has its
    columns renamed to the names used for the baselines ("approach",
    "acc", "aucroc") and its "env_id" column removed; the rows are then
    appended below those of "multienv_classification_baseline.csv".

    Parameters
    ----------
    path : str
        Directory that contains both CSV files.

    Returns
    -------
    pandas.DataFrame
        Baseline rows followed by proposed-method rows, with a fresh
        0..n-1 index.
    """
    baseline_csv = os.path.join(path, "multienv_classification_baseline.csv")
    proposed_csv = os.path.join(path, "multienv_classification.csv")

    column_aliases = {
        "task": "approach",
        "predict error.hard_acc": "acc",
        "predict error.auc": "aucroc",
    }

    baselines = pd.read_csv(baseline_csv)
    proposed = (
        pd.read_csv(proposed_csv)
        .rename(columns=column_aliases)
        .drop(columns="env_id")
    )

    return pd.concat([baselines, proposed], axis=0, ignore_index=True)

In [8]:
def summarize(path, task):
    """Build a one-row table of per-approach AUROC for a single task.

    Results are loaded from ``<path>/task<task>`` via `load_results`,
    grouped by the "approach" column, and the "aucroc" column of every
    approach listed in `approach_map` is rendered as "mean (std)" with
    four decimals.

    Parameters
    ----------
    path : str
        Directory containing one ``task<N>`` sub-directory per task.
    task : int or str
        Task identifier; used for the sub-directory name and the "Task"
        cell of the returned row.

    Returns
    -------
    pandas.DataFrame
        A single row with a "Task" column followed by one column per
        label in `approach_map`, in that mapping's order.

    Raises
    ------
    KeyError
        If an approach listed in `approach_map` has no rows in the
        loaded results.
    """
    df = load_results(os.path.join(path, f"task{task}"))

    # Aggregate once, and only the metric that is reported. Calling
    # .mean()/.std() on the whole grouped frame inside the loop repeats the
    # same work for every approach and, on pandas >= 2.0, raises TypeError
    # if any other column is non-numeric.
    stats = df.groupby("approach")["aucroc"].agg(["mean", "std"])

    missing = [name for name in approach_map if name not in stats.index]
    if missing:
        raise KeyError(
            f"no results for approach(es) {missing} in task {task}")

    pretty_dict = {"Task": f"Task {task}"}
    for approach, label in approach_map.items():
        pretty_dict[label] = "{:.4f} ({:.4f})".format(
            stats.loc[approach, "mean"],
            stats.loc[approach, "std"],
        )

    return pd.DataFrame.from_records([pretty_dict])


# Load previously-generated data and hyperparameters from Google Drive

In [37]:
results_dir = "results_load_params"

# Summarize Tasks 1-3 and stack the one-row tables with a single concat,
# rather than growing a frame inside the loop from an empty DataFrame.
summary = pd.concat(
    [summarize(results_dir, t) for t in range(1, 4)], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9413 (0.0062),0.8065 (0.0161),0.7993 (0.0165),0.7995 (0.0165),0.6233 (0.0419),0.5326 (0.0156),0.5863 (0.0300)
1,Task 2,0.9377 (0.0052),0.9145 (0.0078),0.9148 (0.0087),0.9148 (0.0087),0.8714 (0.0086),0.9079 (0.0105),0.9349 (0.0071)
2,Task 3,0.8915 (0.0128),0.8481 (0.0130),0.8430 (0.0129),0.8406 (0.0133),0.8161 (0.0233),0.7532 (0.0345),0.7640 (0.0102)


# Load previously-generated data from Google Drive

## Multiple seed model selection

In [40]:
results_dir = "results_load_drive/multiseed192"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9422 (0.0078),0.8065 (0.0161),0.7993 (0.0165),0.7983 (0.0173),0.6807 (0.0897),0.5597 (0.0509),0.7302 (0.1450)


## Single seed model selection

In [38]:
results_dir = "results_load_drive/seed192"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9434 (0.0096),0.8065 (0.0161),0.7993 (0.0165),0.7967 (0.0171),0.6233 (0.0419),0.5326 (0.0156),0.8701 (0.0218)


# Load data generated with the original data-generating code

## Multiple seed model selection

In [30]:
results_dir = "results_load_orig/multiseed192"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9445 (0.0047),0.8083 (0.0207),0.8003 (0.0207),0.8001 (0.0205),0.6916 (0.0868),0.5375 (0.0289),0.6308 (0.0945)


In [31]:
results_dir = "results_load_orig/multiseed1922"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9430 (0.0057),0.8017 (0.0144),0.7912 (0.0114),0.7908 (0.0113),0.6924 (0.0673),0.5269 (0.0136),0.6910 (0.1366)


## Single seed model selection

In [16]:
results_dir = "results_load_orig/seed192"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9442 (0.0047),0.8083 (0.0207),0.8003 (0.0207),0.8004 (0.0207),0.7828 (0.0662),0.5349 (0.0328),0.5931 (0.0344)


In [33]:
results_dir = "results_load_orig/seed1922"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9427 (0.0043),0.8017 (0.0144),0.7912 (0.0114),0.7891 (0.0114),0.6806 (0.1253),0.5273 (0.0142),0.5806 (0.0283)


# Run with the new data-generating code

## Multiple seed model selection

In [35]:
results_dir = "results_gen/multiseed192"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9437 (0.0082),0.8065 (0.0161),0.7993 (0.0165),0.7995 (0.0165),0.6215 (0.1018),0.5325 (0.0152),0.7514 (0.1405)


In [39]:
results_dir = "results_gen/multiseed1922"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9407 (0.0032),0.8122 (0.0112),0.8052 (0.0157),0.8051 (0.0155),0.6876 (0.0497),0.5517 (0.0197),0.6063 (0.0311)


## Single seed model selection

In [17]:
results_dir = "results_gen/seed192"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9443 (0.0062),0.7891 (0.0297),0.7796 (0.0266),0.7797 (0.0266),0.6001 (0.0613),0.5057 (0.0299),0.5686 (0.0358)


In [34]:
results_dir = "results_gen/seed1922"

# Only Task 1 is summarized for this configuration.
task_ids = [1]
summary = pd.concat(
    [summarize(results_dir, t) for t in task_ids], ignore_index=True)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9407 (0.0032),0.8122 (0.0112),0.8052 (0.0157),0.8053 (0.0157),0.7268 (0.0129),0.5515 (0.0197),0.6063 (0.0311)
