In [1]:
import os
import pandas as pd

In [2]:
# Raw approach labels (values of the "approach" column in the loaded results)
# paired with the headings shown in the summary table. The order of the pairs
# is the order in which the summary columns are produced.
approach_map = dict([
    ("target-target", "ORACLE"),
    ("MLP cat", "Cat-ERM"),
    ("MLP uniform", "Avg-ERM"),
    ("Multisource SA", "SA"),
    ("Multisource SVM", "MK"),
    ("Multisource WCSC", "WCSC"),
    ("adaptation", "Proposed"),
])

In [3]:
def load_results(path):
    """Read the baseline and proposed-method result CSVs and stack them.

    Two files are read from ``path``:
    ``multienv_classification_baseline.csv`` and
    ``multienv_classification.csv``. In the second file the columns ``task``,
    ``predict error.hard_acc`` and ``predict error.auc`` are renamed to
    ``approach``, ``acc`` and ``aucroc`` respectively, and the ``env_id``
    column is removed.

    Parameters
    ----------
    path : str
        Directory that contains both CSV files.

    Returns
    -------
    pandas.DataFrame
        Baseline rows followed by the proposed-method rows, with a fresh
        0..n-1 index.
    """
    baseline_csv = os.path.join(path, "multienv_classification_baseline.csv")
    proposed_csv = os.path.join(path, "multienv_classification.csv")
    column_renames = {
        "task": "approach",
        "predict error.hard_acc": "acc",
        "predict error.auc": "aucroc",
    }

    baseline = pd.read_csv(baseline_csv)
    proposed = (
        pd.read_csv(proposed_csv)
        .rename(columns=column_renames)
        .drop(columns="env_id")
    )

    return pd.concat([baseline, proposed], axis=0, ignore_index=True)

In [4]:
def summarize(path, task):
    """Build a one-row summary of AUC-ROC per approach for a single task.

    Results are loaded with ``load_results`` from ``<path>/task<task>`` and
    grouped by the ``approach`` column. For every entry of ``approach_map``
    the mean and standard deviation of ``aucroc`` are formatted as
    ``"mean (std)"`` with four decimals.

    Parameters
    ----------
    path : str
        Directory containing one ``task<task>`` sub-directory per task.
    task : int or str
        Task identifier, used both in the sub-directory name and in the
        ``Task`` label of the returned row.

    Returns
    -------
    pandas.DataFrame
        A single-row frame whose first column is ``Task`` and whose remaining
        columns are the display names from ``approach_map``, in that order.

    Raises
    ------
    KeyError
        If the results have no ``aucroc`` column, or an approach listed in
        ``approach_map`` does not occur in the results.
    """
    df = load_results(os.path.join(path, f"task{task}"))

    # Aggregate only the reported column, and only once. Calling
    # groupby.mean()/std() on the whole frame inside the loop repeats the same
    # work for every approach and fails on pandas >= 2.0 when the results
    # contain any non-numeric column besides "approach".
    aucroc_stats = df.groupby("approach")["aucroc"].agg(["mean", "std"])

    pretty_dict = {"Task": f"Task {task}"}
    for approach, label in approach_map.items():
        pretty_dict[label] = "{:.4f} ({:.4f})".format(
            aucroc_stats.loc[approach, "mean"],
            aucroc_stats.loc[approach, "std"],
        )

    return pd.DataFrame.from_records([pretty_dict])


# Load previously-generated data and hyperparameters from Google Drive

In [5]:
results_dir = "results_load_params"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop; ignore_index renumbers the rows.
summary = pd.concat(
    [summarize(results_dir, t) for t in range(1, 4)],
    axis=0,
    ignore_index=True,
)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9413 (0.0062),0.8065 (0.0161),0.7993 (0.0165),0.7995 (0.0165),0.6233 (0.0419),0.5326 (0.0156),0.5863 (0.0300)
1,Task 2,0.9377 (0.0052),0.9145 (0.0078),0.9148 (0.0087),0.9148 (0.0087),0.8714 (0.0086),0.9079 (0.0105),0.9349 (0.0071)
2,Task 3,0.8915 (0.0128),0.8481 (0.0130),0.8430 (0.0129),0.8406 (0.0133),0.8161 (0.0233),0.7532 (0.0345),0.7640 (0.0102)


# Load previously-generated data from Google Drive

## Multiple seed model selection

In [17]:
results_dir = "results_load_drive/multiseed192"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop; ignore_index renumbers the rows.
summary = pd.concat(
    [summarize(results_dir, t) for t in range(1, 4)],
    axis=0,
    ignore_index=True,
)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9422 (0.0078),0.8065 (0.0161),0.7993 (0.0165),0.7983 (0.0173),0.6807 (0.0897),0.5597 (0.0509),0.7302 (0.1450)
1,Task 2,0.9343 (0.0100),0.9145 (0.0078),0.9148 (0.0087),0.9146 (0.0089),0.8796 (0.0154),0.9101 (0.0075),0.9345 (0.0072)
2,Task 3,0.8899 (0.0124),0.8481 (0.0130),0.8430 (0.0129),0.8427 (0.0132),0.7789 (0.0550),0.6964 (0.0178),0.7956 (0.0510)


## Single seed model selection

In [18]:
results_dir = "results_load_drive/seed192"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop; ignore_index renumbers the rows.
summary = pd.concat(
    [summarize(results_dir, t) for t in range(1, 4)],
    axis=0,
    ignore_index=True,
)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9434 (0.0096),0.8065 (0.0161),0.7993 (0.0165),0.7967 (0.0171),0.6233 (0.0419),0.5326 (0.0156),0.8701 (0.0218)
1,Task 2,0.9377 (0.0052),0.9145 (0.0078),0.9148 (0.0087),0.9148 (0.0087),0.8714 (0.0086),0.9101 (0.0074),0.9349 (0.0071)
2,Task 3,0.8915 (0.0128),0.8481 (0.0130),0.8430 (0.0129),0.8430 (0.0129),0.7776 (0.0489),0.6912 (0.0097),0.7640 (0.0102)


# Load data generated with the original data-generating code

## Multiple seed model selection

In [19]:
results_dir = "results_load_orig/multiseed192"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop. The index is not reset, so every
# row keeps the label 0 it had in its one-row frame.
summary = pd.concat([summarize(results_dir, t) for t in range(1, 4)], axis=0)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9445 (0.0047),0.8083 (0.0207),0.8003 (0.0207),0.8001 (0.0205),0.6916 (0.0868),0.5375 (0.0289),0.6308 (0.0945)
0,Task 2,0.9424 (0.0081),0.9148 (0.0088),0.9164 (0.0094),0.9161 (0.0095),0.8787 (0.0393),0.9102 (0.0108),0.9369 (0.0076)
0,Task 3,0.8838 (0.0151),0.8439 (0.0154),0.8386 (0.0136),0.8383 (0.0140),0.7938 (0.0359),0.7122 (0.0215),0.8062 (0.0443)


In [20]:
results_dir = "results_load_orig/multiseed1922"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop. The index is not reset, so every
# row keeps the label 0 it had in its one-row frame.
summary = pd.concat([summarize(results_dir, t) for t in range(1, 4)], axis=0)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9430 (0.0057),0.8017 (0.0144),0.7912 (0.0114),0.7908 (0.0113),0.6924 (0.0673),0.5269 (0.0136),0.6910 (0.1366)
0,Task 2,0.9394 (0.0084),0.9105 (0.0119),0.9116 (0.0116),0.9115 (0.0117),0.8667 (0.0212),0.9046 (0.0125),0.9332 (0.0100)
0,Task 3,0.8826 (0.0114),0.8443 (0.0150),0.8383 (0.0140),0.8375 (0.0147),0.7954 (0.0317),0.7019 (0.0173),0.7927 (0.0508)


## Single seed model selection

In [21]:
results_dir = "results_load_orig/seed192"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop. The index is not reset, so every
# row keeps the label 0 it had in its one-row frame.
summary = pd.concat([summarize(results_dir, t) for t in range(1, 4)], axis=0)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9442 (0.0047),0.8083 (0.0207),0.8003 (0.0207),0.8004 (0.0207),0.7828 (0.0662),0.5349 (0.0328),0.5931 (0.0344)
0,Task 2,0.9424 (0.0081),0.9148 (0.0088),0.9164 (0.0094),0.9164 (0.0094),0.8551 (0.0596),0.9103 (0.0108),0.9369 (0.0076)
0,Task 3,0.8852 (0.0154),0.8439 (0.0154),0.8386 (0.0136),0.8386 (0.0136),0.7963 (0.0706),0.7004 (0.0136),0.7700 (0.0141)


In [31]:
results_dir = "results_load_orig/seed1922"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop. The index is not reset, so every
# row keeps the label 0 it had in its one-row frame.
summary = pd.concat([summarize(results_dir, t) for t in range(1, 4)], axis=0)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9427 (0.0043),0.8017 (0.0144),0.7912 (0.0114),0.7891 (0.0114),0.6806 (0.1253),0.5273 (0.0142),0.5806 (0.0283)
0,Task 2,0.9394 (0.0084),0.9105 (0.0119),0.9116 (0.0116),0.9115 (0.0116),0.8296 (0.0664),0.9049 (0.0126),0.9332 (0.0100)
0,Task 3,0.8841 (0.0107),0.8443 (0.0150),0.8383 (0.0140),0.8360 (0.0146),0.8027 (0.0286),0.7019 (0.0173),0.7631 (0.0181)


# Run with the new data-generating code

## Multiple seed model selection

In [23]:
results_dir = "results_gen/multiseed192"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop. The index is not reset, so every
# row keeps the label 0 it had in its one-row frame.
summary = pd.concat([summarize(results_dir, t) for t in range(1, 4)], axis=0)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9437 (0.0082),0.7891 (0.0297),0.7796 (0.0266),0.7798 (0.0266),0.6658 (0.0933),0.5055 (0.0299),0.7514 (0.1405)
0,Task 2,0.9443 (0.0062),0.9120 (0.0140),0.9127 (0.0140),0.9126 (0.0139),0.8817 (0.0276),0.9061 (0.0179),0.9362 (0.0118)
0,Task 3,0.8886 (0.0142),0.8528 (0.0096),0.8476 (0.0092),0.8473 (0.0090),0.7586 (0.0682),0.7201 (0.0300),0.8242 (0.0499)


In [24]:
results_dir = "results_gen/multiseed1922"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop. The index is not reset, so every
# row keeps the label 0 it had in its one-row frame.
summary = pd.concat([summarize(results_dir, t) for t in range(1, 4)], axis=0)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9407 (0.0032),0.8122 (0.0112),0.8052 (0.0157),0.8051 (0.0155),0.6876 (0.0497),0.5517 (0.0197),0.6063 (0.0311)
0,Task 2,0.9407 (0.0032),0.9194 (0.0080),0.9197 (0.0078),0.9198 (0.0080),0.8818 (0.0282),0.9161 (0.0089),0.9385 (0.0076)
0,Task 3,0.8899 (0.0122),0.8516 (0.0178),0.8460 (0.0183),0.8459 (0.0183),0.7858 (0.0615),0.6977 (0.0228),0.7985 (0.0484)


## Single seed model selection

In [28]:
results_dir = "results_gen/seed192"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop. The index is not reset, so every
# row keeps the label 0 it had in its one-row frame.
summary = pd.concat([summarize(results_dir, t) for t in range(1, 4)], axis=0)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9443 (0.0062),0.7891 (0.0297),0.7796 (0.0266),0.7797 (0.0266),0.6001 (0.0613),0.5057 (0.0299),0.5686 (0.0358)
0,Task 2,0.9443 (0.0062),0.9120 (0.0140),0.9127 (0.0140),0.9127 (0.0140),0.8905 (0.0273),0.9060 (0.0180),0.9362 (0.0118)
0,Task 3,0.8845 (0.0137),0.8528 (0.0096),0.8476 (0.0092),0.8476 (0.0092),0.7410 (0.0988),0.7065 (0.0107),0.8680 (0.0119)


In [30]:
results_dir = "results_gen/seed1922"

# Build the per-task rows first and concatenate once, instead of growing an
# initially empty DataFrame inside the loop. The index is not reset, so every
# row keeps the label 0 it had in its one-row frame.
summary = pd.concat([summarize(results_dir, t) for t in range(1, 4)], axis=0)
summary

Unnamed: 0,Task,ORACLE,Cat-ERM,Avg-ERM,SA,MK,WCSC,Proposed
0,Task 1,0.9407 (0.0032),0.8122 (0.0112),0.8052 (0.0157),0.8053 (0.0157),0.7268 (0.0129),0.5515 (0.0197),0.6063 (0.0311)
0,Task 2,0.9407 (0.0032),0.9194 (0.0080),0.9197 (0.0078),0.9202 (0.0077),0.8892 (0.0088),0.9160 (0.0089),0.9385 (0.0076)
0,Task 3,0.8918 (0.0131),0.8516 (0.0178),0.8460 (0.0183),0.8459 (0.0184),0.8025 (0.0267),0.6978 (0.0224),0.7673 (0.0225)
