In [1]:
import os

# Every path below is relative, so the notebook must run from the repository root.
# The root can be overridden with the DIVERSE_GEN_ROOT environment variable; when it
# is unset, the original hard-coded location is used.
os.chdir(os.environ.get("DIVERSE_GEN_ROOT", "/nas/ucb/oliveradk/diverse-gen/"))

In [2]:
from collections import defaultdict
from pathlib import Path
import itertools
from typing import Optional

import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import numpy as np
import pandas as pd
from omegaconf import OmegaConf

from diverse_gen.utils.exp_utils import get_conf_dir
from diverse_gen.utils.proc_data_utils import get_exp_metrics, get_max_acc, get_acc_results

In [3]:
# Experiment directories (relative to the working directory set above).
# MAIN_DIR and GROUP_LABELS_DIR are the roots that get_results() reads from;
# RESULTS_DIR receives the exported CSV files and is created on demand.
_EXPERIMENT = "incomplete_waterbirds"
MAIN_DIR = f"output/{_EXPERIMENT}/main"
GROUP_LABELS_DIR = f"output/{_EXPERIMENT}/group_labels"
RESULTS_DIR = f"results/{_EXPERIMENT}"
os.makedirs(RESULTS_DIR, exist_ok=True)

In [4]:
# Experiment grid: get_results() takes the product of these values to locate
# the individual run directories.
METHODS = ["TopK_0.1", "TopK_0.5", "ERM", "DBAT", "DivDis"]
SEEDS = list(range(1, 4))
DATASETS = ["waterbirds"]
MIX_RATES = [None]

In [26]:
def get_results(acc_metric: str, dir_path: str, methods: list[str], head_idx: Optional[int] = None):
    """Collect accuracy results for every (dataset, method) combination.

    For each dataset in ``DATASETS`` and each entry of ``methods``, one experiment
    directory per (mix rate, seed) pair from ``MIX_RATES`` x ``SEEDS`` is resolved
    with ``get_conf_dir`` and the whole list is handed to ``get_acc_results``.

    Args:
        acc_metric: Metric name forwarded to ``get_acc_results``.
        dir_path: Root directory forwarded to ``get_conf_dir``.
        methods: Method names to collect results for.
        head_idx: Forwarded unchanged to ``get_acc_results``.

    Returns:
        A plain nested ``dict`` where ``results[dataset][method]`` is the value
        returned by ``get_acc_results`` (presumably a mapping from mix rate to a
        list of accuracies, which is how ``to_df`` consumes it -- TODO confirm).

    Raises:
        Exception: Anything raised by ``get_acc_results`` is reported on stdout
            and then re-raised unchanged.
    """
    # Plain dicts are used instead of lambda-backed defaultdicts so that the
    # returned object never creates keys on lookup and remains picklable.
    results: dict = {}
    for dataset, method in itertools.product(DATASETS, methods):
        # DBAT is the only method selected on the source validation loss.
        model_selection = "val_loss" if method != "DBAT" else "val_source_loss"
        exp_dirs = [
            get_conf_dir((dataset, method, mix_rate, seed), dir_path)
            for mix_rate, seed in itertools.product(MIX_RATES, SEEDS)
        ]
        perf_source_acc = dataset == "toy_grid"
        try:
            method_results = get_acc_results(
                exp_dirs=exp_dirs, acc_metric=acc_metric, model_selection=model_selection,
                perf_source_acc=perf_source_acc, verbose=True, head_idx=head_idx
            )
        except Exception as e:
            print(f"Error getting results for {dataset} {method}: {e}")
            # Bare raise keeps the original traceback intact.
            raise
        results.setdefault(dataset, {})[method] = method_results
    return results

In [25]:
def to_df(results: dict[str, dict[str, dict]]):
    """Flatten nested accuracy results into a long-format DataFrame.

    Args:
        results: Nested mapping ``dataset -> method -> mix_rate -> iterable of
            accuracies``.

    Returns:
        A DataFrame with one row per accuracy value and the columns
        ``Dataset``, ``Method``, ``Mix_Rate`` and ``Accuracy``, sorted by the
        first three. Empty input yields an empty frame that still carries all
        four columns.
    """
    columns = ['Dataset', 'Method', 'Mix_Rate', 'Accuracy']
    records = [
        {'Dataset': dataset, 'Method': method, 'Mix_Rate': mix_rate, 'Accuracy': acc}
        for dataset, method_dict in results.items()
        for method, mix_rate_dict in method_dict.items()
        for mix_rate, accuracies in mix_rate_dict.items()
        for acc in accuracies
    ]
    # Passing the columns explicitly keeps the schema when there are no records;
    # otherwise sort_values would raise KeyError on a column-less frame.
    df = pd.DataFrame(records, columns=columns)
    return df.sort_values(['Dataset', 'Method', 'Mix_Rate'])

In [7]:
import json

# Load the raw metrics of a single run (DivDis, seed 1, group-label setting) for
# manual inspection. The context manager closes the file handle after reading.
with open("output/incomplete_waterbirds/group_labels/waterbirds_DivDis_None/1/metrics.json") as metrics_file:
    metrics = json.load(metrics_file)

In [27]:
# Main runs: build one long-format frame per metric, then export each to CSV.
_main_frames = [
    to_df(get_results(metric_name, dir_path=MAIN_DIR, methods=METHODS))
    for metric_name in ("test_acc", "test_acc_alt", "test_worst_acc")
]
acc_df, alt_acc_df, worst_acc_df = _main_frames

for _csv_name, _frame in zip(("acc_df.csv", "alt_acc_df.csv", "worst_acc_df.csv"), _main_frames):
    _frame.to_csv(os.path.join(RESULTS_DIR, _csv_name), index=False)

In [28]:
# Group-label runs exclude ERM. Filtering the list (rather than going through a
# set) keeps the METHODS order, so the processing order is the same on every run.
METHODS_NO_ERM = [method for method in METHODS if method != "ERM"]

# Head 0 supplies the average and worst-group accuracy; head 1 evaluated on
# "test_acc" is reported as the alternative accuracy.
acc_gl_df = to_df(get_results(
    "test_acc", dir_path=GROUP_LABELS_DIR, methods=METHODS_NO_ERM, head_idx=0
))
alt_acc_gl_df = to_df(get_results(
    "test_acc", dir_path=GROUP_LABELS_DIR, methods=METHODS_NO_ERM, head_idx=1
))
worst_acc_gl_df = to_df(get_results(
    "test_worst_acc", dir_path=GROUP_LABELS_DIR, methods=METHODS_NO_ERM, head_idx=0
))

acc_gl_df.to_csv(os.path.join(RESULTS_DIR, "acc_gl_df.csv"), index=False)
alt_acc_gl_df.to_csv(os.path.join(RESULTS_DIR, "alt_acc_gl_df.csv"), index=False)
worst_acc_gl_df.to_csv(os.path.join(RESULTS_DIR, "worst_acc_gl_df.csv"), index=False)


In [16]:
# Reload the raw metrics of a single run (DivDis, seed 1, group-label setting).
# The context manager closes the file handle after reading.
with open("output/incomplete_waterbirds/group_labels/waterbirds_DivDis_None/1/metrics.json") as metrics_file:
    metrics = json.load(metrics_file)

In [29]:
# Create DataFrame with all metrics
def print_latex_table(acc_df, alt_acc_df, worst_acc_df):
    df = pd.DataFrame({
        'Method': [],
        'Average Acc': [],
        'Alternative Acc': [],
        'Worst-Group Acc': []
    })

    for method in METHODS:
        avg_acc = f"{acc_df[acc_df['Method'] == method]['Accuracy'].mean()*100:.1f} ± {acc_df[acc_df['Method'] == method]['Accuracy'].std()*100:.1f}"
        alt_acc = f"{alt_acc_df[alt_acc_df['Method'] == method]['Accuracy'].mean()*100:.1f} ± {alt_acc_df[alt_acc_df['Method'] == method]['Accuracy'].std()*100:.1f}"
        worst_acc = f"{worst_acc_df[worst_acc_df['Method'] == method]['Accuracy'].mean()*100:.1f} ± {worst_acc_df[worst_acc_df['Method'] == method]['Accuracy'].std()*100:.1f}"
        
        df = pd.concat([df, pd.DataFrame({
            'Method': [method.replace("_", " ")],
            'Average Acc': [avg_acc],
            'Alternative Acc': [alt_acc],
            'Worst-Group Acc': [worst_acc]
        })], ignore_index=True)

    # Print LaTeX table
    print(df.to_latex(index=False, escape=True))

In [30]:

# Summary table for the main runs.
print_latex_table(acc_df=acc_df, alt_acc_df=alt_acc_df, worst_acc_df=worst_acc_df)

\begin{tabular}{llll}
\toprule
Method & Average Acc & Alternative Acc & Worst-Group Acc \\
\midrule
TopK 0.1 & 88.9 ± 1.3 & 74.0 ± 3.6 & 53.9 ± 12.8 \\
TopK 0.5 & 92.8 ± 0.4 & 90.7 ± 1.1 & 70.9 ± 2.3 \\
ERM & 84.7 ± 2.9 & 63.1 ± 2.9 & 51.1 ± 10.2 \\
DBAT & 79.8 ± 0.9 & 70.2 ± 2.2 & 61.5 ± 3.7 \\
DivDis & 91.0 ± 3.2 & 72.9 ± 1.3 & 67.7 ± 8.1 \\
\bottomrule
\end{tabular}



In [31]:
# Summary table for the group-label runs.
print_latex_table(acc_df=acc_gl_df, alt_acc_df=alt_acc_gl_df, worst_acc_df=worst_acc_gl_df)

\begin{tabular}{llll}
\toprule
Method & Average Acc & Alternative Acc & Worst-Group Acc \\
\midrule
TopK 0.1 & 87.9 ± 0.7 & 89.7 ± 2.6 & 54.3 ± 8.8 \\
TopK 0.5 & 92.5 ± 0.9 & 94.8 ± 0.5 & 75.6 ± 7.0 \\
ERM & nan ± nan & nan ± nan & nan ± nan \\
DBAT & 90.7 ± 1.8 & 91.6 ± 1.2 & 67.5 ± 9.0 \\
DivDis & 91.7 ± 1.4 & 92.4 ± 0.9 & 66.9 ± 3.2 \\
\bottomrule
\end{tabular}

