In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "" #"1"
os.chdir("/nas/ucb/oliveradk/diverse-gen/")

In [3]:
import json
from functools import partial
from itertools import product
from typing import Optional, Literal, Callable
from tqdm import tqdm
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from copy import deepcopy
from datetime import datetime
from collections import defaultdict

import submitit
from submitit.core.utils import CommandFunction
import nevergrad as ng
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go

from losses.loss_types import LossType
from utils.exp_utils import get_executor, get_executor_local, run_experiments, get_conf_dir
from utils.proc_data_utils import get_acc_results
from utils.utils import conf_to_args

In [4]:
SCRIPT_NAME = "spur_corr_exp.py"
PARENT_DIR = Path("output/real_data_exps")
SUBDIR = "2025-02-12_20-29-07" 
if SUBDIR is None: 
    SUBDIR = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
EXP_DIR = PARENT_DIR / SUBDIR
EXP_DIR.mkdir(parents=True, exist_ok=True)

In [5]:
seeds = [1, 2, 3]

# TODO: add aux weights based on tuining
method_configs = {
    "TopK 0.1": {"loss_type": LossType.TOPK, "mix_rate_lower_bound": 0.1, "aux_weight": 5, "mix_rate_schedule": "linear"},
    "TopK 0.5": {"loss_type": LossType.TOPK, "mix_rate_lower_bound": 0.5, "aux_weight": 3, "mix_rate_schedule": "linear"},
}


dataset_configs = {
    "waterbirds": {"dataset": "waterbirds", "model": "Resnet50", "epochs": 5, "source_cc": False, "batch_size": 32, "target_batch_size": 64},
}

for dataset_conf in dataset_configs.values():
    dataset_conf["mix_rate_t0"] = 0
    dataset_conf["mix_rate_t1"] = dataset_conf["epochs"]



configs = {
    (ds_name, method_name, seed): {**ds_config, **method_config, "seed": seed} 
    for (ds_name, ds_config), (method_name, method_config) in product(dataset_configs.items(), method_configs.items())
    for seed in seeds
}
    # return f"{ds}_{method}/{seed}" # TODO: fix

for (ds_name, method_name, seed), conf in configs.items():
    exp_dir = get_conf_dir((ds_name, method_name, seed), EXP_DIR)
    conf["exp_dir"] = exp_dir

# Run Experiments

In [8]:
executor = get_executor(EXP_DIR, mem_gb=16)
jobs = run_experiments(executor, list(configs.values()), SCRIPT_NAME)

In [9]:
print(jobs[0].stdout())

submitit INFO (2025-02-12 20:29:34,242) - Starting with JobEnvironment(job_id=815333_0, hostname=ppo.ist.berkeley.edu, local_rank=0(1), node=0(1), global_rank=0(1))
submitit INFO (2025-02-12 20:29:34,243) - Loading pickle: /nas/ucb/oliveradk/diverse-gen/output/real_data_exps/2025-02-12_20-29-07/815333_0_submitted.pkl
The following command is sent: "python spur_corr_exp.py dataset=waterbirds model=Resnet50 epochs=5 source_cc=False batch_size=32 target_batch_size=64 mix_rate_t0=0 mix_rate_t1=5 loss_type=TOPK mix_rate_lower_bound=0.1 aux_weight=5 mix_rate_schedule=linear seed=1 exp_dir=output/real_data_exps/2025-02-12_20-29-07/waterbirds_TopK 0.1/1"



# Plot Results

In [8]:
from collections import defaultdict
exps_by_method = defaultdict(list)
for (ds_name, method_name, seed), conf in configs.items():
    exps_by_method[method_name].append(conf)

results = {
    method_name: get_acc_results(method_exps, model_selection="val_loss", acc_metric="test_acc")
    for method_name, method_exps in exps_by_method.items()
}

results_alt = {
    method_name: get_acc_results(method_exps, model_selection="val_loss", acc_metric="test_acc_alt")
    for method_name, method_exps in exps_by_method.items()
}

results_worst = {
    method_name: get_acc_results(method_exps, model_selection="val_loss", acc_metric="test_worst_acc")
    for method_name, method_exps in exps_by_method.items()
}

In [11]:
results, results_alt, results_worst

({'TopK 0.1': {0.0: [0.8793579339981079,
    0.8391439318656921,
    0.8886779546737671]},
  'TopK 0.5': {0.0: [0.915774941444397,
    0.9178460240364075,
    0.8921297788619995]}},
 {'TopK 0.1': {0.0: [0.7359337210655212,
    0.7222989201545715,
    0.7414566874504089]},
  'TopK 0.5': {0.0: [0.8505350351333618,
    0.8729720115661621,
    0.9035208821296692]}},
 {'TopK 0.1': {0.0: [0.5841121673583984,
    0.41588786244392395,
    0.6479750871658325]},
  'TopK 0.5': {0.0: [0.7445482611656189,
    0.7757009267807007,
    0.7429906725883484]}})

In [9]:
# Create DataFrame with all metrics
df = pd.DataFrame({
    'Method': [],
    'Average Acc': [],
    'Alternative Acc': [],
    'Worst-Group Acc': []
})

for method in results.keys():
    avg_acc = f"{np.mean(results[method])*100:.1f} ± {np.std(results[method])*100:.1f}"
    alt_acc = f"{np.mean(results_alt[method])*100:.1f} ± {np.std(results_alt[method])*100:.1f}"
    worst_acc = f"{np.mean(results_worst[method])*100:.1f} ± {np.std(results_worst[method])*100:.1f}"
    
    df = pd.concat([df, pd.DataFrame({
        'Method': [method],
        'Average Acc': [avg_acc],
        'Alternative Acc': [alt_acc],
        'Worst-Group Acc': [worst_acc]
    })], ignore_index=True)

# Print LaTeX table
print(df.to_latex(index=False, escape=True))

TypeError: unsupported operand type(s) for /: 'dict' and 'int'