In [2]:
import os 
# Switch the working directory to the repository checkout; presumably this is
# what lets the project-local imports (losses, utils) and the relative
# EXP_DIR / SCRIPT_PATH used in this notebook resolve — TODO confirm.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
os.chdir("/nas/ucb/oliveradk/diverse-gen/")

In [3]:
# imports from cc experiments
import json
from functools import partial
from itertools import product
from typing import Optional, Literal, Callable
from tqdm import tqdm
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
import copy
from datetime import datetime

import submitit
from submitit.core.utils import CommandFunction
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from losses.loss_types import LossType
from utils.exp_utils import get_executor, get_executor_local, run_experiments
from utils.utils import conf_to_args

In [4]:
# Root output directory: passed to the executor as out_dir and used by
# get_exp_dir as the parent of every per-run experiment directory.
EXP_DIR = "output/normal_imdb_data_exps/"
# Script handed to run_experiments as script_name.
SCRIPT_PATH = "normal_imdb_data.py"

# Configs

In [6]:
# shared configs: settings copied into every experiment config
base_config = {
    "batch_size": 32,
    "epochs": 3,
    "learning_rate": 2e-5
}

# seeds: one experiment config is generated per (seed, method) pair
seeds = [1, 2, 3]

# methods: display name -> overrides layered on top of base_config.
# The display name is also the per-method sub-directory under EXP_DIR.
# NOTE(review): aux_weight presumably scales the auxiliary loss term, which
# would make aux_weight=0.0 plain ERM — confirm against normal_imdb_data.py.
methods = {
    "DivDis": {"loss_type": LossType.DIVDIS, "aux_weight": 1.0},
    "TopK 0.1": {"loss_type": LossType.TOPK, "mix_rate_lower_bound": 0.1, "aux_weight": 1.0}, 
    "TopK 0.5": {"loss_type": LossType.TOPK, "mix_rate_lower_bound": 0.5, "aux_weight": 1.0}, 
    "ERM": {"loss_type": LossType.ERM, "aux_weight": 0.0}
}

def get_exp_dir(method_name, i):
    """Return the output directory of one run: ``EXP_DIR/<method_name>/<i>``.

    Args:
        method_name: Display name of the method (a key of ``methods``).
        i: Run identifier; callers in this notebook pass the seed.

    Returns:
        A ``pathlib.Path`` rooted at ``EXP_DIR``.
    """
    run_subdir = f"{method_name}/{i}"
    return Path(EXP_DIR).joinpath(run_subdir)

# generate experiment configs: one per (seed, method) pair, seeds in the outer
# position so the list is ordered seed-major.
experiment_configs = []
for seed, (method_name, method_config) in product(seeds, methods.items()):
    # Later entries win on key clashes: base settings, then the method's
    # overrides, then the per-run seed and output directory.
    config = {
        **copy.deepcopy(base_config),
        **method_config,
        "seed": seed,
        "exp_dir": get_exp_dir(method_name, seed),
    }
    experiment_configs.append(config)

# Run Experiments

In [16]:
# executor = get_executor_local(out_dir=EXP_DIR)
# jobs = run_experiments(
#     executor=executor,
#     experiments=experiment_configs[:1],
#     script_name=SCRIPT_PATH,
# )

In [20]:
# run experiments: submit every config in experiment_configs through the
# executor returned by get_executor, running SCRIPT_PATH for each one.
# The hosts listed below are passed to the executor as slurm_exclude;
# presumably they are the nodes without 80GB GPUs (going by the variable
# name) — TODO confirm.
non_80gb_nodes = ["ddpg", "dqn", "gail", "gan","ppo", "vae"]
slurm_exclude = ",".join([f"{node}.ist.berkeley.edu" for node in non_80gb_nodes])
executor = get_executor(out_dir=EXP_DIR, slurm_exclude=slurm_exclude)
jobs = run_experiments(
    executor=executor,
    experiments=experiment_configs,
    script_name=SCRIPT_PATH,
)

In [23]:
# Spot-check the first submitted job by printing its captured stderr
# (dataset-cache notices and the training progress bars show up there).
print(jobs[0].stderr())

Using the latest cached version of the dataset since imdb couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'plain_text' at /nas/ucb/oliveradk/.cache/datasets/imdb/plain_text/0.0.0/e6281661ce1c48d982bc483cf8a173c1bbeb5d31 (last modified on Fri Jan 24 21:04:34 2025).

Epochs:   0%|          | 0/3 [00:00<?, ?it/s]

Train:   0%|          | 0/625 [00:00<?, ?it/s][A

Train:   0%|          | 1/625 [00:01<13:25,  1.29s/it][A

Train:   0%|          | 2/625 [00:01<09:16,  1.12it/s][A

Train:   0%|          | 3/625 [00:02<07:54,  1.31it/s][A

Train:   1%|          | 4/625 [00:03<07:19,  1.41it/s][A

Train:   1%|          | 5/625 [00:03<07:02,  1.47it/s][A

Train:   1%|          | 6/625 [00:04<06:48,  1.52it/s][A

Train:   1%|          | 7/625 [00:05<06:41,  1.54it/s][A

Train:   1%|▏         | 8/625 [00:05<06:33,  1.57it/s][A

Train:   1%|▏         | 9/625 [00:06<06:24,  1.60it/s][A

Train:   2%|▏         | 10/625 [00:06<06:26,  1.59it/s][A

Train

# Aggregate Results

In [7]:
from typing import Literal
def get_exp_metrics(conf: dict):
    """Load the metrics recorded for a single experiment.

    Args:
        conf: Experiment config. ``conf["exp_dir"]`` is the experiment's
            output directory, given as a ``pathlib.Path`` or as a string.

    Returns:
        The parsed contents of ``<exp_dir>/metrics.json``.

    Raises:
        FileNotFoundError: If ``metrics.json`` does not exist in ``exp_dir``.
    """
    # Coerce to Path so a config whose exp_dir is a plain string (e.g. after a
    # JSON round-trip) works too; `str / str` would raise TypeError.
    metrics_path = Path(conf["exp_dir"]) / "metrics.json"
    if not metrics_path.exists():
        raise FileNotFoundError(f"Metrics file not found for experiment {conf['exp_dir']}")
    with open(metrics_path, "r") as f:
        return json.load(f)

def get_max_acc(
    exp_metrics: dict,
    acc_metric: Literal["test_acc", "test_worst_acc", "test_acc_alt"]="test_acc",
    model_selection: Literal["acc", "loss", "weighted_loss", "repulsion_loss"]="acc"
):
    """Return the better of the two ``<acc_metric>_1`` / ``<acc_metric>_2`` values at the selected index.

    Args:
        exp_metrics: Metrics dict holding the ``f"{acc_metric}_1"`` and
            ``f"{acc_metric}_2"`` sequences plus, for loss-based selection,
            the corresponding validation-loss sequence.
        acc_metric: Prefix of the two accuracy sequences to compare.
        model_selection: How the index is chosen. ``"acc"`` takes the index
            with the highest element-wise maximum accuracy; ``"loss"``,
            ``"weighted_loss"`` and ``"repulsion_loss"`` take the argmin of
            ``val_loss``, ``val_weighted_loss`` and
            ``target_val_weighted_repulsion_loss`` respectively.

    Returns:
        The element-wise maximum of the two accuracy sequences at that index.

    Raises:
        ValueError: If ``model_selection`` is not one of the options above.
    """
    first = np.array(exp_metrics[f'{acc_metric}_1'])
    second = np.array(exp_metrics[f'{acc_metric}_2'])
    max_accs = np.maximum(first, second)

    # Loss-based criteria differ only in which sequence is minimised.
    loss_key_by_selection = {
        "loss": "val_loss",
        "weighted_loss": "val_weighted_loss",
        "repulsion_loss": "target_val_weighted_repulsion_loss",
    }

    if model_selection == "acc":
        selected_idx = np.argmax(max_accs)
    elif model_selection in loss_key_by_selection:
        selected_idx = np.argmin(exp_metrics[loss_key_by_selection[model_selection]])
    else:
        raise ValueError(f"Invalid model selection: {model_selection}")
    return max_accs[selected_idx]

def get_acc_results(
    exp_configs: list[dict],
    acc_metric: Literal["test_acc", "test_worst_acc", "test_acc_alt"]="test_acc",
    model_selection: Literal["acc", "loss", "weighted_loss", "repulsion_loss"]="acc",
    verbose: bool=False
):
    """Collect one accuracy per experiment config, as a flat list.

    Each config's metrics are loaded with ``get_exp_metrics`` and reduced to a
    single value with ``get_max_acc``. Configs whose metrics file is missing
    are skipped, so the result can be shorter than ``exp_configs``.

    Args:
        exp_configs: Experiment configs, each with an ``"exp_dir"`` entry.
        acc_metric: Forwarded to ``get_max_acc``.
        model_selection: Forwarded to ``get_max_acc``.
        verbose: If true, print a line for every skipped experiment.

    Returns:
        List of accuracies, in the order of the configs that had metrics.
    """
    accuracies = []
    for exp_conf in exp_configs:
        try:
            metrics = get_exp_metrics(exp_conf)
            accuracies.append(get_max_acc(metrics, acc_metric, model_selection))
        except FileNotFoundError:
            # Best effort: a run that has not produced metrics is left out.
            if verbose:
                print(f"Metrics file not found for experiment {exp_conf['exp_dir']}")
    return accuracies

In [8]:
def method_from_config(conf: dict) -> str:
    """Map an experiment config to the display name of its method.

    Args:
        conf: Experiment config with a ``"loss_type"`` entry (and, for TopK,
            a ``"mix_rate_lower_bound"`` entry).

    Returns:
        ``"DivDis"``, ``"TopK <mix_rate_lower_bound>"`` or ``"ERM"``.

    Raises:
        ValueError: If the loss type is none of DIVDIS, TOPK or ERM.
    """
    loss_type = conf["loss_type"]
    if loss_type == LossType.DIVDIS:
        return "DivDis"
    if loss_type == LossType.TOPK:
        return f"TopK {conf['mix_rate_lower_bound']}"
    if loss_type == LossType.ERM:
        return "ERM"
    raise ValueError(f"Invalid method: {conf['loss_type']}")


In [9]:
# Bucket the experiment configs by the display name of their method.
exps_by_method = defaultdict(list)
for conf in experiment_configs:
    method_key = method_from_config(conf)
    exps_by_method[method_key].append(conf)

# Method name -> accuracies of its runs (default metric and model selection).
acc_results = {
    method_name: get_acc_results(exps)
    for method_name, exps in exps_by_method.items()
}


In [10]:
# Per-method mean and sample standard deviation (ddof=1), both in percent.
stats_dict = {}
for method, scores in acc_results.items():
    stats_dict[method] = {
        'mean': np.mean(scores) * 100,
        'std': np.std(scores, ddof=1) * 100,
    }

# One row per method, accuracy rendered as "mean ± std" with one decimal.
table_rows = [
    {'Method': method, 'Accuracy': f"{stats['mean']:.1f} ± {stats['std']:.1f}"}
    for method, stats in stats_dict.items()
]
df = pd.DataFrame(table_rows, columns=['Method', 'Accuracy'])

# Emit the table as LaTeX; escape=False leaves the cell text untouched.
latex_table = df.to_latex(index=False, escape=False)
print(latex_table)

\begin{tabular}{ll}
\toprule
Method & Accuracy \\
\midrule
DivDis & 91.6 ± 0.2 \\
TopK 0.1 & 89.1 ± 1.0 \\
TopK 0.5 & 84.0 ± 2.8 \\
ERM & 91.8 ± 0.1 \\
\bottomrule
\end{tabular}

