In [13]:
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor, as_completed

import wandb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Download the data

In [14]:
from collections import defaultdict


api = wandb.Api()
target_names = ["gaussian_mixture40", "many_well"]# , "planar_robot_4goal", "gaussian_mixture40"]
dims = [5, 32, 10, 50]

algorithm_names = [
    "tb",
    "gfnbuf",
    "gfnrbuf",
    "gfnlbuf",
    "gfnpiwbuf",
    "gfnsmc_flowdim64",
    "gfnsmcbuf_flowdim64",
    "gfnsmcrbuf_flowdim64",
    "gfnsmcpiwbuf_flowdim64"
]

wandb_tag_filter = {
    "$all": ["ablation_buf"],
    "$nin": ["hidden", "legacy"],
}

wandb_filter = {
    "tags": wandb_tag_filter,
    "config.algorithm_model_use_lp": False,
}

runs = defaultdict(dict)

# first key is target_name-dim
for target_name, dim in zip(target_names, dims):
    wandb_filter["config.target_name"] = target_name
    wandb_filter["config.target_dim"] = dim

    for algorithm_name in algorithm_names:
        wandb_filter["config.wandb_name"] = algorithm_name
        runs[f"{target_name}-{dim}d"][algorithm_name] = api.runs(f"sanghyeok-choi/sampling_bench", filters=wandb_filter)
        print(f"Number of runs in {target_name}-{dim}d/{algorithm_name}: {len(runs[f'{target_name}-{dim}d'][algorithm_name])}")


Number of runs in gaussian_mixture40-5d/tb: 5
Number of runs in gaussian_mixture40-5d/gfnbuf: 5
Number of runs in gaussian_mixture40-5d/gfnrbuf: 5
Number of runs in gaussian_mixture40-5d/gfnlbuf: 5
Number of runs in gaussian_mixture40-5d/gfnpiwbuf: 5
Number of runs in gaussian_mixture40-5d/gfnsmc_flowdim64: 5
Number of runs in gaussian_mixture40-5d/gfnsmcbuf_flowdim64: 5
Number of runs in gaussian_mixture40-5d/gfnsmcrbuf_flowdim64: 5
Number of runs in gaussian_mixture40-5d/gfnsmcpiwbuf_flowdim64: 5
Number of runs in many_well-32d/tb: 5
Number of runs in many_well-32d/gfnbuf: 5
Number of runs in many_well-32d/gfnrbuf: 5
Number of runs in many_well-32d/gfnlbuf: 5
Number of runs in many_well-32d/gfnpiwbuf: 0
Number of runs in many_well-32d/gfnsmc_flowdim64: 5
Number of runs in many_well-32d/gfnsmcbuf_flowdim64: 5
Number of runs in many_well-32d/gfnsmcrbuf_flowdim64: 5
Number of runs in many_well-32d/gfnsmcpiwbuf_flowdim64: 5


In [15]:
### Prepare dataframes

# Prepare metrics (columns)
metrics = ["KL/eubo", "KL/elbo", "logZ/reverse", "discrepancies/sd", "KL/eubo-elbo"]
metrics_std = [f"{m}_std" for m in metrics]

# make a dataframe with group_keys as multi-index and metrics as columns
metrics_dfs = {}
for target_name_dim in runs.keys():
    # row is algorithm_name
    # column is metrics_columns
    metrics_dfs[target_name_dim] = pd.DataFrame(columns=metrics + metrics_std) 

In [16]:

def process_group_key(alg_name, alg_runs, metrics, timesteps, n_seeds):
    # Save to numpy array first
    metrics_arr = np.zeros((n_seeds, len(metrics)))

    metrics_wo_gap = [m for m in metrics if m != "KL/eubo-elbo"]
    for i, run in enumerate(alg_runs):
        # Fetch last 5 metrics
        last_5_df = run.history(samples=timesteps[-1] + 1, keys=metrics_wo_gap)
        last_5_df.set_index("_step", inplace=True)
        last_5_values = last_5_df.loc[timesteps, metrics_wo_gap]
        last_5_values.loc[:, "KL/eubo-elbo"] = last_5_values["KL/eubo"] - last_5_values["KL/elbo"]
        metrics_arr[i] = last_5_values.values.mean(axis=0)

    return {
        'key': alg_name,
        'mean': metrics_arr.mean(axis=0),  # average over 5 seeds
        'std': metrics_arr.std(axis=0),  # std over 5 seeds
    }


last_5_timesteps = [19200, 19400, 19600, 19800, 19999]  # to be averaged
n_seeds = 5

for target_name_dim in runs.keys():
    print(f"Downloading runs for {target_name_dim}")

    target_runs = runs[target_name_dim]
    keys = target_runs.keys()
    with ThreadPoolExecutor(max_workers=min(32, len(runs[target_name_dim]))) as executor:
        futures = {
            executor.submit(
                process_group_key, 
                key,
                target_runs[key],
                metrics,
                last_5_timesteps,
                n_seeds,
            ): key for key in keys
        }

        for future in as_completed(futures):
            result = future.result()
            key = result['key']

            # Store results in dataframes
            metrics_dfs[target_name_dim].loc[key, metrics] = result['mean']
            metrics_dfs[target_name_dim].loc[key, metrics_std] = result['std']

Downloading runs for gaussian_mixture40-5d
Downloading runs for many_well-32d


### Main Results

In [17]:
metrics_dfs["gaussian_mixture40-5d"]

Unnamed: 0,KL/eubo,KL/elbo,logZ/reverse,discrepancies/sd,KL/eubo-elbo,KL/eubo_std,KL/elbo_std,logZ/reverse_std,discrepancies/sd_std,KL/eubo-elbo_std
gfnrbuf,2412.913555,-5.178561,-3.677309,3158.956328,2418.092116,216.382077,0.015256,0.040212,122.466032,216.388139
gfnlbuf,2488.261055,-5.179192,-3.681004,3324.253008,2493.440247,334.997895,0.014612,0.037458,129.561946,335.006911
gfnbuf,3266.584873,-5.185336,-3.680988,3614.836484,3271.770209,1268.692176,0.017176,0.039566,549.967779,1268.706345
gfnsmcbuf_flowdim64,5062.616328,-4.783566,-3.275308,3961.92709,5067.399894,4758.204121,0.330076,0.309244,751.38314,4758.425318
gfnpiwbuf,1183.338242,-4.484685,-2.987062,2813.899775,1187.822928,54.706816,0.013504,0.044822,133.096071,54.706906
gfnsmcpiwbuf_flowdim64,2.339992,-11.4642,0.005432,83.30928,13.804193,0.053392,0.588923,0.063267,10.108527,0.641873
tb,3156.692051,-5.191857,-3.685236,3110.164658,3161.883908,305.526451,0.01098,0.02875,121.352446,305.526269
gfnsmcrbuf_flowdim64,3037.920801,-4.516212,-3.003354,4028.699229,3042.437013,804.658245,0.035504,0.037013,819.119063,804.648119
gfnsmc_flowdim64,30.120313,-10.564371,-0.193709,330.907288,40.684684,18.63486,2.571558,0.121388,184.975226,16.650551


In [21]:
def print_latex_table(
    target_names: list[str],
    target_names_to_display_name: dict[str, str],
    metrics: list[str],
    metric_names: list[str],
    final_metrics_dfs: dict[str, pd.DataFrame],
):
    # header = f"{'Algorithm': <150}"
    # for target_name in target_names:
    #     name, dim = target_name.replace('_', '').split('-')
    #     for metric_name in metric_names:
    #         col = f"{name.upper()}({dim}) {metric_name}"
    #         header += f" & {col: <24}"
    # header += "\\\\"
    # print(header)

    # Print latex table with column: target_names[0]-elbo & target_names[0]-eubo & target_names[1]-elbo & energy_names[1]-eubo & ...
    indices = final_metrics_dfs[target_names[0]].index
    for df in final_metrics_dfs.values():
        if not np.all(df.index == indices):
            raise ValueError("All dataframes must have the same index")

    out = f"{'Algorithm': <20}"
    for target_name in target_names:
        for metric_name in metric_names:
            col = f"{target_names_to_display_name[target_name]} {metric_name}"
            out += f" & {col: <24}"
    out += "\n"

    for idx in indices:
        out += f"{idx: <20}\n"
        for target_name in target_names:
            temp_df = final_metrics_dfs[target_name].loc[idx]
            temp_df = final_metrics_dfs[target_name].loc[idx]
            for metric in metrics:
                val, std = temp_df[metric], temp_df[f"{metric}_std"]
                if val < -1e4 or val > 1e4:
                    # Convert to scientific notation
                    record = f"{val:.2e}\scriptsize$\pm${std:0.2e} \n"
                else:
                    record = f"{val:0.2f}\scriptsize$\pm${std:0.2f} \n"
                out += f"\t& {record: <24}"
        out += "\\\\"
        out += "\n"
    print(out)
    

analysis_name = "main"
lp = [False]

report_metrics = ["KL/elbo", "KL/eubo"]#, "discrepancies/sd"]
report_metric_names = ["ELBO", "EUBO"]#, "Sinkhorn"]
# metrics = ["KL/eubo-elbo", "discrepancies/sd"]
# metric_names = ["EUBO-ELBO", "Sinkhorn"]

run_name_to_display_name = OrderedDict({
    "tb": "TB",
    "gfnbuf": " + Buffer",
    "gfnrbuf": " + R-Buffer",
    "gfnlbuf": " + L-Buffer",
    "gfnpiwbuf": " + IW-Buffer",
    "gfnsmc_flowdim64": "TB/SubTB + SMC",
    "gfnsmcbuf_flowdim64": "z + Buffer",
    "gfnsmcrbuf_flowdim64": "z + R-Buffer",
    "gfnsmcpiwbuf_flowdim64": "z + IW-Buffer",
})

target_names_to_display_name = {
    "gaussian_mixture40-2d": "GMM40 (2d)",
    "gaussian_mixture40-5d": "GMM40 (5d)",
    "funnel-10d": "Funnel (10d)",
    "many_well-32d": "Many Well (32d)",
}

new_dfs = {key: df.copy() for key, df in metrics_dfs.items()}
for key, df in new_dfs.items():
    df.index = df.index.map(run_name_to_display_name)
    # Filter out keys that are not in run_name_to_display_name
    df = df[df.index.isin(list(run_name_to_display_name.values()))]
    print(run_name_to_display_name.values())
    df = df.reindex(list(run_name_to_display_name.values()))
    new_dfs[key] = df

# save_final_metrics_dfs_to_csv(energy_names, metrics, metric_names, new_df, analysis_name)
print_latex_table(
    list(runs.keys()),
    target_names_to_display_name,
    report_metrics,
    report_metric_names,
    new_dfs,
)


odict_values(['TB', ' + Buffer', ' + R-Buffer', ' + L-Buffer', ' + IW-Buffer', 'TB/SubTB + SMC', 'z + Buffer', 'z + R-Buffer', 'z + IW-Buffer'])
odict_values(['TB', ' + Buffer', ' + R-Buffer', ' + L-Buffer', ' + IW-Buffer', 'TB/SubTB + SMC', 'z + Buffer', 'z + R-Buffer', 'z + IW-Buffer'])
Algorithm            & GMM40 (5d) ELBO          & GMM40 (5d) EUBO          & Many Well (32d) ELBO     & Many Well (32d) EUBO    
TB                  
	& -5.19\scriptsize$\pm$0.01 
	& 3156.69\scriptsize$\pm$305.53 
	& 160.69\scriptsize$\pm$0.02 
	& 252.37\scriptsize$\pm$6.00 
\\
 + Buffer           
	& -5.19\scriptsize$\pm$0.02 
	& 3266.58\scriptsize$\pm$1268.69 
	& 162.68\scriptsize$\pm$0.13 
	& 170.06\scriptsize$\pm$3.51 
\\
 + R-Buffer         
	& -5.18\scriptsize$\pm$0.02 
	& 2412.91\scriptsize$\pm$216.38 
	& 161.23\scriptsize$\pm$2.31 
	& 172.36\scriptsize$\pm$3.51 
\\
 + L-Buffer         
	& -5.18\scriptsize$\pm$0.01 
	& 2488.26\scriptsize$\pm$335.00 
	& 162.65\scriptsize$\pm$0.09 
	& 173.26\scri