In [1]:
import os
import glob
import pickle
import time

os.environ["PRIVBAYES_BIN"] = "./ydnpd/harness/synthesis/privbayes/mac_bin"

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display, Markdown


import ydnpd
from additional import ADDITIONAL_EXPERIMENTS, ADDITIONAL_PATH
from ydnpd import EVALUATION_METRICS, ALL_EXPERIMENTS, Experiments

ADDITIONAL_DATASETS = sum(list(ADDITIONAL_EXPERIMENTS.values()), [])

with open("./results/harness.pkl", "rb") as f:
    utility_tasks_results = pickle.load(f)

for x in utility_tasks_results:
    if type(x["evaluation"]["error_rate_diff"]) == list:
        assert len(x["evaluation"]["error_rate_diff"]) == 1
        x["evaluation"]["error_rate_diff"] = x["evaluation"]["error_rate_diff"][0]

ALL_EXPERIMENTS_SIMPLIFIED = {name: exp.dev_names for name, exp in ALL_EXPERIMENTS.items()}
ADDITIONAL_EXPERIMENTS_SIMPLIFIED = {name: [x[0] for x in exp] for name, exp in ADDITIONAL_EXPERIMENTS.items()}
ALL_EXPERIMENTS_SIMPLIFIED_MERGED = {
    name: ADDITIONAL_EXPERIMENTS_SIMPLIFIED.get(name, []) + dev_names
    for name, dev_names in ALL_EXPERIMENTS_SIMPLIFIED.items()
}

reference_data_mapping = {
    "acs": "acs/national",
    "edad": "edad/2023",
    "we": "we/2023"
}

ALL_EXPERIMENTS_OBJ = {
    name: Experiments(reference_data_mapping.get(name, ""), dev_names)
    for name, dev_names in ALL_EXPERIMENTS_SIMPLIFIED_MERGED.items()
}

# remove all the real data and public data baselines,
# and now the MIX data
to_remove = [
    # 'acs/national',
    # 'edad/2023',
    # 'we/2023',
    'acs/massachusetts_upsampled',
    'edad/2020',
    'we/2018',
    ## MIX data
    # 'edad/gen-MIX-MAX',
    # 'acs/gen-MIX-MAX',
    # 'we/gen-MIX-MAX',
    # 'acs/gen-llama-MIX-MAX',
    # 'edad/gen-llama-MIX-MAX',
    # 'we/gen-llama-MIX-MAX',
    # 'acs/gen-claude-MIX-MAX',
    # 'edad/gen-claude-MIX-MAX',
    # 'we/gen-claude-MIX-MAX',
    # 'acs/gen-gpt-MIX-MAX',
    # 'edad/gen-gpt-MIX-MAX',
    # 'we/gen-gpt-MIX-MAX',
]

ALL_EXPERIMENTS_OBJ = {
    name: Experiments(exp.test_name, [dev for dev in exp.dev_names if dev not in to_remove])
    for name, exp in ALL_EXPERIMENTS_OBJ.items()
}

METRIC_DIRECTION = {
    "total_variation_distance": "closer_to_zero_is_better",
    "marginals_3_max_abs_diff_error": "closer_to_zero_is_better",
    "marginals_3_avg_abs_diff_error": "closer_to_zero_is_better",
    "thresholded_marginals_3_max_abs_diff_error": "closer_to_zero_is_better",
    "thresholded_marginals_3_avg_abs_diff_error": "closer_to_zero_is_better",
    "pearson_corr_max_abs_diff": "closer_to_zero_is_better",
    "pearson_corr_avg_abs_diff": "closer_to_zero_is_better",
    "cramer_v_corr_max_abs_diff": "closer_to_zero_is_better",
    "cramer_v_corr_avg_abs_diff": "closer_to_zero_is_better",
    "error_rate_diff": "closer_to_zero_is_better",  
    "error_rate_train_dataset": "closer_to_zero_is_better",
    "error_rate_synth_dataset": "closer_to_zero_is_better",
    "aoc_diff": "closer_to_zero_is_better",     
    "aoc_train_dataset": "higher_is_better",
    "aoc_synth_dataset": "higher_is_better",
}


  from .autonotebook import tqdm as notebook_tqdm
2025-01-24 07:36:37,885	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
ALL_EXPERIMENTS_OBJ

{'acs': Experiments(test_name='acs/national', dev_names=['acs/gen-MIX-MAX', 'acs/csv-gpt', 'acs/gen-gpt-MIX-MAX', 'acs/sdscm-gpt2', 'acs/gen-llama-MIX-MAX', 'acs/sdscm-olmo-1b-hf', 'acs/gen-gpt-MIX-UNIF', 'acs/gen-claude-MIX-MAX', 'acs/csv-claude', 'acs/gen-claude-MIX-UNIF', 'acs/gen-MIX-UNIF', 'acs/gen-llama-MIX-UNIF', 'acs/sdscm-llama-3-8b', 'acs/csv-llama', 'acs/baseline_univariate', 'acs/baseline_domain', 'acs/arbitrary']),
 'edad': Experiments(test_name='edad/2023', dev_names=['edad/gen-MIX-MAX', 'edad/csv-gpt', 'edad/gen-gpt-MIX-MAX', 'edad/sdscm-gpt2', 'edad/gen-llama-MIX-MAX', 'edad/sdscm-olmo-1b-hf', 'edad/gen-gpt-MIX-UNIF', 'edad/gen-claude-MIX-MAX', 'edad/csv-claude', 'edad/gen-claude-MIX-UNIF', 'edad/gen-MIX-UNIF', 'edad/gen-llama-MIX-UNIF', 'edad/sdscm-llama-3-8b', 'edad/csv-llama', 'edad/baseline_univariate', 'edad/baseline_domain', 'edad/arbitrary']),
 'we': Experiments(test_name='we/2023', dev_names=['we/gen-MIX-MAX', 'we/csv-gpt', 'we/gen-gpt-MIX-MAX', 'we/sdscm-gpt2',

## Absolute rankings

In [3]:
# suppress warnings
import warnings
warnings.filterwarnings("ignore")

column_to_rank_on = 'correspond_test'
for experiments_name, experiments in ALL_EXPERIMENTS_OBJ.items():

    display(Markdown(f"## {experiments_name.upper()}"))

    datasets = {}

    for metric in ydnpd.EVALUATION_METRICS:

        display(Markdown(f"### {metric}"))
        with pd.option_context("display.max_rows", None):
            try: 
                df_extract_all = ydnpd.UtilityTask.evaluate(utility_tasks_results, experiments, metric).multiply(100).round(2)
                
                # flatten df_extract_all
                df_extract_all = df_extract_all.reset_index()

                df_extract_all_smaller = df_extract_all[['synth_name', 'experiment', 'epsilon', column_to_rank_on]] 

                # rank based on lower or higher being better (closer_to_zero_is_better is default for lower_is_better)
                metric_direction = METRIC_DIRECTION[metric]

                df_extract_all_smaller.loc[:, "_rank_value"] = df_extract_all_smaller[column_to_rank_on]

                if metric_direction == "closer_to_zero_is_better":
                    df_extract_all_smaller.loc[:, "_rank_value"] = df_extract_all_smaller["_rank_value"].abs()
                    ascending = True
                    aggfunc = "min"
                elif metric_direction == "lower_is_better":
                    ascending = True
                    aggfunc = "min"
                else:  # "higher_is_better"
                    ascending = False
                    aggfunc = "max"

                idx = df_extract_all_smaller.groupby(
                    ['synth_name', 'experiment', 'epsilon']
                )["_rank_value"].idxmin() if ascending else df_extract_all_smaller.groupby(
                    ['synth_name', 'experiment', 'epsilon']
                )["_rank_value"].idxmax()

                df_best = df_extract_all_smaller.loc[idx].reset_index(drop=True)

                df_experiment_best = df_best.groupby(['synth_name', 'epsilon', 'experiment'])[column_to_rank_on].min().reset_index()
                df_experiment_best['rank'] = df_experiment_best.groupby(['synth_name', 'epsilon'])[column_to_rank_on].rank(method='dense', ascending=True)
                df_experiment_best = df_experiment_best.sort_values(['synth_name', 'epsilon', 'rank'])

                df_final = pd.merge(df_best, df_experiment_best[['synth_name', 'epsilon', 'experiment', 'rank']],
                                    on=['synth_name', 'epsilon', 'experiment'],
                                    how='left')

                df_final_avg = df_final.groupby(['experiment','synth_name'])['rank'].mean().reset_index()

                # for synth_name in df_final_avg['synth_name'].unique():
                df_final_avg.groupby(['experiment','synth_name'])['rank'].mean().reset_index()
                df_final_avg.to_csv(f"./results/all_rankings/{metric}_{experiments_name}.csv")
            except Exception as e:
                print(f"Error: {e}")
                continue



## ACS

### total_variation_distance

### marginals_3_max_abs_diff_error

### marginals_3_avg_abs_diff_error

### thresholded_marginals_3_max_abs_diff_error

### thresholded_marginals_3_avg_abs_diff_error

### pearson_corr_max_abs_diff

### pearson_corr_avg_abs_diff

### cramer_v_corr_max_abs_diff

### cramer_v_corr_avg_abs_diff

### error_rate_diff

### error_rate_train_dataset

### error_rate_synth_dataset

### aoc_diff

### aoc_train_dataset

### aoc_synth_dataset

## EDAD

### total_variation_distance

### marginals_3_max_abs_diff_error

### marginals_3_avg_abs_diff_error

### thresholded_marginals_3_max_abs_diff_error

### thresholded_marginals_3_avg_abs_diff_error

### pearson_corr_max_abs_diff

### pearson_corr_avg_abs_diff

### cramer_v_corr_max_abs_diff

### cramer_v_corr_avg_abs_diff

### error_rate_diff

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### error_rate_train_dataset

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### error_rate_synth_dataset

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### aoc_diff

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### aoc_train_dataset

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### aoc_synth_dataset

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


## WE

### total_variation_distance

### marginals_3_max_abs_diff_error

### marginals_3_avg_abs_diff_error

### thresholded_marginals_3_max_abs_diff_error

### thresholded_marginals_3_avg_abs_diff_error

### pearson_corr_max_abs_diff

### pearson_corr_avg_abs_diff

### cramer_v_corr_max_abs_diff

### cramer_v_corr_avg_abs_diff

### error_rate_diff

### error_rate_train_dataset

### error_rate_synth_dataset

### aoc_diff

### aoc_train_dataset

### aoc_synth_dataset

In [4]:
# slightly hacky, just grabbing all files in the rankings folder
RANKINGS_FOLDER = "./results/all_rankings"

ranking_files = glob.glob(os.path.join(RANKINGS_FOLDER, "*.csv"))

dfs = []

for file_path in ranking_files:
    file_name = os.path.basename(file_path) 
    file_stem, _ = os.path.splitext(file_name)

    parts = file_stem.split("_")
    experiments_name = parts[-1]           
    metric = "_".join(parts[:-1])          

    df = pd.read_csv(file_path)

    df["metric"] = metric
    df["experiments_name"] = experiments_name

    dfs.append(df)

all_ranks_df = pd.concat(dfs, ignore_index=True)

# ccreate a super ranking by averaging ranks across all metrics
# for each (experiment, synth_name) within each experiments_name 
super_rank_df = (
    all_ranks_df
    .groupby(["experiments_name", "experiment", "synth_name"], as_index=False)["rank"]
    .mean()
    .rename(columns={"rank": "super_rank"})
)

super_rank_df = super_rank_df.sort_values(["experiments_name", "super_rank"])

super_rank_df.to_csv(os.path.join(RANKINGS_FOLDER, "super_ranking.csv"), index=False)

print("super ranking saved as ", os.path.join(RANKINGS_FOLDER, "super_ranking.csv"))


super ranking saved as  ./results/all_rankings/super_ranking.csv


In [5]:
df_privbayes = super_rank_df[super_rank_df["synth_name"] == "privbayes"].copy()
df_gem       = super_rank_df[super_rank_df["synth_name"] == "gem"].copy()
df_aim_jax   = super_rank_df[super_rank_df["synth_name"] == "aim_jax"].copy()

In [6]:
df_privbayes[df_privbayes['experiments_name'] == 'acs']

Unnamed: 0,experiments_name,experiment,synth_name,super_rank
17,acs,acs/national/acs/csv-llama,privbayes,1.6
41,acs,acs/national/acs/gen-llama-MIX-UNIF,privbayes,1.733333
11,acs,acs/national/acs/csv-claude,privbayes,2.05
14,acs,acs/national/acs/csv-gpt,privbayes,2.05
29,acs,acs/national/acs/gen-claude-MIX-UNIF,privbayes,2.05
38,acs,acs/national/acs/gen-llama-MIX-MAX,privbayes,2.066667
26,acs,acs/national/acs/gen-claude-MIX-MAX,privbayes,2.233333
32,acs,acs/national/acs/gen-gpt-MIX-MAX,privbayes,2.233333
50,acs,acs/national/acs/sdscm-olmo-1b-hf,privbayes,2.233333
23,acs,acs/national/acs/gen-MIX-UNIF,privbayes,2.25


In [7]:
df_privbayes[df_privbayes['experiments_name'] == 'we']

Unnamed: 0,experiments_name,experiment,synth_name,super_rank
317,we,we/2023/we/sdscm-gpt2,privbayes,2.666667
299,we,we/2023/we/gen-claude-MIX-MAX,privbayes,2.833333
323,we,we/2023/we/sdscm-olmo-1b-hf,privbayes,2.933333
320,we,we/2023/we/sdscm-llama-3-8b,privbayes,2.983333
275,we,we/2023/we/arbitrary,privbayes,3.083333
302,we,we/2023/we/gen-claude-MIX-UNIF,privbayes,3.1
290,we,we/2023/we/csv-llama,privbayes,3.133333
287,we,we/2023/we/csv-gpt,privbayes,3.2
293,we,we/2023/we/gen-MIX-MAX,privbayes,3.25
314,we,we/2023/we/gen-llama-MIX-UNIF,privbayes,3.266667


In [8]:
df_privbayes[df_privbayes['experiments_name'] == 'edad']

Unnamed: 0,experiments_name,experiment,synth_name,super_rank
71,edad,edad/2023/edad/gen-MIX-MAX,privbayes,1.888889
65,edad,edad/2023/edad/csv-gpt,privbayes,2.111111
62,edad,edad/2023/edad/csv-claude,privbayes,2.138889
53,edad,edad/2023/edad/arbitrary,privbayes,2.166667
74,edad,edad/2023/edad/gen-MIX-UNIF,privbayes,2.305556
68,edad,edad/2023/edad/csv-llama,privbayes,2.333333
80,edad,edad/2023/edad/gen-claude-MIX-UNIF,privbayes,2.361111
77,edad,edad/2023/edad/gen-claude-MIX-MAX,privbayes,2.5
83,edad,edad/2023/edad/gen-gpt-MIX-MAX,privbayes,2.555556
86,edad,edad/2023/edad/gen-gpt-MIX-UNIF,privbayes,3.277778


In [9]:
super_rank_df["method"] = super_rank_df["experiment"].str.split("/").str[-1]

df_method_avg = (
    super_rank_df
    .groupby("method", as_index=False)["super_rank"]
    .mean()
    .rename(columns={"super_rank": "avg_super_rank"})
)

df_method_avg["super_super_rank"] = df_method_avg["avg_super_rank"].rank(method="dense", ascending=True)

df_method_avg = df_method_avg.sort_values("super_super_rank")

In [10]:
df_method_avg

Unnamed: 0,method,avg_super_rank,super_super_rank
11,gen-claude-MIX-MAX,2.422222,1.0
7,csv-gpt,2.524691,2.0
3,arbitrary,2.591975,3.0
10,gen-MIX-UNIF,2.624074,4.0
12,gen-claude-MIX-UNIF,2.675926,5.0
9,gen-MIX-MAX,2.681481,6.0
13,gen-gpt-MIX-MAX,2.707407,7.0
8,csv-llama,2.724691,8.0
6,csv-claude,2.767901,9.0
14,gen-gpt-MIX-UNIF,2.855556,10.0


## Calculating rankings based on advantage
### TODO: Currently buggy, need to fix.

In [15]:
# suppress warnings
import warnings
warnings.filterwarnings("ignore")

column_to_rank_on = 'correspond_test'
for experiments_name, experiments in ALL_EXPERIMENTS_OBJ.items():

    display(Markdown(f"## {experiments_name.upper()}"))

    datasets = {}

    for metric in ydnpd.EVALUATION_METRICS:

        display(Markdown(f"### {metric}"))
        with pd.option_context("display.max_rows", None):
            try: 
                df_extract_all = ydnpd.UtilityTask.evaluate(utility_tasks_results, experiments, metric).multiply(100).round(2)
                
                # flatten df_extract_all
                df_extract_all = df_extract_all.reset_index()

                df_extract_all_smaller = df_extract_all[['synth_name', 'experiment', 'epsilon', column_to_rank_on]] 

                metric_direction = METRIC_DIRECTION[metric]

                df_extract_all_smaller["_raw_value"] = df_extract_all_smaller[column_to_rank_on]

                grp = df_extract_all_smaller.groupby(["experiment", "epsilon"])["_raw_value"]

                if metric_direction in ["closer_to_zero_is_better", "lower_is_better"]:
                    # baseline is best  or lowest in that group
                    baseline_series = grp.transform("min")
                    df_extract_all_smaller["_advantage"] = (
                        baseline_series - df_extract_all_smaller["_raw_value"]
                    ) / baseline_series
                else:
                    # baseline is best or highest in that group
                    baseline_series = grp.transform("max")
                    df_extract_all_smaller["_advantage"] = (
                        df_extract_all_smaller["_raw_value"] - baseline_series
                    ) / baseline_series

                df_extract_all_smaller = df_extract_all_smaller.sort_values(
                    by=["experiment", "epsilon", "_advantage"],
                    ascending=[True, True, False],
                )

                df_extract_all_smaller["_advantage_rank"] = df_extract_all_smaller.groupby(
                    ["experiment", "epsilon"]
                ).cumcount() + 1

                df_experiment_best = (
                    df_extract_all_smaller
                    .groupby(["synth_name", "experiment"], as_index=False)["_advantage_rank"]
                    .min()
                )

                df_final_avg = (
                    df_experiment_best
                    .groupby(["experiment", "synth_name"])["_advantage_rank"]
                    .mean()
                    .reset_index()
                )

                df_final_avg.to_csv(f"./results/advantage_rankings/{metric}_{experiments_name}_advantage.csv")

            except Exception as e:
                print(f"Error: {e}")
                continue



## ACS

### total_variation_distance

### marginals_3_max_abs_diff_error

### marginals_3_avg_abs_diff_error

### thresholded_marginals_3_max_abs_diff_error

### thresholded_marginals_3_avg_abs_diff_error

### pearson_corr_max_abs_diff

### pearson_corr_avg_abs_diff

### cramer_v_corr_max_abs_diff

### cramer_v_corr_avg_abs_diff

### error_rate_diff

### error_rate_train_dataset

### error_rate_synth_dataset

### aoc_diff

### aoc_train_dataset

### aoc_synth_dataset

## EDAD

### total_variation_distance

### marginals_3_max_abs_diff_error

### marginals_3_avg_abs_diff_error

### thresholded_marginals_3_max_abs_diff_error

### thresholded_marginals_3_avg_abs_diff_error

### pearson_corr_max_abs_diff

### pearson_corr_avg_abs_diff

### cramer_v_corr_max_abs_diff

### cramer_v_corr_avg_abs_diff

### error_rate_diff

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### error_rate_train_dataset

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### error_rate_synth_dataset

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### aoc_diff

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### aoc_train_dataset

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


### aoc_synth_dataset

Error in extractor function:
 - synth_name: aim_jax
 - epsilon: 1
 - dev_name: edad/gen-MIX-MAX
 - test_name: edad/2023
 - Error: Cannot index by location index with a non-integer key
Error in evaluate method: Cannot index by location index with a non-integer key
Error: Cannot index by location index with a non-integer key


## WE

### total_variation_distance

### marginals_3_max_abs_diff_error

### marginals_3_avg_abs_diff_error

### thresholded_marginals_3_max_abs_diff_error

### thresholded_marginals_3_avg_abs_diff_error

### pearson_corr_max_abs_diff

### pearson_corr_avg_abs_diff

### cramer_v_corr_max_abs_diff

### cramer_v_corr_avg_abs_diff

### error_rate_diff

### error_rate_train_dataset

### error_rate_synth_dataset

### aoc_diff

### aoc_train_dataset

### aoc_synth_dataset

In [14]:
df_final_avg

Unnamed: 0,experiment,synth_name,_advantage_rank
0,we/2023/we/arbitrary,aim_jax,1.25
1,we/2023/we/arbitrary,gem,3.0
2,we/2023/we/arbitrary,privbayes,1.75
3,we/2023/we/baseline_domain,aim_jax,1.75
4,we/2023/we/baseline_domain,gem,3.0
5,we/2023/we/baseline_domain,privbayes,1.25
6,we/2023/we/baseline_univariate,aim_jax,1.25
7,we/2023/we/baseline_univariate,gem,3.0
8,we/2023/we/baseline_univariate,privbayes,1.75
9,we/2023/we/csv-claude,aim_jax,1.25
