In [None]:
import os

# Export the location of the PrivBayes binary through the PRIVBAYES_BIN environment
# variable. The path is relative to the current working directory.
# NOTE(review): `mac_bin` presumably is the macOS build, and the assignment presumably
# has to run before `import ydnpd` below so the package can pick it up — confirm both.
os.environ["PRIVBAYES_BIN"] = "./ydnpd/harness/synthesis/privbayes/mac_bin"

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display, Markdown
import wandb
import tqdm

import ydnpd

In [None]:
# Extra datasets to evaluate, keyed by the experiment family they extend.
# Each entry is a (dataset name, location) pair; the second element looks like a
# directory path — TODO confirm how ydnpd interprets it.
ADDITIONAL_EXPERIMENTS = {
    "acs": [
        ("acs/llm", "./llm_datasets"),
    ],
}

# Flat list of every extra (dataset name, location) pair across all families.
ADDITIONAL_DATASETS = [
    dataset
    for family_datasets in ADDITIONAL_EXPERIMENTS.values()
    for dataset in family_datasets
]

# Only W&B runs whose group is listed here are loaded further down.
WANDB_GROUPS = ["test"]

## Tasks

In [None]:
# List every utility task (including the additional datasets) with a 1-based
# index, and accumulate the sum of their reported sizes.
utility_tasks = ydnpd.span_utility_tasks(additional_datasets=ADDITIONAL_DATASETS)

total_task_size = 0
for position, utility_task in enumerate(utility_tasks, start=1):
    print(f"{position:2} {utility_task}")
    total_task_size = total_task_size + utility_task.size()

print(f"\nTotal task size: {total_task_size}")

## Load Results from W&B

In [None]:
# Pull the logged history of every run in the selected W&B groups and flatten it
# into one record per logged step: the run's config plus an "evaluation" entry
# holding that step's data.
api = wandb.Api()
runs = api.runs(path="shlomihod/ydnpd")

utility_tasks_results = []

for run in tqdm.tqdm(runs):
    # Guard clause: ignore runs outside the groups under analysis.
    if run.group not in WANDB_GROUPS:
        continue

    for step_data in run.history().to_dict(orient="records"):

        # TODO: find out why some runs log steps at or beyond NUM_RUNS.
        # Such steps are reported and left out of the results.
        if step_data["_step"] >= ydnpd.config.NUM_RUNS:
            print(f"Run {run.id} has more than {ydnpd.config.NUM_RUNS} steps")
            continue

        # TODO: find out why some metrics come back as the string "NaN".
        # Non-underscore keys holding that string are replaced by a float NaN.
        # Only existing keys are reassigned, so iterating over items() is safe.
        for metric_name, metric_value in step_data.items():
            if metric_name.startswith("_"):
                continue
            if metric_value == "NaN":
                step_data[metric_name] = np.nan

        result_record = {**dict(run.config), "evaluation": step_data}
        utility_tasks_results.append(result_record)

In [None]:
# Keep only the records whose evaluation logged "pearson_corr_max_abs_diff".
#
# The filter reads from `utility_tasks_results` itself rather than from a separate
# backup copy, so the cell works on a fresh kernel (Restart & Run All). Records are
# selected, never mutated, so no deep copy is needed, and re-running the cell is a
# no-op because filtering an already-filtered list returns the same records.
utility_tasks_results = [
    result
    for result in utility_tasks_results
    if "pearson_corr_max_abs_diff" in result["evaluation"]
]

## Utility-Related Tasks

In [None]:
# Report every available evaluation metric for each experiment family: first the
# plots, then the summary tables.
#
# A metric counts as available when it appears in the first record's evaluation
# (same rule as before, now computed once instead of inside both loops for every
# experiment). An empty result list yields no metrics instead of an IndexError.
logged_metrics = utility_tasks_results[0]["evaluation"] if utility_tasks_results else {}
available_metrics = [
    metric for metric in ydnpd.EVALUATION_METRICS if metric in logged_metrics
]

if not available_metrics:
    display(Markdown(
        "**No evaluation metrics to report** — `utility_tasks_results` is empty "
        "or contains none of `ydnpd.EVALUATION_METRICS`."
    ))

for experiments_name, experiments in ydnpd.config.ALL_EXPERIMENTS.items():

    # When extra datasets are configured for this family, rebuild the experiments
    # object with their names appended to the dev dataset names.
    if (additional_experiment_datasets := ADDITIONAL_EXPERIMENTS.get(experiments_name)):
        additional_dataset_names = [
            dataset_name for dataset_name, _ in additional_experiment_datasets
        ]
        experiments = ydnpd.harness.experiment.Experiments(
            experiments.test_name,
            experiments.dev_names + additional_dataset_names,
        )

    display(Markdown(f"## {experiments_name}"))

    # Plots: one group of figures per metric.
    for metric in available_metrics:
        display(Markdown(f"### {metric}"))

        gs = ydnpd.UtilityTask.plot(utility_tasks_results, experiments, metric=metric)

        for g in gs:
            display(g.fig)
            # Close each figure after displaying it so figures do not pile up in memory.
            plt.close(g.fig)

    # Tables: one per metric, values multiplied by 100 and rounded to one decimal.
    # NOTE(review): the result of `evaluate` is presumably a DataFrame — confirm.
    for metric in available_metrics:
        display(Markdown(f"## {metric}"))
        # Lift the row limit so the full table is rendered.
        with pd.option_context("display.max_rows", None):
            display(
                ydnpd.UtilityTask.evaluate(utility_tasks_results, experiments, metric)
                .multiply(100)
                .round(1)
            )