In [1]:
import itertools as it
import os

# Set PRIVBAYES_BIN to the PrivBayes `linux_bin` directory inside the repo.
# The path is relative, so it resolves against the notebook's working directory.
# NOTE(review): this runs before `import ydnpd` below — presumably the variable is
# read at import time; confirm before moving this line.
os.environ["PRIVBAYES_BIN"] = "./ydnpd/harness/synthesis/privbayes/linux_bin"

import ray
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display, Markdown

import ydnpd
from additional import ADDITIONAL_PATH

In [2]:
# Extra datasets to evaluate, keyed by experiment name. The reporting cell further
# down reads each value as a sequence of pairs whose first item is a dataset name.
# Left empty here, so only the experiments defined in `ydnpd` are used.
ADDITIONAL_EXPERIMENTS = {}

# Flatten the per-experiment sequences into one list. `chain.from_iterable` does this
# in a single pass; `sum(..., [])` re-copies the accumulated list at every step and
# only works when every value is a list.
ADDITIONAL_DATASETS = list(it.chain.from_iterable(ADDITIONAL_EXPERIMENTS.values()))

## Tasks

In [None]:
# List every utility task (numbered from 1) and report the sum of their sizes.
task_sizes = []
numbered_tasks = enumerate(
    ydnpd.span_utility_tasks(additional_datasets=ADDITIONAL_DATASETS),
    start=1,
)
for idx, task in numbered_tasks:
    print(f"{idx:2} {task}")
    task_sizes.append(task.size())

total_task_size = sum(task_sizes)
print(f"\nTotal task size: {total_task_size}")

## Execute Tasks

In [None]:
# Show the value returned by ydnpd's resource helper as this cell's output.
# NOTE(review): presumably reports the CPUs/GPUs available on this machine — confirm,
# and check it against the counts passed to `ray.init` in the next cell.
ydnpd.utils.get_compute_resources()

In [None]:
# Resource counts handed to Ray; adjust to the machine running the notebook.
RAY_NUM_CPUS = 24
RAY_NUM_GPUS = 4

# ignore_reinit_error=True lets this cell be re-run while Ray is already initialised.
ray.init(num_cpus=RAY_NUM_CPUS, num_gpus=RAY_NUM_GPUS, ignore_reinit_error=True)

In [None]:
# Build the Ray task handles (wandb logging disabled), block until all of them finish,
# then flatten the per-task result sequences into one flat list.
utility_tasks = ydnpd.span_utility_ray_tasks(
    additional_datasets=ADDITIONAL_DATASETS,
    with_wandb=False,
)
per_task_results = ray.get(utility_tasks)
utility_tasks_results = list(it.chain.from_iterable(per_task_results))

## Utility-Related Tasks

In [None]:
# For every experiment group, render in order: a section heading, the distribution
# distances between its dev datasets, the overall utility plots, one set of plots per
# metric, and finally one table per metric.
for experiments_name, experiments in ydnpd.ALL_EXPERIMENTS.items():

    # Extend the experiment's dev datasets with any extra ones registered under the
    # same experiment name in ADDITIONAL_EXPERIMENTS.
    if (additional_experiment_datasets := ADDITIONAL_EXPERIMENTS.get(experiments_name)):
        additional_dataset_names = [dataset_name for dataset_name, _ in additional_experiment_datasets]
        experiments = ydnpd.Experiments(experiments.test_name,
                                        experiments.dev_names + additional_dataset_names)

    display(Markdown(f"## {experiments_name.upper()}"))

    datasets = {}
    for name in experiments.dev_names:
        try:
            df = ydnpd.load_dataset(name)[0]
        except ValueError:
            # The default lookup raised ValueError (presumably an unknown dataset
            # name); retry against the additional-datasets path.
            df = ydnpd.load_dataset(name, ADDITIONAL_PATH)[0]
        datasets[name] = df
    g = ydnpd.plot_distribution_distances(datasets)
    display(g.fig)
    # Close after displaying, as done for the per-metric figures below; otherwise the
    # figure stays open across iterations and may be rendered again at the end of the cell.
    plt.close(g.fig)

    ogs = ydnpd.UtilityTask.plot_overall(utility_tasks_results,
                                         experiments,
                                         epsilon_reference=4)

    for g in ogs:
        g.show()

    # Keep only the metrics present in the first result's "evaluation" mapping; the
    # first result is used as representative of all of them. Computed once and shared
    # by the plot and table sections.
    reported_metrics = [metric for metric in ydnpd.EVALUATION_METRICS
                        if metric in utility_tasks_results[0]["evaluation"]]

    # Per-metric plots.
    for metric in reported_metrics:
        display(Markdown(f"### {metric}"))

        gs = ydnpd.UtilityTask.plot(utility_tasks_results, experiments, metric=metric)

        for g in gs:
            display(g.fig)
            plt.close(g.fig)

    # Per-metric tables: values are multiplied by 100 and rounded to one decimal,
    # shown without row truncation.
    for metric in reported_metrics:
        display(Markdown(f"### {metric}"))
        with pd.option_context("display.max_rows", None):
            display(
                ydnpd.UtilityTask.evaluate(utility_tasks_results, experiments, metric)
                .multiply(100)
                .round(1)
            )