# Calculate metrics

To illustrate the toolkit's ability to compare various synthesizers, let's calculate quality metrics for 4 distinct single-table datasets and 17 synthetic versions of each, which were published as part of [Platzer and Reutterer (2021)](https://www.frontiersin.org/journals/big-data/articles/10.3389/fdata.2021.679939/full).

Note that each generated report takes several minutes, so the total computation can take several hours.

In [None]:
import pandas as pd
from mostlyai import qa

# Configure logging once, before any report is generated below.
qa.init_logging()  # initialize logging to stdout

# Base URL of the published benchmark data; file names are appended to it.
path = "https://github.com/mostly-ai/paper-fidelity-accuracy/raw/refs/heads/main/data/"
datasets = ["adult", "bank-marketing", "credit-default", "online-shoppers"]
synthesizers = [
    "mostly",
    "mostly_e1",
    "mostly_e2",
    "mostly_e4",
    "mostly_e8",
    "flip10",
    "flip20",
    "flip30",
    "flip40",
    "flip50",
    "flip60",
    "flip70",
    "flip80",
    "flip90",
    "gretel",
    "synthpop",
    "ctgan",
]


def combine_rows(rows):
    """Stack per-report metric frames into a single table.

    Args:
        rows: list of DataFrames, one per (dataset, synthesizer) report.

    Returns:
        One DataFrame holding all rows with a fresh 0..n-1 index, or an empty
        DataFrame when ``rows`` is empty (``pd.concat`` raises on an empty list).
    """
    if not rows:
        return pd.DataFrame()
    # Each element is already a DataFrame, so the frames must be concatenated;
    # passing the list to the DataFrame constructor does not stack them.
    return pd.concat(rows, ignore_index=True)


# ENABLE HERE FOR REPRODUCING BENCHMARK RESULTS
if False:
    rows = []
    for dataset in datasets:
        # the training and holdout files are loaded once per dataset and reused
        # for every synthesizer
        tgt = pd.read_csv(path + f"{dataset}_trn.csv.gz")
        hol = pd.read_csv(path + f"{dataset}_val.csv.gz")
        for synthesizer in synthesizers:
            try:
                syn = pd.read_csv(path + f"{dataset}_{synthesizer}.csv.gz")
                print(dataset, synthesizer, tgt.shape, hol.shape, syn.shape)
                report_path, metrics = qa.report(
                    syn_tgt_data=syn,
                    trn_tgt_data=tgt,
                    hol_tgt_data=hol,
                )
                # flatten the nested metrics into one row with "_"-joined column names
                row = pd.json_normalize(metrics.model_dump(), sep="_")
                row.insert(0, "dataset", dataset)
                row.insert(1, "synthesizer", synthesizer)
                rows.append(row)
            except Exception as e:
                # best effort: report which combination failed and continue with the rest
                print(f"{dataset} / {synthesizer} failed: {e}")

    df = combine_rows(rows)
    df.to_csv("benchmark-examples.csv", index=False)

## Compare metrics

Calculate ratios with respect to holdout (north star), and visualize as a privacy-utility trade-off curve.

In [None]:
import pandas as pd

# read the benchmark results from disk
df = pd.read_csv("benchmark-examples.csv")

# each ratio column is one metric column divided by its reference column
ratio_columns = {
    "acc_ratio": ("accuracy_overall", "accuracy_overall_max"),
    "sim_ratio": (
        "similarity_cosine_similarity_training_synthetic",
        "similarity_cosine_similarity_training_holdout",
    ),
    "dcr_ratio": ("distances_dcr_training", "distances_dcr_holdout"),
}
for ratio_name, (numerator, denominator) in ratio_columns.items():
    df[ratio_name] = df[numerator] / df[denominator]
df

In [None]:
import matplotlib.pyplot as plt


def plot_dataset(df, dataset):
    """Draw two side-by-side scatter panels of the ratio columns in ``df``.

    The left panel plots ``acc_ratio`` against ``dcr_ratio``, the right panel
    ``sim_ratio`` against ``dcr_ratio``. Every row of ``df`` becomes one point
    colored by its ``synthesizer`` value. Dashed lines at 1 and a black dot at
    (1, 1) are drawn on each panel, and one legend with a single entry per
    synthesizer is attached to the figure.

    Args:
        df: DataFrame with the columns ``synthesizer``, ``acc_ratio``,
            ``sim_ratio`` and ``dcr_ratio``.
        dataset: display name used as prefix of both panel titles.
    """
    # one color per synthesizer; all "flip" variants share the same color
    palette = {
        "mostly": "blue",
        "mostly_e1": "#bdd7e7",
        "mostly_e2": "#6baed6",
        "mostly_e4": "#3182bd",
        "mostly_e8": "#215b85",
        "synthpop": "red",
        "gretel": "orange",
        "ctgan": "green",
    }
    palette.update({f"flip{pct}": "#810f7c" for pct in range(10, 100, 10)})

    def draw_panel(ax, x_col, y_col, x_text, y_text):
        ax.set_xlabel(x_text)
        ax.set_ylabel(y_text)
        ax.grid(True)
        # one scatter call per row so that each point carries its own legend label
        for name, x_val, y_val in zip(df["synthesizer"], df[x_col], df[y_col]):
            ax.scatter(x_val, y_val, color=palette[name], s=100, label=name)
        # dashed guides at ratio 1 plus a marker where they cross
        ax.axhline(y=1, color="black", linestyle="--")
        ax.axvline(x=1, color="black", linestyle="--")
        ax.scatter(1, 1, color="black", s=100, edgecolor="black", zorder=5)

    fig, (ax_acc, ax_sim) = plt.subplots(1, 2, figsize=(14, 5))

    ax_acc.set_title(f"{dataset} - Accuracy vs Distances")
    draw_panel(
        ax_acc,
        "acc_ratio",
        "dcr_ratio",
        "Accuracy Ratio (acc/acc_max)",
        "Distances Ratio (dcr_trn/dcr_hol)",
    )

    ax_sim.set_title(f"{dataset} - Similarity vs Distances")
    draw_panel(
        ax_sim,
        "sim_ratio",
        "dcr_ratio",
        "Similarity Ratio (sim_trn_syn/sim_trn_hol)",
        "Distances Ratio (dcr_trn/dcr_hol)",
    )

    # collapse repeated labels so each synthesizer appears once in the legend
    handles, labels = ax_acc.get_legend_handles_labels()
    unique_entries = {}
    for label, handle in zip(labels, handles):
        unique_entries[label] = handle
    fig.legend(
        unique_entries.values(),
        unique_entries.keys(),
        title="Synthesizer",
        bbox_to_anchor=(0.995, 0.98),
        loc="upper left",
    )

    plt.tight_layout()
    # to persist the figure, call e.g. plt.savefig('fig_adult.png') here

In [None]:
# plot the "adult" results, leaving out the ctgan and mostly_e1 rows
adult_mask = (df["dataset"] == "adult") & ~df["synthesizer"].isin(["ctgan", "mostly_e1"])
plot_dataset(df.loc[adult_mask], "Adult")

In [None]:
# plot the "bank-marketing" results, leaving out the ctgan and mostly_e1 rows
bank_mask = (df["dataset"] == "bank-marketing") & ~df["synthesizer"].isin(["ctgan", "mostly_e1"])
plot_dataset(df.loc[bank_mask], "Bank Marketing")

In [None]:
# plot the "credit-default" results, leaving out the ctgan and mostly_e1 rows
credit_mask = (df["dataset"] == "credit-default") & ~df["synthesizer"].isin(["ctgan", "mostly_e1"])
plot_dataset(df.loc[credit_mask], "Credit Default")

In [None]:
# plot the "online-shoppers" results, leaving out the ctgan and mostly_e1 rows
shoppers_mask = (df["dataset"] == "online-shoppers") & ~df["synthesizer"].isin(["ctgan", "mostly_e1"])
plot_dataset(df.loc[shoppers_mask], "Online Shoppers")