# Evaluating Synthetic Data

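This notebook contains the code used to evaluate the experiments and to generate the `agg_df` dataframe used in the paper (built here as `aggregated_df` and saved to `aggregated_df.pkl`, alongside the metric differences in `diff_df.pkl`).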

In [3]:
import sys

# Put the parent directory on the import path, presumably so that the local
# `instasynth` package can be imported by the following cells.
# Guarded so that re-running this cell does not append duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

In [None]:
from pathlib import Path
from glob import iglob
from instasynth import evaluation, embedding_generation
import pandas as pd

# Reference ("real") dataset: a fixed-seed sample of 1000 rows from the pickled
# frame, after which rows with missing values or an empty caption are removed
# (so the final frame may hold fewer than 1000 rows).
real_data = (
    pd.read_pickle("../data/df_sample.pkl")
    .sample(1000, random_state=42)
    .dropna()
    # The operator must be written `!=`; with a space ("! =") the expression
    # is not valid and `DataFrame.query` fails to parse it.
    .query("caption != ''")
)

# Embedding store rooted at ../embeddings/, using the file "embeddings.pkl".
emb_storage = embedding_generation.EmbeddingStorage(
    Path("../embeddings/"), embedding_file_name="embeddings.pkl"
)

# One path per entry under ../results, skipping any whose path contains
# "archive". Sorted because glob order depends on the filesystem, which would
# otherwise make the experiment order differ between machines/runs.
experiment_paths = [
    Path(f) for f in sorted(iglob("../results/*")) if "archive" not in f
]

# Test sets handed to the evaluator (see the keyword names below for their roles).
test_spons_data = pd.read_pickle("../data/kim_sample_mini.pkl")
ann_data = pd.read_pickle("../data/ann_sample_ad_detection.pkl")


evaluator = evaluation.ExperimentEvaluator(
    experiment_paths=experiment_paths,
    test_dataset_ads=test_spons_data,
    test_dataset_ads_undisclosed=ann_data,
    # Pass a copy so the evaluator cannot modify `real_data` held by the notebook.
    real_dataset=real_data.copy(),
    embedding_storage=emb_storage,
)

# Both loaders are called before aggregating; presumably they populate the
# per-experiment and real-dataset metrics that `aggregate_metrics` combines
# (TODO confirm against `instasynth.evaluation`).
evaluator.load_experiment_metrics()
evaluator.load_real_dataset_metrics()

aggregated_df = evaluator.aggregate_metrics()

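Adding the averaged metrics from the real data: the `Real` row of `aggregated_df` is overwritten with the values stored in `metrics_bootstrap_real.pkl` wherever a matching column exists.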

In [11]:
# Load the stored metrics and keep only the "Real" row as a {column: value} dict.
bootstrap_metrics_frame = pd.read_pickle("../data/metrics_bootstrap_real.pkl")
metrics_bootstrap_real = bootstrap_metrics_frame.loc["Real"].to_dict()

# For every aggregated column, prefer the stored value for the "Real" row and
# fall back to the value that is already there when the column is not stored.
for col in aggregated_df.columns:
    current_value = aggregated_df.at["Real", col]
    aggregated_df.at["Real", col] = metrics_bootstrap_real.get(col, current_value)

Calculating the metrics diff

In [5]:
# Hand the (updated) "Real" row to the evaluator as a {column: value} dict
# before asking it for the comparison. The assignment must come first:
# `compare_metrics` is called only after the private attribute is replaced.
real_row = aggregated_df.loc["Real"]
evaluator._real_dataset_metrics = real_row.to_dict()

difference_df = evaluator.compare_metrics()

Adding the missing `internal_cosine_sim` metric (real minus synthetic internal cosine similarity) to the differences

In [6]:
# Real minus synthetic internal cosine similarity, one value per row of
# `aggregated_df`.
cosine_sim_gap = (
    aggregated_df["real_internal_cosine_sim"]
    - aggregated_df["synthetic_internal_cosine_sim"]
)

# Reshape to a single row labelled "internal_cosine_sim" so it can be stacked
# underneath the other metric rows of `difference_df`.
internal_cosine_sim_diff = cosine_sim_gap.to_frame("internal_cosine_sim").T

# Stack the extra row and discard the "Real" column from the result.
diff_df = pd.concat(
    [difference_df.copy(), internal_cosine_sim_diff], axis=0
).drop(columns=["Real"])

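Aggregating the results: grouping the metric columns into those to ignore, those compared with argmax, and those compared with argmin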

In [7]:
# Columns left out of both groups below: any aggregated column whose name
# contains one of these substrings.
columns_ignore = [
    c
    for c in aggregated_df.columns
    if any(
        marker in c
        for marker in ("request", "_rate", "_internal_cosine_sim", "number_of_errors")
    )
]

# Columns whose name contains "ad_detection" or "pct_unique_captions".
columns_clf = [
    c
    for c in aggregated_df.columns
    if "ad_detection" in c or "pct_unique_captions" in c
]

# Metrics (rows of diff_df) whose difference is NaN for every column.
columns_na = diff_df.index[diff_df.isna().all(axis=1)].tolist()

# All-NaN metrics that are not ignored. Built with an order-preserving filter
# instead of a set difference: iterating a set of strings yields an order that
# changes between kernel sessions, which made the resulting lists
# non-reproducible. `dict.fromkeys` keeps the de-duplication the set provided.
ignored_lookup = set(columns_ignore)
columns_compare = [c for c in dict.fromkeys(columns_na) if c not in ignored_lookup]

# NOTE(review): the names suggest these are the metrics where the highest
# (argmax) vs. lowest (argmin) value is selected downstream — confirm with the
# code that consumes these lists.
columns_argmax = columns_clf + columns_compare

# Every remaining metric: neither in the argmax group nor ignored.
excluded_from_argmin = ignored_lookup.union(columns_argmax)
columns_argmin = [c for c in diff_df.index if c not in excluded_from_argmin]

In [8]:
# Persist both result frames as pickles in the current working directory.
for frame, filename in (
    (diff_df, "diff_df.pkl"),
    (aggregated_df, "aggregated_df.pkl"),
):
    frame.to_pickle(filename)