## Seeding Experiment
This notebook compares optimizations that were configured identically except for the type of seeding. We compared random seeding, static seeding ("silver bullets"), and transfer learning.

In [None]:
# (Re)load the autoreload extension and enable mode 2 so that imported modules
# are reloaded automatically before each cell is executed.
%reload_ext autoreload
%autoreload 2

from IPython.core import ultratb

# Override the highlight style used by IPython's verbose tracebacks.
ultratb.VerboseTB.tb_highlight = "bg:#3e0054"

In [None]:
import os

# The rest of the notebook expects to run from a directory whose path ends in
# "syftr" (presumably the repository root); otherwise step up one level.
current_dir = os.getcwd()
if not current_dir.endswith("syftr"):
    parent_dir = os.path.dirname(current_dir)
    os.chdir(parent_dir)
    print(f"Changed working directory to: {os.getcwd()}")

In [None]:
import pandas as pd

from syftr.configuration import cfg

# Fraction of the (maximum) total evaluation count that a trial's success
# count must reach for the trial to be included in the accuracy/latency curves.
SUCCESS_RATE = 0.9
# Number of seeding trials assumed for the random-seeding study; also the
# position of the shared "Seeding ends" marker in the plots.
N_RANDOM_TRIALS = 23
# Only trials whose number is below this value are loaded from each study.
N_TRIALS_RESULT = 1000
# Toggle for panel titles and the figure-level title.
SHOW_TITLE = True

# Output directory for the exported figures; created if it does not exist.
RESULTS_DIR = cfg.paths.results_dir / "seeding"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Storage object passed to optuna.load_study when loading the studies.
STORAGE = cfg.database.get_optuna_storage()

# Show complete DataFrames when they are displayed in the notebook.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [None]:
# Active dataset configuration: Bright Biology.
EXPORT_FILENAME = "seeding1--testing--bright-biology"
DATASET_NAME = "Bright Biology"

# One row per seeding variant: (tag inside the study name, legend label, color).
_SEEDING_VARIANTS = (
    ("random", "Random Seeding", "tomato"),
    ("silver", "Silver Bullets", "dodgerblue"),
    ("transfer", "Transfer Learning", "limegreen"),
)

# Optuna study names, in plotting order.
STUDY_NAMES = [
    f"seeding1--testing-{tag}--bright_hf--biology" for tag, _, _ in _SEEDING_VARIANTS
]

# Two-line display names (dataset + seeding type), keyed by study name.
NAMES = {
    study: f"{DATASET_NAME}\n({label})"
    for study, (_, label, _) in zip(STUDY_NAMES, _SEEDING_VARIANTS)
}

# Legend labels, keyed by study name.
LABELS = {
    study: label for study, (_, label, _) in zip(STUDY_NAMES, _SEEDING_VARIANTS)
}

# Line colors, keyed by study name.
COLORS = {
    study: color for study, (_, _, color) in zip(STUDY_NAMES, _SEEDING_VARIANTS)
}

In [None]:
# EXPORT_FILENAME = "seeding1--testing--drdocs_hf"

# STUDY_NAMES = [
#     "seeding1--testing-random--drdocs_hf",
#     "seeding1--testing-silver--drdocs_hf",
#     "seeding1--testing-transfer--drdocs_hf",
# ]

# NAMES = {
#     "seeding1--testing-random--drdocs_hf": "DRDocs\n(Random Seeding)",
#     "seeding1--testing-silver--drdocs_hf": "DRDocs\n(Silver Bullets)",
#     "seeding1--testing-transfer--drdocs_hf": "DRDocs\n(Transfer Learning)",
# }

# LABELS = {
#     "seeding1--testing-random--drdocs_hf": "Random Seeding",
#     "seeding1--testing-silver--drdocs_hf": "Silver Bullets",
#     "seeding1--testing-transfer--drdocs_hf": "Transfer Learning",
# }

# COLORS = {
#     "seeding1--testing-random--drdocs_hf": "tomato",
#     "seeding1--testing-silver--drdocs_hf": "dodgerblue",
#     "seeding1--testing-transfer--drdocs_hf": "limegreen",
# }

# DATASET_NAME = "DRDocs"

In [None]:
# EXPORT_FILENAME = "seeding1--testing--infinitebench_hf--longbook_qa_eng"

# STUDY_NAMES = [
#     "seeding1--testing-random--infinitebench_hf--longbook_qa_eng",
#     "seeding1--testing-silver--infinitebench_hf--longbook_qa_eng",
#     "seeding1--testing-transfer--infinitebench_hf--longbook_qa_eng",
# ]

# NAMES = {
#     "seeding1--testing-random--infinitebench_hf--longbook_qa_eng": "InfiniteBench\n(Random Seeding)",
#     "seeding1--testing-silver--infinitebench_hf--longbook_qa_eng": "InfiniteBench\n(Silver Bullets)",
#     "seeding1--testing-transfer--infinitebench_hf--longbook_qa_eng": "InfiniteBench\n(Transfer Learning)",
# }   

# LABELS = {
#     "seeding1--testing-random--infinitebench_hf--longbook_qa_eng": "Random Seeding",
#     "seeding1--testing-silver--infinitebench_hf--longbook_qa_eng": "Silver Bullets",
#     "seeding1--testing-transfer--infinitebench_hf--longbook_qa_eng": "Transfer Learning",
# }

# COLORS = {
#     "seeding1--testing-random--infinitebench_hf--longbook_qa_eng": "tomato",
#     "seeding1--testing-silver--infinitebench_hf--longbook_qa_eng": "dodgerblue",
#     "seeding1--testing-transfer--infinitebench_hf--longbook_qa_eng": "limegreen",
# }

# DATASET_NAME = "InfiniteBench"

In [None]:
# EXPORT_FILENAME = "seeding1--testing--phantomwikiv050_hf--depth_20_size_10000_seed_3"

# STUDY_NAMES = [
#     "seeding1--testing-random--phantomwikiv050_hf--depth_20_size_10000_seed_3",
#     "seeding1--testing-silver--phantomwikiv050_hf--depth_20_size_10000_seed_3",
#     "seeding1--testing-transfer--phantomwikiv050_hf--depth_20_size_10000_seed_3",
# ]

# NAMES = {
#     "seeding1--testing-random--phantomwikiv050_hf--depth_20_size_10000_seed_3": "PhantomWiki\n(Random Seeding)",
#     "seeding1--testing-silver--phantomwikiv050_hf--depth_20_size_10000_seed_3": "PhantomWiki\n(Silver Bullets)",
#     "seeding1--testing-transfer--phantomwikiv050_hf--depth_20_size_10000_seed_3": "PhantomWiki\n(Transfer Learning)",
# }   

# LABELS = {
#     "seeding1--testing-random--phantomwikiv050_hf--depth_20_size_10000_seed_3": "Random Seeding",
#     "seeding1--testing-silver--phantomwikiv050_hf--depth_20_size_10000_seed_3": "Silver Bullets",
#     "seeding1--testing-transfer--phantomwikiv050_hf--depth_20_size_10000_seed_3": "Transfer Learning",
# }

# COLORS = {
#     "seeding1--testing-random--phantomwikiv050_hf--depth_20_size_10000_seed_3": "tomato",
#     "seeding1--testing-silver--phantomwikiv050_hf--depth_20_size_10000_seed_3": "dodgerblue",
#     "seeding1--testing-transfer--phantomwikiv050_hf--depth_20_size_10000_seed_3": "limegreen",
# }

# DATASET_NAME = "PhantomWiki"

In [None]:
import optuna

# Load the trials of every configured study into one DataFrame, tagging each
# row with the study it came from and keeping only the first N_TRIALS_RESULT
# trial numbers.
dfs = []

for study_name in STUDY_NAMES:
    study = optuna.load_study(study_name=study_name, storage=STORAGE)
    df_tmp = study.trials_dataframe()
    if df_tmp.empty:
        # A study without trials yields a frame that has no "number" column,
        # so the filter below would raise KeyError. Skip it instead; the
        # plotting cell already tolerates studies with zero rows.
        print(f"Study {study_name} has no trials; skipping")
        continue
    df_tmp["study_name"] = study_name
    df_tmp = df_tmp[df_tmp["number"] < N_TRIALS_RESULT]
    dfs.append(df_tmp)

# NOTE: the per-study row labels are kept, so index labels repeat across
# studies in the combined frame.
df = pd.concat(dfs)

In [None]:
import numpy as np
from paretoset import paretoset


def safe_paretoset(df: pd.DataFrame, sense):
    """Return a boolean mask marking the Pareto-optimal rows of ``df``.

    Rows containing NaN or +/-inf in either objective are never marked as
    Pareto-optimal and are not passed to ``paretoset``, because dominance
    comparisons against NaN are undefined and could distort the result for
    the remaining rows.

    Args:
        df: Frame with exactly two objective columns.
        sense: Per-column optimization direction, forwarded to ``paretoset``
            (the notebook passes ``["max", "min"]``).

    Returns:
        Boolean ``pd.Series`` with the same index as ``df``.

    Raises:
        AssertionError: If ``df`` does not have exactly two columns.
    """
    assert df.shape[1] == 2, "safe_paretoset expects exactly two objective columns"

    cleaned = df.replace([np.inf, -np.inf], np.nan)
    # Positional mask of rows where both objectives are usable.
    complete = cleaned.notna().all(axis=1).to_numpy()

    flags = np.zeros(len(cleaned), dtype=bool)
    if complete.any():
        flags[complete] = paretoset(cleaned.loc[complete], sense=sense, use_numba=False)
    return pd.Series(flags, index=df.index)

def pareto_area(df_values, limits, sense):
    """Return the share of the limit rectangle covered by the Pareto front.

    The first column of ``df_values`` is treated as the y objective and the
    second as the x objective. The covered region is the staircase spanned
    between the front points and the corner ``(xmax, ymin)``.

    Args:
        df_values: Two-column frame of objective values (y first, x second).
        limits: ``[(ymin, ymax), (xmin, xmax)]`` bounding rectangle.
        sense: Optimization directions, forwarded to ``safe_paretoset``.

    Returns:
        Covered area divided by the rectangle area, or ``0.0`` when no front
        point lies inside the rectangle.
    """
    y_low, y_high = limits[0]
    x_low, x_high = limits[1]
    total_area = (x_high - x_low) * (y_high - y_low)

    front = df_values[safe_paretoset(df_values, sense=sense)].copy()

    # Keep only front points that lie inside the rectangle (bounds inclusive).
    x_inside = front.iloc[:, 1].between(x_low, x_high)
    y_inside = front.iloc[:, 0].between(y_low, y_high)
    front = front[x_inside & y_inside]
    if front.empty:
        return 0.0

    # Walk the front from left to right along the x objective.
    front = front.sort_values(by=[front.columns[1], front.columns[0]])
    xs = np.clip(front.iloc[:, 1].to_numpy(), x_low, x_high)
    ys = np.clip(front.iloc[:, 0].to_numpy(), y_low, y_high)

    covered = 0.0
    # Each point covers a strip reaching to the next point's x position ...
    for left, right, height in zip(xs[:-1], xs[1:], ys[:-1]):
        covered += (right - left) * (height - y_low)
    # ... and the last point covers the strip up to the right border.
    covered += (x_high - xs[-1]) * (ys[-1] - y_low)
    return covered / total_area

def cumulative_pareto_area(df, limits, sense):
    """Return the Pareto area after each trial, in order of completion.

    Trials are sorted by ``datetime_complete``; entry ``k`` of the result is
    ``pareto_area`` evaluated on the first ``k + 1`` trials in that order.

    Args:
        df: Trials frame with ``datetime_complete``, ``values_0`` and
            ``values_1`` columns.
        limits: Bounding rectangle, forwarded to ``pareto_area``.
        sense: Optimization directions, forwarded to ``pareto_area``.

    Returns:
        Float ``pd.Series`` indexed like ``df`` sorted by completion time.
    """
    df_values = df.sort_values(by="datetime_complete")[["values_0", "values_1"]]
    # Collect the values by position rather than assigning through ``.loc``:
    # label-based assignment would overwrite every row sharing a label when
    # the index is not unique (e.g. a frame concatenated from several studies).
    areas = [
        pareto_area(df_values.iloc[:end, :], limits, sense)
        for end in range(1, len(df_values) + 1)
    ]
    return pd.Series(areas, index=df_values.index, dtype=float)

In [None]:
import matplotlib.pyplot as plt

# Draw one shared "Seeding ends" marker at N_RANDOM_TRIALS (True) or one marker
# per study at that study's own seeding end (False).
USE_IDENTICAL_SEEDING = True
# Left end of the x-axis; also the first trial considered for the accuracy y-limits.
MIN_TRIALS = 1
LINEWIDTH = 2

alpha = 0.8
figsize = (10, 8)
fig, ax = plt.subplots(2, 2, figsize=figsize)
all_axes = ax.flatten()  # accuracy, cost, latency, Pareto area


def _plot_progress(axis, trial_counts, values, study_name):
    """Draw one study's progress curve on ``axis`` with its label and color."""
    axis.plot(
        trial_counts,
        values,
        label=LABELS[study_name],
        color=COLORS[study_name],
        alpha=alpha,
        linewidth=LINEWIDTH,
    )


def _mark_seeding_end(axis, position, color):
    """Draw a dashed vertical line at the trial count where seeding stops."""
    axis.axvline(position, color=color, linestyle="--", linewidth=LINEWIDTH, alpha=0.7, label="Seeding ends")


# The Pareto-area rectangle is derived from all studies together, so it is the
# same for every study and only needs to be computed once.
finite_0 = df["values_0"].apply(np.isfinite)
finite_1 = df["values_1"].apply(np.isfinite)
limits = [
    df.loc[finite_0, "values_0"].quantile([0.1, 1]).values,
    df.loc[finite_1, "values_1"].quantile([0, 0.9]).values,
]

min_accuracy = 1
max_accuracy = 0

for study_name in STUDY_NAMES:
    df_study = df[df["study_name"] == study_name].copy()
    n_trials = len(df_study)
    if n_trials == 0:
        continue

    print(f"Study {study_name} has {n_trials} trials")

    trial_counts = df_study["number"] + 1
    # Trials only count towards accuracy/latency if enough evaluations succeeded.
    is_successful = (
        df_study["user_attrs_metric_num_success"]
        >= SUCCESS_RATE * df_study["user_attrs_metric_num_total"].max()
    )

    # accuracy: best value among the successful trials numbered <= n
    accuracy = [
        df_study.loc[(df_study["number"] <= n) & is_successful, "values_0"].max()
        for n in range(n_trials)
    ]
    # The running maximum is NaN until the first successful trial. Ignore those
    # entries when updating the y-limits; the builtin min/max would otherwise
    # return order-dependent results and could leave the limits inverted.
    shown_accuracy = np.asarray(accuracy[MIN_TRIALS - 1:], dtype=float)
    if not np.isnan(shown_accuracy).all():
        min_accuracy = min(0.99 * min_accuracy, np.nanmin(shown_accuracy))
        max_accuracy = max(0.99 * max_accuracy, min(1.05 * np.nanmax(shown_accuracy), 1.01))
    _plot_progress(ax[0, 0], trial_counts, accuracy, study_name)

    # cumulative cost: total LLM cost of all trials numbered <= n
    cumulative_cost = [
        df_study.loc[df_study["number"] <= n, "user_attrs_metric_llm_cost_total"].sum()
        for n in range(n_trials)
    ]
    _plot_progress(ax[0, 1], trial_counts, cumulative_cost, study_name)

    # minimum latency: lowest P80 time among the successful trials numbered <= n
    min_latency = [
        df_study.loc[(df_study["number"] <= n) & is_successful, "user_attrs_metric_p80_time"].min()
        for n in range(n_trials)
    ]
    _plot_progress(ax[1, 0], trial_counts, min_latency, study_name)

    # Pareto area (in percent) after each completed trial
    _plot_progress(
        ax[1, 1],
        trial_counts,
        100 * cumulative_pareto_area(df_study, limits, sense=["max", "min"]),
        study_name,
    )

    if not USE_IDENTICAL_SEEDING:
        # Per-study marker: the random study uses the configured count, the
        # others end one past their last trial flagged as seeding.
        if "random" in study_name:
            max_seeding = N_RANDOM_TRIALS
        else:
            max_seeding = df_study.loc[df_study["user_attrs_is_seeding"].eq(True), "number"].max() + 1
        for axis in all_axes:
            _mark_seeding_end(axis, max_seeding, COLORS[study_name])

if USE_IDENTICAL_SEEDING:
    # Drawn once after all curves so the marker (and its legend entry) appears
    # even when the last configured study has no trials.
    for axis in all_axes:
        _mark_seeding_end(axis, N_RANDOM_TRIALS, "gray")

# (axis, title, y-label, legend location) for the four panels
panel_settings = [
    (ax[0, 0], "Accuracy", "Maximum Accuracy", "lower right"),
    (ax[0, 1], "Cost", "Cumulative Cost ($)", "upper left"),
    (ax[1, 0], "Latency", "Minimum P80 Latency (Second)", "upper right"),
    (ax[1, 1], "Pareto Area", "Pareto Area (%)", "lower right"),
]
for axis, title, ylabel, legend_loc in panel_settings:
    if SHOW_TITLE:
        axis.set_title(title)
    axis.set_xlabel("Number of Trials")
    axis.set_ylabel(ylabel)
    axis.legend(loc=legend_loc, fontsize="small", framealpha=1)
    axis.set_xscale("log")
    # Match the x-range to the number of trials that were actually loaded.
    axis.set_xlim(MIN_TRIALS, N_TRIALS_RESULT)

# Only apply the accuracy limits when at least one study provided usable values.
if min_accuracy < max_accuracy:
    ax[0, 0].set_ylim(min_accuracy, max_accuracy)
ax[1, 0].set_yscale("log")
ax[1, 1].set_ylim(40, 100)

if SHOW_TITLE:
    plt.suptitle(f"Accuracy/Latency-Optimization Progress for Different Seeding Types ({DATASET_NAME})")

plt.tight_layout()
plt.savefig(RESULTS_DIR / f"{EXPORT_FILENAME}-plot.pdf", dpi=300, bbox_inches="tight")
plt.savefig(RESULTS_DIR / f"{EXPORT_FILENAME}-plot.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# alpha = 0.8
# bar_width = 0.7
# figsize = (8, 6)
# fig, ax = plt.subplots(2, 2, figsize=figsize)

# for study_name in STUDY_NAMES:
#     df_study = df[df["study_name"] == study_name].copy()
#     n_trials = len(df_study)
#     if n_trials == 0:
#         continue

#     df_study = df_study[df_study["user_attrs_metric_num_success"] >= SUCCESS_RATE * df_study["user_attrs_metric_num_total"].max()]
    
#     if "random" in study_name:
#         df_study = df_study[df_study["number"] < N_RANDOM_TRIALS]
#     else:
#         df_study = df_study[df_study["user_attrs_is_seeding"] == True]

#     # accuracy
#     accuracy = df_study["values_0"].max()

#     ax[0,0].bar(
#         NAMES[study_name],
#         accuracy,
#         color=COLORS[study_name],
#         alpha=alpha,
#         width=bar_width
#     )

#     ax[0,0].text(
#         NAMES[study_name],
#         accuracy,
#         f'{accuracy:.2f}',
#         ha='center',
#         va='bottom'
#     )

#     # cumulative cost
#     cumulative_cost = df_study["user_attrs_metric_llm_cost_total"].sum()
#     ax[0,1].bar(
#         NAMES[study_name],
#         cumulative_cost,
#         color=COLORS[study_name],
#         alpha=alpha,
#         width=bar_width
#     )
#     ax[0,1].text(
#         NAMES[study_name],
#         cumulative_cost,
#         f'${cumulative_cost:.2f}',
#         ha='center',
#         va='bottom'
#     )

#     # minimum latency
#     min_latency = df_study["user_attrs_metric_p80_time"].min()
#     ax[1,0].bar(
#         NAMES[study_name],
#         min_latency,
#         color=COLORS[study_name],
#         alpha=alpha,
#         width=bar_width
#     )
#     ax[1,0].text(
#         NAMES[study_name],
#         min_latency,
#         f'{min_latency:.2f}s',
#         ha='center',
#         va='bottom'
#     )

#     # Pareto area
#     finite_0 = df["values_0"].apply(np.isfinite)
#     finite_1 = df["values_1"].apply(np.isfinite)
#     limits = [
#         df.loc[finite_0, "values_0"].quantile([0.1, 1]).values,
#         df.loc[finite_1, "values_1"].quantile([0, 0.9]).values,
#     ]
#     pareto_area_pct = 100 * cumulative_pareto_area(df_study, limits, sense=["max", "min"])
#     pareto_area_pct_max = pareto_area_pct.values[-1]
#     ax[1,1].bar(
#         NAMES[study_name],
#         pareto_area_pct_max,
#         color=COLORS[study_name],
#         alpha=alpha,
#         width=bar_width
#     )
#     ax[1,1].text(
#         NAMES[study_name],
#         pareto_area_pct_max,
#         f'{pareto_area_pct_max:.2f}%',
#         ha='center',
#         va='bottom'
#     )

# if SHOW_TITLE:
#     ax[0,0].set_title("Accuracy")
# ax[0,0].set_ylabel("Maximum Accuracy")
# ylim = ax[0,0].get_ylim()
# ax[0,0].set_ylim(ylim[0], ylim[1] * 1.1)

# if SHOW_TITLE:
#     ax[0,1].set_title("Cost")
# ax[0,1].set_ylabel("Cumulative Cost (Dollar)")
# ylim = ax[0,1].get_ylim()
# ax[0,1].set_ylim(ylim[0], ylim[1] * 1.1)

# if SHOW_TITLE:
#     ax[1,0].set_title("Latency")
# ax[1,0].set_ylabel("Minimum P80 Latency (Second)")
# ylim = ax[1,0].get_ylim()
# ax[1,0].set_ylim(ylim[0], ylim[1] * 1.1)

# if SHOW_TITLE:
#     ax[1,1].set_title("Pareto Area")
# ax[1,1].set_ylabel("Pareto Area (%)")
# ylim = ax[1,1].get_ylim()
# ax[1,1].set_ylim(ylim[0], ylim[1] * 1.1)

# if SHOW_TITLE:
#     plt.suptitle(f"Accuracy/Latency Optimization State After Seeding ({DATASET_NAME})")
# plt.tight_layout()
# plt.savefig(RESULTS_DIR / f"{EXPORT_FILENAME}-bars.png", dpi=300)
# plt.savefig(RESULTS_DIR / f"{EXPORT_FILENAME}-bars.pdf", dpi=300)
# plt.show()