## Seeding Experiment
This notebook was used to compare optimizations that were identically configured except for the type of seeding. We compared random seeding, static seeding, and transfer learning.

In [None]:
# Load (or reload) IPython's autoreload extension; mode 2 re-imports all
# modules before each cell runs, so edits to library code are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2

from IPython.core import ultratb

# Override the highlight style that IPython's verbose traceback formatter uses
# (a dark purple background). NOTE(review): `_tb_highlight` is a private
# attribute — confirm it still exists when upgrading IPython.
ultratb.VerboseTB._tb_highlight = "bg:#3e0054"

In [None]:
import pandas as pd

from syftr.configuration import cfg

# Optuna study names to compare; one study per seeding strategy.
STUDY_NAMES = [
    "seeding3--random--hotpot-train-hard",
    "seeding3--static--hotpot-train-hard",
    "seeding3--transfer--hotpot-train-hard",
]

# Two-line display names per study, used as the bar-chart category labels.
NAMES = {
    "seeding3--random--hotpot-train-hard": "Random\nSeeding",
    "seeding3--static--hotpot-train-hard": "Static\nSeeding",
    "seeding3--transfer--hotpot-train-hard": "Transfer\nLearning",
}

# Single-line display names per study, used as line-plot legend labels.
LABELS = {
    "seeding3--random--hotpot-train-hard": "Random Seeding",
    "seeding3--static--hotpot-train-hard": "Static Seeding",
    "seeding3--transfer--hotpot-train-hard": "Transfer Learning",
}

# Matplotlib color per study, shared by all plots.
COLORS = {
    "seeding3--random--hotpot-train-hard": "tomato",
    "seeding3--static--hotpot-train-hard": "dodgerblue",
    "seeding3--transfer--hotpot-train-hard": "limegreen",
}

# NOTE(review): N_TRIALS_FOR_ESTIMATE and N_TARGET are not referenced by the
# cells visible in this notebook — confirm whether they are still needed.
N_TRIALS_FOR_ESTIMATE = 5
N_TARGET = 300
# A trial counts as successful when its number of successful evaluations is at
# least this fraction of the largest evaluation count seen in its study.
SUCCESS_RATE = 0.9
# Number of leading trials of the "random" study that are treated as its
# seeding phase (the other studies flag seeding trials via a user attribute).
N_RANDOM_TRIALS = 46
# Only trials whose number is below this value are loaded for analysis.
N_TRIALS_RESULT = 200
# Toggle for the axes titles and figure suptitles.
SHOW_TITLE = False

# Output directory for the generated figures; created if it does not exist.
RESULTS_DIR = cfg.paths.results_dir / "seeding"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Optuna storage handle obtained from the project configuration.
STORAGE = cfg.postgres.get_optuna_storage()

# Show complete DataFrames when they are displayed in the notebook.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [None]:
import optuna

# Collect one trials table per study, keeping only the first N_TRIALS_RESULT
# trials and tagging every row with the study it came from.
dfs = []

for study_name in STUDY_NAMES:
    study = optuna.load_study(storage=STORAGE, study_name=study_name)
    df_tmp = study.trials_dataframe()
    within_budget = df_tmp["number"] < N_TRIALS_RESULT
    df_tmp = df_tmp.loc[within_budget].assign(study_name=study_name)
    dfs.append(df_tmp)

# Stack the per-study tables; the original per-study row labels are kept.
df = pd.concat(dfs)

In [None]:
import numpy as np
from paretoset import paretoset


def safe_paretoset(df: pd.DataFrame, sense):
    """Return a boolean mask marking the Pareto-optimal rows of ``df``.

    Infinite values are treated like missing values. Rows with a missing
    value in either objective cannot be compared (every comparison against
    NaN is false), so they are excluded from the dominance test and are
    always marked ``False``.

    Args:
        df: Frame with exactly two objective columns.
        sense: Per-column optimisation direction, forwarded to ``paretoset``
            (e.g. ``["max", "min"]``).

    Returns:
        A boolean ``pd.Series`` aligned with ``df.index``.

    Raises:
        ValueError: If ``df`` does not have exactly two columns.
    """
    if df.shape[1] != 2:
        raise ValueError(
            f"safe_paretoset expects exactly 2 objective columns, got {df.shape[1]}"
        )

    cleaned = df.replace([np.inf, -np.inf], np.nan)
    comparable = cleaned.notna().all(axis=1).to_numpy()

    # Work positionally so the result is correct even if the index labels
    # are not unique.
    flags = np.zeros(len(cleaned), dtype=bool)
    if comparable.any():
        flags[comparable] = paretoset(
            cleaned.loc[comparable], sense=sense, use_numba=False
        )
    return pd.Series(flags, index=df.index)

def pareto_area(df_values, limits, sense):
    """Return the fraction of a bounding box covered by the Pareto front.

    The first column of ``df_values`` is treated as the y coordinate and the
    second column as the x coordinate. ``limits[0]`` holds ``(ymin, ymax)``
    and ``limits[1]`` holds ``(xmin, xmax)``. Pareto points outside the box
    are discarded. The remaining points are sorted by x; each point
    contributes a rectangle reaching from its x to the next point's x (or to
    ``xmax`` for the last point) with height ``y - ymin``.

    NOTE(review): the rectangle construction presumably assumes the first
    objective is maximised and the second minimised — confirm before calling
    it with a different ``sense``.

    Returns:
        The covered area divided by the box area, or ``0.0`` when no Pareto
        point lies inside the box.
    """
    x_bounds = limits[1]
    y_bounds = limits[0]
    xmin, xmax = x_bounds
    ymin, ymax = y_bounds
    total_area = (xmax - xmin) * (ymax - ymin)

    front = df_values[safe_paretoset(df_values, sense=sense)].copy()
    y_col = front.iloc[:, 0]
    x_col = front.iloc[:, 1]
    inside_box = (x_col >= xmin) & (x_col <= xmax) & (y_col >= ymin) & (y_col <= ymax)
    front = front[inside_box]
    if front.empty:
        return 0.0

    # Walk the front from left to right (ties on x broken by y).
    front = front.sort_values(by=[front.columns[1], front.columns[0]])
    ys = np.clip(front.iloc[:, 0].to_numpy(), ymin, ymax)
    xs = np.clip(front.iloc[:, 1].to_numpy(), xmin, xmax)

    covered = 0.0
    for x_left, x_right, height in zip(xs[:-1], xs[1:], ys[:-1]):
        covered += (x_right - x_left) * (height - ymin)
    # The last point's rectangle extends to the right edge of the box.
    covered += (xmax - xs[-1]) * (ys[-1] - ymin)
    return covered / total_area

def cumulative_pareto_area(df, limits, sense):
    """Return the Pareto area after each completed trial.

    Trials are ordered by ``datetime_complete``; entry ``k`` of the result is
    ``pareto_area`` evaluated on the first ``k + 1`` trials in that order.

    Args:
        df: Trials frame with ``datetime_complete``, ``values_0`` and
            ``values_1`` columns.
        limits: Bounding box forwarded to ``pareto_area``.
        sense: Optimisation directions forwarded to ``pareto_area``.

    Returns:
        A float ``pd.Series`` indexed like ``df`` sorted by completion time.
    """
    df_values = df.sort_values(by="datetime_complete")[["values_0", "values_1"]]
    # Build the series positionally: assigning through ``.loc[label]`` would
    # write to every row sharing that label when the index has duplicates.
    running_areas = [
        pareto_area(df_values.iloc[:n_seen, :], limits, sense)
        for n_seen in range(1, len(df_values) + 1)
    ]
    return pd.Series(running_areas, index=df_values.index, dtype=float)

In [None]:
import matplotlib.pyplot as plt

# Progress plot: best accuracy, cumulative cost, best latency and Pareto area
# as a function of the number of trials, one line per seeding strategy.
alpha = 0.8
figsize = (8, 6)
fig, ax = plt.subplots(2, 2, figsize=figsize)

# The Pareto-area bounding box is derived from all studies together, so it is
# identical for every study and only needs to be computed once.
finite_0 = df["values_0"].apply(np.isfinite)
finite_1 = df["values_1"].apply(np.isfinite)
limits = [
    df.loc[finite_0, "values_0"].quantile([0.1, 1]).values,
    df.loc[finite_1, "values_1"].quantile([0, 0.9]).values,
]

for study_name in STUDY_NAMES:
    # Sort by trial number so the cumulative curves line up with the x axis.
    df_study = df[df["study_name"] == study_name].sort_values("number")
    n_trials = len(df_study)
    if n_trials == 0:
        continue

    print(f"Study {study_name} has {n_trials} trials")

    # Trial count at which the seeding phase ends (drawn as a vertical line).
    if "random" in study_name:
        max_seeding = N_RANDOM_TRIALS
    else:
        is_seeding = df_study["user_attrs_is_seeding"].eq(True)
        max_seeding = df_study.loc[is_seeding, "number"].max() + 1

    trial_count = df_study["number"] + 1

    # Only trials that reached the required success rate count towards the
    # best accuracy and best latency.
    successful = (
        df_study["user_attrs_metric_num_success"]
        >= SUCCESS_RATE * df_study["user_attrs_metric_num_total"].max()
    )

    # Running best values; unsuccessful trials are masked out and then carry
    # the previous best forward (NaN until the first successful trial).
    accuracy = df_study["values_0"].where(successful).cummax().ffill()
    min_latency = df_study["user_attrs_metric_p80_time"].where(successful).cummin().ffill()

    # The cost of every trial counts, successful or not; a missing cost adds 0.
    cumulative_cost = df_study["user_attrs_metric_llm_cost_total"].fillna(0.0).cumsum()

    # Pareto area in percent after each completed trial.
    pareto_area_pct = 100 * cumulative_pareto_area(df_study, limits, sense=["max", "min"])

    panels = (
        (ax[0, 0], accuracy),
        (ax[0, 1], cumulative_cost),
        (ax[1, 0], min_latency),
        (ax[1, 1], pareto_area_pct),
    )
    for panel, curve in panels:
        panel.plot(
            trial_count,
            curve,
            label=LABELS[study_name],
            color=COLORS[study_name],
            alpha=alpha,
        )
        panel.axvline(
            max_seeding,
            color=COLORS[study_name],
            linestyle="--",
            linewidth=0.7,
            alpha=0.7,
            label="Seeding ends",
        )

if SHOW_TITLE:
    ax[0, 0].set_title("Accuracy (Zoomed)")
ax[0, 0].set_ylabel("Maximum Accuracy")
ax[0, 0].legend(loc="lower left", fontsize="small", framealpha=1)
ax[0, 0].set_xscale("log")
ax[0, 0].set_ylim(0.70, 1.0)

if SHOW_TITLE:
    ax[0, 1].set_title("Cost")
ax[0, 1].set_ylabel("Cumulative Cost (Dollar)")
ax[0, 1].legend(loc="upper left", fontsize="small", framealpha=1)
ax[0, 1].set_xscale("log")

if SHOW_TITLE:
    ax[1, 0].set_title("Latency (Zoomed)")
ax[1, 0].set_xlabel("Number of Trials")
ax[1, 0].set_ylabel("Minimum P80 Latency (Second)")
ax[1, 0].legend(loc="upper left", fontsize="small", framealpha=1)
ax[1, 0].set_xscale("log")
ax[1, 0].set_yscale("log")
ax[1, 0].set_ylim(0.9, 3)

if SHOW_TITLE:
    ax[1, 1].set_title("Pareto Area (Zoomed)")
ax[1, 1].set_xlabel("Number of Trials")
ax[1, 1].set_ylabel("Pareto Area (%)")
ax[1, 1].legend(loc="lower left", fontsize="small", framealpha=1)
ax[1, 1].set_xscale("log")
ax[1, 1].set_ylim(80, 100)

if SHOW_TITLE:
    plt.suptitle(f"HotpotQA Accuracy/Cost-Optimization Progress for Different Seeding Types ({N_TRIALS_RESULT} Trials)")
plt.tight_layout()
plt.show()

In [None]:
# Bar chart: state of each optimization at the end of its seeding phase
# (best accuracy, summed cost, best latency and Pareto area).
alpha = 0.8
bar_width = 0.7
figsize = (8, 6)
fig, ax = plt.subplots(2, 2, figsize=figsize)

# The Pareto-area bounding box is derived from all studies together, so it is
# identical for every study and only needs to be computed once.
finite_0 = df["values_0"].apply(np.isfinite)
finite_1 = df["values_1"].apply(np.isfinite)
limits = [
    df.loc[finite_0, "values_0"].quantile([0.1, 1]).values,
    df.loc[finite_1, "values_1"].quantile([0, 0.9]).values,
]

for study_name in STUDY_NAMES:
    df_study = df[df["study_name"] == study_name].copy()
    n_trials = len(df_study)
    if n_trials == 0:
        continue

    # Keep only trials that reached the required success rate.
    df_study = df_study[
        df_study["user_attrs_metric_num_success"]
        >= SUCCESS_RATE * df_study["user_attrs_metric_num_total"].max()
    ]

    # Restrict to the seeding phase of the study.
    if "random" in study_name:
        df_study = df_study[df_study["number"] < N_RANDOM_TRIALS]
    else:
        df_study = df_study[df_study["user_attrs_is_seeding"].eq(True)]

    accuracy = df_study["values_0"].max()
    # NOTE(review): the cost is summed after the success-rate filter, so
    # trials below the threshold do not contribute — confirm this is intended.
    cumulative_cost = df_study["user_attrs_metric_llm_cost_total"].sum()
    min_latency = df_study["user_attrs_metric_p80_time"].min()
    # Only the final Pareto area is needed, so compute it once on all
    # remaining trials; this also yields 0 instead of failing when no trial
    # survived the filters above.
    pareto_area_pct_max = 100 * pareto_area(
        df_study[["values_0", "values_1"]], limits, sense=["max", "min"]
    )

    bars = (
        (ax[0, 0], accuracy, f'{accuracy:.2f}'),
        (ax[0, 1], cumulative_cost, f'${cumulative_cost:.2f}'),
        (ax[1, 0], min_latency, f'{min_latency:.2f}s'),
        (ax[1, 1], pareto_area_pct_max, f'{pareto_area_pct_max:.2f}%'),
    )
    for panel, height, caption in bars:
        panel.bar(
            NAMES[study_name],
            height,
            color=COLORS[study_name],
            alpha=alpha,
            width=bar_width,
        )
        # Print the value just above the bar.
        panel.text(
            NAMES[study_name],
            height,
            caption,
            ha='center',
            va='bottom',
        )

axis_text = (
    (ax[0, 0], "Accuracy", "Maximum Accuracy"),
    (ax[0, 1], "Cost", "Cumulative Cost (Dollar)"),
    (ax[1, 0], "Latency", "Minimum P80 Latency (Second)"),
    (ax[1, 1], "Pareto Area", "Pareto Area (%)"),
)
for panel, title, ylabel in axis_text:
    if SHOW_TITLE:
        panel.set_title(title)
    panel.set_ylabel(ylabel)
    # Add 10% headroom so the value captions fit above the bars.
    ylim = panel.get_ylim()
    panel.set_ylim(ylim[0], ylim[1] * 1.1)

if SHOW_TITLE:
    plt.suptitle("HotpotQA Accuracy/Cost Optimization State After Seeding")
plt.tight_layout()
plt.savefig(RESULTS_DIR / "hotpot-seeding.png", dpi=300)
plt.savefig(RESULTS_DIR / "hotpot-seeding.pdf", dpi=300)
plt.show()