## Pareto-Pruner Experiment
This notebook compares a study that used the Pareto-Pruner for early stopping against an otherwise identical study run without the Pareto-Pruner.

In [None]:
# Notebook convenience setup (IPython magics, not plain Python).
# Load (or reload) the autoreload extension and switch it to mode 2 so that
# imported modules are re-imported before each cell executes.
%reload_ext autoreload
%autoreload 2

from IPython.core import ultratb

# Override the highlight style used by IPython's verbose tracebacks
# (presumably to keep them readable on the author's colour theme — cosmetic only).
ultratb.VerboseTB.tb_highlight = "bg:#3e0054"

In [None]:
import pandas as pd

from syftr.configuration import cfg

# --- Experiment constants -------------------------------------------------
# NOTE(review): the first five constants and START_AFTER_SEEDING are not
# referenced by any cell visible in this notebook; presumably they mirror the
# settings the studies were run with — TODO confirm or remove.
N_TRIALS_FOR_ESTIMATE = 5
N_TARGET = 300
SUCCESS_RATE = 0.9
N_RANDOM_TRIALS = 46
N_TRIALS_RESULT = 200
# Upper bound for the "{n_evals} Samples" number shown in labels; the plotting
# cells use min(N_EVALS, max of "user_attrs_metric_num_total").
N_EVALS = 200
START_AFTER_SEEDING = True
# When True, the plotting cells add axis titles and a figure suptitle.
SHOW_TITLE = False

# Dataset shown in figure titles and embedded (slugified) in output file names.
# Switch this together with the active entries of STUDY_NAMES.
DATASET_NAME = "FinanceBench"
# DATASET_NAME = "HotpotQA"

# Optuna studies to load and compare (one with, one without the pruner).
STUDY_NAMES = [
    "pruner1--with-pruner--financebench_hf",
    "pruner3--without-pruner--financebench",
    # "pruner1--with-pruner--hotpotqa_hf-train_hard",
    # "pruner3--without-pruner--hotpot-train-hard",
]

# Two-line category labels used on the x-axis of the bar charts.
# "{n_evals}" is filled in with str.format(n_evals=...) by the plotting cells.
NAMES = {
    "pruner3--without-pruner--financebench": "FinanceBench\n({n_evals} Samples)",
    "pruner3--without-pruner--hotpot-train-hard": "HotpotQA\n({n_evals} Samples)",
    "pruner2--without-pruner--financebench_hf": "FinanceBench\n({n_evals} Samples)",
    "pruner2--without-pruner--hotpotqa_hf-train_hard": "HotpotQA\n({n_evals} Samples)",
    "pruner1--with-pruner--financebench_hf": "FinanceBench\n(Pareto-Pruner)",
    "pruner1--with-pruner--hotpotqa_hf-train_hard": "HotpotQA\n(Pareto-Pruner)",
}

# Single-line legend labels used by the line plots (same "{n_evals}" placeholder).
LABELS = {
    "pruner3--without-pruner--financebench": "FinanceBench ({n_evals} Samples)",
    "pruner3--without-pruner--hotpot-train-hard": "HotpotQA ({n_evals} Samples)",
    "pruner2--without-pruner--financebench_hf": "FinanceBench ({n_evals} Samples)",
    "pruner2--without-pruner--hotpotqa_hf-train_hard": "HotpotQA ({n_evals} Samples)",
    "pruner1--with-pruner--financebench_hf": "FinanceBench (Pareto-Pruner)",
    "pruner1--with-pruner--hotpotqa_hf-train_hard": "HotpotQA (Pareto-Pruner)",
}

# Plot colour per study: blue for runs without the pruner, green for runs with it.
COLORS = {
    "pruner3--without-pruner--financebench": "dodgerblue",
    "pruner3--without-pruner--hotpot-train-hard": "dodgerblue",
    "pruner2--without-pruner--financebench_hf": "dodgerblue",
    "pruner2--without-pruner--hotpotqa_hf-train_hard": "dodgerblue",
    "pruner1--with-pruner--financebench_hf": "limegreen",
    "pruner1--with-pruner--hotpotqa_hf-train_hard": "limegreen",
}

# Optuna storage handle obtained from the project configuration, and the
# directory the figures are written to (created here if it does not exist).
STORAGE = cfg.postgres.get_optuna_storage()
RESULTS_DIR = cfg.paths.results_dir / "pareto_pruner"
RESULTS_DIR.mkdir(exist_ok=True, parents=True)

# Do not truncate DataFrames when they are displayed in the notebook.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [None]:
import optuna

# Gather the completed trials of every configured study into one long
# DataFrame `df`, tagging each row with the study it belongs to.
dfs = []

for study_name in STUDY_NAMES:
    study = optuna.load_study(storage=STORAGE, study_name=study_name)
    df_tmp = study.trials_dataframe()
    # Keep only trials whose state is COMPLETE and record their origin.
    df_tmp = df_tmp.loc[df_tmp["state"] == "COMPLETE"].assign(study_name=study_name)
    dfs.append(df_tmp)

# Row labels are kept as returned per study (they are not renumbered here).
df = pd.concat(dfs)

In [None]:
import numpy as np
from paretoset import paretoset


def safe_paretoset(df: pd.DataFrame, sense):
    """Return a boolean Pareto-membership mask for a two-column frame.

    Infinite entries are treated as missing. When no row has both values
    present, nothing can be on the front and an all-False mask is returned
    without calling ``paretoset``.

    Args:
        df: Frame with exactly two objective columns.
        sense: Per-column direction, forwarded unchanged to ``paretoset``.

    Returns:
        A ``pd.Series`` of booleans sharing ``df``'s index.
    """
    assert df.shape[1] == 2
    cleaned = df.replace([np.inf, -np.inf], np.nan)

    # Guard clause: no fully populated row means there is no Pareto front.
    if not cleaned.notna().all(axis=1).any():
        return pd.Series(False, index=cleaned.index)

    mask = paretoset(cleaned, sense=sense, use_numba=False)
    return pd.Series(mask, index=cleaned.index)

def pareto_area(df_values, limits, sense):
    """Return the fraction of the limits rectangle covered by the Pareto front.

    Column 0 of ``df_values`` is treated as the y coordinate and column 1 as
    the x coordinate. Pareto points outside the rectangle are discarded, the
    rest are ordered by x and the covered region is summed as a staircase of
    rectangles reaching down to ``ymin`` and right to ``xmax``. The staircase
    treats larger y and smaller x as better, which matches the
    ``sense=["max", "min"]`` used by the callers in this notebook.

    Args:
        df_values: Two-column frame of objective values.
        limits: ``[(ymin, ymax), (xmin, xmax)]``.
        sense: Per-column direction, forwarded to ``safe_paretoset``.

    Returns:
        Covered area divided by the rectangle area; ``0.0`` when no Pareto
        point lies inside the rectangle.
    """
    ymin, ymax = limits[0]
    xmin, xmax = limits[1]
    box_area = (xmax - xmin) * (ymax - ymin)

    front = df_values[safe_paretoset(df_values, sense=sense)].copy()
    y_col = front.iloc[:, 0]
    x_col = front.iloc[:, 1]
    inside = (x_col >= xmin) & (x_col <= xmax) & (y_col >= ymin) & (y_col <= ymax)
    front = front[inside]
    if len(front) == 0:
        return 0.0
    front = front.sort_values(by=[front.columns[1], front.columns[0]])

    # Work on plain arrays; clipping mirrors the limits once more.
    points = front.to_numpy()
    heights = np.clip(points[:, 0], ymin, ymax)
    positions = np.clip(points[:, 1], xmin, xmax)

    covered = 0.0
    # Each step spans from one front point to the next at the left point's height.
    for x_left, x_right, height in zip(positions[:-1], positions[1:], heights[:-1]):
        covered += (x_right - x_left) * (height - ymin)
    # The final point extends to the right edge of the rectangle.
    covered += (xmax - positions[-1]) * (heights[-1] - ymin)
    return covered / box_area

def cumulative_pareto_area(df, limits, sense):
    """Return the Pareto area after each trial, in order of completion.

    Trials are ordered by ``datetime_complete``; entry *k* of the result is
    ``pareto_area`` evaluated on the first *k* + 1 trials' ``values_0`` /
    ``values_1``.

    Args:
        df: Trials frame with ``datetime_complete``, ``values_0``, ``values_1``.
        limits: Rectangle limits, forwarded to ``pareto_area``.
        sense: Per-column direction, forwarded to ``pareto_area``.

    Returns:
        A float ``pd.Series`` indexed by the row labels of ``df`` in
        completion order.
    """
    ordered = df.sort_values(by="datetime_complete")[["values_0", "values_1"]]
    areas = pd.Series(index=ordered.index, dtype=float)
    # Grow the prefix one trial at a time and record the area under its front.
    for count, label in enumerate(ordered.index, start=1):
        prefix = ordered.iloc[:count, :]
        areas.loc[label] = pareto_area(prefix, limits, sense)
    return areas

In [None]:
def align_curves(x1, y1, x2, y2):
    """Truncate two curves to the length of the shorter one.

    Args:
        x1, y1: Coordinates of the first curve (must have equal length).
        x2, y2: Coordinates of the second curve (must have equal length).

    Returns:
        A 4-tuple ``(x1, y1, x2, y2)`` with every sequence sliced to the
        common length.
    """
    assert len(x1) == len(y1)
    assert len(x2) == len(y2)
    shared = slice(min(len(x1), len(x2)))
    return tuple(series[shared] for series in (x1, y1, x2, y2))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from slugify import slugify

# Two-panel figure per dataset:
#   left  - cumulative LLM cost versus number of completed trials
#   right - Pareto area (%) versus cumulative cost
alpha = 0.8
figsize = (8, 4)
fig, ax = plt.subplots(1, 2, figsize=figsize)

# Smallest trial count / total cost over all studies, used to crop both
# panels to the range every study covers.
trials_min = 300
cost_min = np.inf

for study_name in STUDY_NAMES:
    df_study = df[df["study_name"] == study_name].copy()
    # NOTE(review): pandas aligns a DataFrame right-hand side on the row index
    # when assigning through a boolean mask, so this looks like it leaves the
    # row order unchanged — confirm whether a positional reorder of the
    # seeding trials was intended.
    df_study[df_study["user_attrs_is_seeding"] == True] = \
        df_study[df_study["user_attrs_is_seeding"] == True].sort_values(by="user_attrs_flow")

    n_trials = len(df_study)
    if n_trials == 0:
        continue

    df_study.reset_index(drop=True, inplace=True)

    n_evals = min(N_EVALS, int(df_study["user_attrs_metric_num_total"].max()))
    print(f"Study {study_name} has {n_trials} trials")

    # cumulative cost
    x_values = list(range(1, n_trials + 1))
    trials_min = min(trials_min, n_trials)

    # Single running sum instead of re-summing a growing slice per trial.
    # nancumsum counts missing costs as zero, like Series.sum() did.
    cumulative_cost = np.nancumsum(
        df_study["user_attrs_metric_llm_cost_total"].to_numpy(dtype=float)
    ).tolist()
    cost_min = min(cost_min, cumulative_cost[-1])

    ax[0].plot(
        x_values,
        cumulative_cost,
        label=LABELS[study_name].format(n_evals=n_evals),
        color=COLORS[study_name],
        alpha=alpha,
    )

    # Pareto area
    # The rectangle limits come from ALL loaded studies (global `df`), so every
    # study is measured against the same box: values_0 from its 10% quantile to
    # its maximum, values_1 from its minimum to its 90% quantile.
    finite_0 = df["values_0"].apply(np.isfinite)
    finite_1 = df["values_1"].apply(np.isfinite)
    limits = [
        df.loc[finite_0, "values_0"].quantile([0.1, 1]).values,
        df.loc[finite_1, "values_1"].quantile([0, 0.9]).values,
    ]
    # NOTE(review): cumulative_pareto_area orders trials by datetime_complete,
    # while cumulative_cost follows df_study's row order — verify both orders agree.
    cum_pareto_area = 100 * cumulative_pareto_area(df_study, limits, sense=["max", "min"])

    ax[1].plot(
        cumulative_cost,
        cum_pareto_area,
        label=LABELS[study_name].format(n_evals=n_evals),
        color=COLORS[study_name],
        alpha=alpha,
    )

if SHOW_TITLE:
    ax[0].set_title("Cost")
ax[0].set_xlabel("Number of Completed Trials")
ax[0].set_ylabel("Cumulative Cost (Dollar)")
ax[0].legend(loc="best", fontsize="small", framealpha=1)
ax[0].set_xlim(1, trials_min)
# ax[0].set_ylim(0, 300)

if SHOW_TITLE:
    ax[1].set_title("Pareto Area")
ax[1].set_xlabel("Cumulative Cost (Dollar)")
ax[1].set_ylabel("Pareto Area (%)")
ax[1].legend(loc="best", fontsize="small", framealpha=1)
ax[1].set_xlim(1, cost_min)
ax[1].set_ylim(70, 95)

if SHOW_TITLE:
    plt.suptitle(f"{DATASET_NAME} Accuracy/Cost-Optimization with and without ParetoPruner")
plt.tight_layout()
plt.savefig(RESULTS_DIR / f"pareto-pruner--{slugify(DATASET_NAME)}--plots.png", dpi=300)
plt.savefig(RESULTS_DIR / f"pareto-pruner--{slugify(DATASET_NAME)}--plots.pdf", dpi=300)
plt.show()

In [None]:
# Bar-chart summary per study:
#   left  - cumulative LLM cost after N_TRIALS completed trials
#   right - Pareto area (%) reached with at most USD_SPEND dollars
N_TRIALS = 300   # 175 300
USD_SPEND = 300  # 175 300

alpha = 0.8
bar_width = 0.7
figsize = (8, 4)
fig, ax = plt.subplots(1, 2, figsize=figsize)

for study_name in STUDY_NAMES:
    df_study = df[df["study_name"] == study_name].copy()
    # NOTE(review): pandas aligns a DataFrame right-hand side on the row index
    # when assigning through a boolean mask, so this looks like it leaves the
    # row order unchanged — confirm whether a positional reorder of the
    # seeding trials was intended.
    df_study[df_study["user_attrs_is_seeding"] == True] = \
        df_study[df_study["user_attrs_is_seeding"] == True].sort_values(by="user_attrs_flow")

    n_trials = len(df_study)
    if n_trials == 0:
        continue

    df_study.reset_index(drop=True, inplace=True)

    n_evals = min(N_EVALS, int(df_study["user_attrs_metric_num_total"].max()))
    print(f"Study {study_name} has {n_trials} trials")

    # cumulative cost
    # Single running sum instead of re-summing a growing slice per trial.
    # nancumsum counts missing costs as zero, like Series.sum() did.
    cumulative_cost = np.nancumsum(
        df_study["user_attrs_metric_llm_cost_total"].to_numpy(dtype=float)
    ).tolist()

    # cumulative_cost[k] is the cost of the first k + 1 trials, so the cost
    # after N_TRIALS trials sits at index N_TRIALS - 1. Studies with fewer
    # completed trials fall back to their last available value instead of
    # raising IndexError.
    cost_after_n_trials = cumulative_cost[min(N_TRIALS, n_trials) - 1]

    ax[0].bar(
        NAMES[study_name].format(n_evals=n_evals),
        cost_after_n_trials,
        color=COLORS[study_name],
        alpha=alpha, width=bar_width,
    )
    ax[0].text(
        NAMES[study_name].format(n_evals=n_evals),
        cost_after_n_trials,
        f'${int(cost_after_n_trials)}',
        ha='center',
        va='bottom'
    )

    # Pareto area
    # The rectangle limits come from ALL loaded studies (global `df`), so every
    # study is measured against the same box.
    finite_0 = df["values_0"].apply(np.isfinite)
    finite_1 = df["values_1"].apply(np.isfinite)
    limits = [
        df.loc[finite_0, "values_0"].quantile([0.1, 1]).values,
        df.loc[finite_1, "values_1"].quantile([0, 0.9]).values,
    ]
    cum_pareto_area = 100 * cumulative_pareto_area(df_study, limits, sense=["max", "min"]).values

    # Index the areas by cumulative cost and take the last entry that is still
    # within the budget.
    df_area = pd.DataFrame(index=cumulative_cost, data=cum_pareto_area)
    area = df_area[df_area.index <= USD_SPEND].iloc[-1].values[0]

    ax[1].bar(
        NAMES[study_name].format(n_evals=n_evals),
        area, color=COLORS[study_name], alpha=alpha,
        width=bar_width,
    )
    ax[1].text(
        NAMES[study_name].format(n_evals=n_evals),
        area,
        f'{area:.1f}%',
        ha='center',
        va='bottom'
    )

if SHOW_TITLE:
    ax[0].set_title("Cost")
ax[0].set_xlabel(f"{N_TRIALS} Trials")
ax[0].set_ylabel("Cumulative Cost (Dollar)")
ax[0].set_ylim(0, 600)  # 600 # 350

if SHOW_TITLE:
    ax[1].set_title("Pareto Area")
ax[1].set_xlabel(f"~ ${USD_SPEND} Cost")
ax[1].set_ylabel("Pareto Area (%)")
ax[1].set_ylim(82, 92)   # 70, 90

if SHOW_TITLE:
    plt.suptitle(f"{DATASET_NAME} Accuracy/Cost-Optimization with and without ParetoPruner")
plt.tight_layout()
plt.savefig(RESULTS_DIR / f"pareto-pruner--{slugify(DATASET_NAME)}--bars.png", dpi=300)
plt.savefig(RESULTS_DIR / f"pareto-pruner--{slugify(DATASET_NAME)}--bars.pdf", dpi=300)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from slugify import slugify

# Sanity-check figure: Pareto area (%) versus number of completed trials,
# one line per study, saved as PNG and PDF under RESULTS_DIR.
alpha = 0.8
figsize = (6, 4)
fig, ax = plt.subplots(1, 1, figsize=figsize)

# Reset the shared cropping variables; in this cell cost_min is only mentioned
# by the commented-out axis limit further down and trials_min is not read.
trials_min = np.inf
cost_min = np.inf

for study_name in STUDY_NAMES:
    df_study = df[df["study_name"] == study_name].copy()
    # NOTE(review): pandas aligns a DataFrame right-hand side on the row index
    # when assigning through a boolean mask, so this looks like it leaves the
    # row order unchanged — confirm whether a positional reorder was intended.
    df_study[df_study["user_attrs_is_seeding"] == True] = \
        df_study[df_study["user_attrs_is_seeding"] == True].sort_values(by="user_attrs_flow")

    n_trials = len(df_study)
    if n_trials == 0:
        # Nothing to plot for a study without completed trials.
        continue

    df_study.reset_index(drop=True, inplace=True)

    # Number shown in the "{n_evals} Samples" legend label, capped at N_EVALS.
    n_evals = min(N_EVALS, int(df_study["user_attrs_metric_num_total"].max()))
    print(f"Study {study_name} has {n_trials} trials")
        
    # Pareto area
    # The rectangle limits come from ALL loaded studies (global `df`), so every
    # study is measured against the same box: values_0 from its 10% quantile to
    # its maximum, values_1 from its minimum to its 90% quantile.
    finite_0 = df["values_0"].apply(np.isfinite)
    finite_1 = df["values_1"].apply(np.isfinite)
    limits = [
        df.loc[finite_0, "values_0"].quantile([0.1, 1]).values,
        df.loc[finite_1, "values_1"].quantile([0, 0.9]).values,
    ]
    # Scale the area fraction to percent.
    cum_pareto_area = 100 * cumulative_pareto_area(df_study, limits, sense=["max", "min"])

    ax.plot(
        range(1, n_trials + 1),
        cum_pareto_area,
        label=LABELS[study_name].format(n_evals=n_evals),
        color=COLORS[study_name],
        alpha=alpha,
    )

if SHOW_TITLE:
    ax.set_title("Pareto Area")
ax.set_xlabel("Number of Completed Trials")
ax.set_ylabel("Pareto Area (%)")
ax.legend(loc="lower right", fontsize="small", framealpha=1)
# ax.set_xscale("log")
# ax.set_xlim(1, cost_min)
# ax.set_ylim(70, 90)

if SHOW_TITLE:
    plt.suptitle(f"{DATASET_NAME} Optimization with and without ParetoPruner")
plt.tight_layout()
# Save both a raster and a vector version of the figure.
plt.savefig(RESULTS_DIR / f"pareto-pruner--{slugify(DATASET_NAME)}--sanity-check.png", dpi=300)
plt.savefig(RESULTS_DIR / f"pareto-pruner--{slugify(DATASET_NAME)}--sanity-check.pdf", dpi=300)
plt.show()