## Silver Bullets
This notebook helps answer the question of whether there are "silver bullet" flows that perform well across many datasets.

In [None]:
%reload_ext autoreload
%autoreload 2

from IPython.core import ultratb

# Override the style string stored on IPython's VerboseTB (a private
# attribute) with a dark purple background colour.
ultratb.VerboseTB._tb_highlight = "bg:#3e0054"

In [None]:
from syftr.optuna_helper import get_study_names

# --- Analysis switches ------------------------------------------------------
SUCCESS_RATE = 0.5  # forwarded to get_completed_trials(success_rate=...)
EXCLUDE_ZERO_ACC = False  # when True, zero-accuracy rows are ignored while intersecting flows
RANK_CORR_METHOD = "kendall"  # "spearman", "pearson"
RESTRICT_TO_TITLE = True  # when True, studies without a TITLE entry are skipped while loading

# --- Study selection --------------------------------------------------------
INCLUDE_REGEX = ["rank1--rag-and-agents.*", "rank2--rag-and-agents.*"]
EXCLUDE_REGEX = [
    # ".*crag.*"
]

# --- Objectives and their axis labels ---------------------------------------
OBJ1, OBJ2 = "accuracy", "llm_cost_mean"
OBJ_NAME = {OBJ1: "Accuracy", OBJ2: "Cents per 100 Flow Calls"}

# Multiplier applied to the cost objective before plotting.
# NOTE(review): presumably converts dollars per call into cents per 100 calls - confirm.
COST_SCALE = 1e4

# Symbol shown in the legends for the selected correlation method.
SYMBOLS = dict(kendall="τ", spearman="ρ", pearson="r")
SYMBOL = SYMBOLS[RANK_CORR_METHOD]

# --- Display titles per study -----------------------------------------------
# Study names are "<prefix><dataset>"; only the prefixes listed here get a title.
_TITLE_PREFIXES = [
    "rank1--rag-and-agents--",
    # "rank2--rag-and-agents--",
]
_DATASET_TITLES = {
    "bright_hf": "Bright Biology",
    "crag_hf-music": "CRAG3 Music",
    "crag_hf-sports": "CRAG3 Sports",
    "drdocs_hf": "DRDocs",
    "financebench_hf": "FinanceBench",
    "hotpotqa_hf-train_hard": "HotpotQA",
    "infinitebench_hf": "InfiniteBench",
    "multihoprag_hf": "MultihopRAG",
    "phantomwikiv050_hf": "PhantomWiki",
}
TITLE = {
    f"{prefix}{dataset}": title
    for prefix in _TITLE_PREFIXES
    for dataset, title in _DATASET_TITLES.items()
}

# Resolve the concrete study names matching the include/exclude patterns.
study_names = get_study_names(include_regex=INCLUDE_REGEX, exclude_regex=EXCLUDE_REGEX)

study_names

In [None]:
import json

import pandas as pd

from syftr.optuna_helper import get_completed_trials

# Flow parameters that are dropped before flows are compared across studies.
IGNORE_PARAMS = ["enforce_full_evaluation", "splitter_chunk_size"]

# How each column is reduced when several trials share the same flow.
_FLOW_AGGREGATIONS = {
    "values_0": "mean",
    "values_1": "mean",
    "user_attrs_dataset": "first",
    "user_attrs_metric_objective_1_name": "first",
    "user_attrs_metric_objective_2_name": "first",
}


def _canonical_flow(flow_json):
    """Re-serialize a flow JSON string with sorted keys and IGNORE_PARAMS removed."""
    params = json.loads(flow_json)
    kept = {name: value for name, value in sorted(params.items()) if name not in IGNORE_PARAMS}
    return json.dumps(kept)


dfs = []
for study_name in study_names:
    if RESTRICT_TO_TITLE and study_name not in TITLE:
        print(f"Skipping study {study_name} as it is not in TITLE.")
        continue

    trials = get_completed_trials(study_name, success_rate=SUCCESS_RATE)
    assert not trials.empty, f"Study {study_name} has no completed trials."

    # Canonical strings make identical flows compare equal, so they can be grouped.
    trials["user_attrs_flow"] = trials["user_attrs_flow"].apply(_canonical_flow)
    per_flow = trials.groupby(["study_name", "user_attrs_flow"], as_index=False).agg(_FLOW_AGGREGATIONS)
    dfs.append(per_flow)

# One row per (study, flow) with the mean of both objectives.
df = pd.concat(dfs, ignore_index=True)

# All studies must optimise the same pair of objectives.
for _position in (1, 2):
    _column = f"user_attrs_metric_objective_{_position}_name"
    assert df[_column].nunique() == 1, f"Multiple objective {_position} names found."

obj1_name, obj2_name = (
    df[f"user_attrs_metric_objective_{_position}_name"].iloc[0] for _position in (1, 2)
)

print(f"Total number of completed trials: {len(df)}")

In [None]:
# Start from every flow seen anywhere, then keep only those present in each study.
common_flows = set(df["user_attrs_flow"].unique())
for study_name in study_names:
    if RESTRICT_TO_TITLE and study_name not in TITLE:
        print(f"Skipping study {study_name} as it is not in TITLE.")
        continue
    in_study = df["study_name"] == study_name
    if EXCLUDE_ZERO_ACC:
        # A flow scoring zero in this study does not count as present.
        in_study &= df["values_0"] > 0
    common_flows &= set(df.loc[in_study, "user_attrs_flow"])
intersection = list(common_flows)
print(f"There are {len(intersection)} common flows across all studies.")

# Scale both objectives by their maximum over all retained rows, then
# average each flow over the studies it appears in.
df_avg = df[df["user_attrs_flow"].isin(intersection)].copy()
for _raw, _scaled in (("values_0", "values_0_normalized"), ("values_1", "values_1_normalized")):
    df_avg[_scaled] = df_avg[_raw] / df_avg[_raw].max()

df_avg = (
    df_avg.groupby(["user_attrs_flow"], as_index=False)
    .agg({"values_0_normalized": "mean", "values_1_normalized": "mean"})
    .sort_values(
        by=["values_0_normalized", "values_1_normalized"],
        ascending=[True, True],
    )
    .reset_index(drop=True)
)

In [None]:
from syftr.configuration import cfg
from syftr.optuna_helper import get_flows_from_trials, get_pareto_mask

# get_pareto_mask is given the averaged frame with the normalized columns
# renamed to "values_0" / "values_1".
_avg_as_objectives = df_avg.rename(
    columns={"values_0_normalized": "values_0", "values_1_normalized": "values_1"}
)
pareto_mask = get_pareto_mask(_avg_as_objectives)

# The silver bullets are the masked rows, ordered by ascending normalized accuracy.
df_sb = (
    df_avg[pareto_mask]
    .sort_values(by="values_0_normalized", ascending=True)
    .reset_index(drop=True)
)

flows_sb = get_flows_from_trials(df_sb)

# Persist the selected flows as pretty-printed JSON in the results directory.
file_path = cfg.paths.results_dir / "silver-bullets.json"
with open(file_path, "w") as json_file:
    json.dump(flows_sb, json_file, indent=4)

print(f"Saved {len(df_sb)} silver bullets to: {file_path}")

In [None]:
from matplotlib import pyplot as plt
fig, ax = plt.subplots(figsize=(6, 4))

# Every flow common to all studies, drawn as small hollow circles.
_common_style = dict(
    facecolors="none",
    edgecolors="black",
    zorder=3,
    s=10,
    label="Common Trials",
    alpha=0.8,
    marker="o",
)
ax.scatter(df_avg["values_1_normalized"], df_avg["values_0_normalized"], **_common_style)

# The silver-bullet frontier as a dotted step line (markers hidden via markersize=0).
_frontier_style = dict(
    where="post",
    marker="o",
    color="black",
    label="Silver Bullets",
    zorder=2,
    markersize=0,
    linestyle=":",
    linewidth=1.5,
    alpha=0.8,
)
ax.step(df_sb["values_1_normalized"], df_sb["values_0_normalized"], **_frontier_style)

# Cost goes on a logarithmic x-axis, accuracy on the y-axis.
ax.set_xlabel(f"{OBJ_NAME[OBJ2]} (max-normalized)")
ax.set_ylabel(f"{OBJ_NAME[OBJ1]} (max-normalized)")
ax.set_xscale("log")

plt.legend()
plt.tight_layout()
plt.show()

In [None]:
def _pareto_frontier(df_points: pd.DataFrame) -> pd.DataFrame:
    """Return the rows selected by ``get_pareto_mask``, sorted by ``values_0`` then ``values_1``."""
    frontier = df_points[get_pareto_mask(df_points)]
    return frontier.sort_values(
        by=["values_0", "values_1"],
        ascending=[True, True],
    ).reset_index(drop=True)


def _step_area(frontier: pd.DataFrame, x_max: float) -> float:
    """Return the area under the step function through *frontier*, extended to *x_max*.

    Each point's ``values_0`` is held constant until the next point's
    ``values_1``; the last point is held until ``x_max`` when it lies left of it.
    """
    if frontier.empty:
        return 0.0

    xs = frontier["values_1"].to_numpy(dtype=float)
    ys = frontier["values_0"].to_numpy(dtype=float)

    # Width of every step times the height of its left point.
    area = float(((xs[1:] - xs[:-1]) * ys[:-1]).sum())

    # Tail from this frontier's own last point up to the shared right edge.
    if xs[-1] < x_max:
        area += float((x_max - xs[-1]) * ys[-1])
    return area


def get_relative_area(df_ref: pd.DataFrame, df_other: pd.DataFrame) -> float:
    """
    Calculate the relative area under the curve (AUC) of two Pareto-frontiers.

    Both inputs are reduced to their Pareto-optimal rows (via
    ``get_pareto_mask``) and sorted by ``values_0`` and ``values_1``. Each
    frontier is integrated as a step function of ``values_0`` over
    ``values_1``, starting at its own first point and ending at the largest
    ``values_1`` found on either frontier.

    Args:
        df_ref: Trials whose frontier is the denominator. Needs the columns
            ``values_0`` and ``values_1``.
        df_other: Trials whose frontier is the numerator. Same columns.

    Returns:
        ``area(df_other) / area(df_ref)``. ``nan`` when the reference frontier
        is empty or encloses no area; ``0.0`` when only the other frontier is
        empty.
    """
    frontier_ref = _pareto_frontier(df_ref)
    frontier_other = _pareto_frontier(df_other)

    if frontier_ref.empty:
        return float("nan")
    if frontier_other.empty:
        return 0.0

    x_max = max(frontier_ref["values_1"].max(), frontier_other["values_1"].max())

    # Each frontier uses its own last point for the tail. This also covers
    # single-point frontiers, where there are no steps between points.
    area_ref = _step_area(frontier_ref, x_max)
    area_other = _step_area(frontier_other, x_max)

    if area_ref == 0:
        # The ratio is undefined without a reference area.
        return float("nan")
    return area_other / area_ref

In [None]:
from matplotlib import pyplot as plt
from matplotlib.ticker import AutoMinorLocator

from syftr.analytics import get_rank_correlation

# Number of subplot columns in the per-study grid.
N_COL = 3

# One panel per study that has a display title; rows are rounded up.
n_studies = sum(1 for name in study_names if name in TITLE)
n_rows = -(-n_studies // N_COL)  # ceiling division

# squeeze=False keeps `axes` two-dimensional even for a single row.
fig, axes = plt.subplots(
    n_rows,
    N_COL,
    figsize=(3 * N_COL, 3 * n_rows),
    squeeze=False,
)
plot_idx = 0

# Collects the Pareto-fraction of every plotted study.
pareto_areas = []
# One panel per titled study: all trials, the study's own Pareto-frontier,
# the silver bullets, and the frontier formed by the silver bullets alone.
for study_name in study_names:
    if study_name not in TITLE:
        print(f"Skipping study '{study_name}' as it is not in the TITLE dictionary.")
        continue

    row, col = divmod(plot_idx, N_COL)
    ax = axes[row][col]

    # Every aggregated flow of this study.
    df_trials = df[df["study_name"] == study_name]
    ax.scatter(
        df_trials["values_1"] * COST_SCALE,
        df_trials["values_0"],
        c="tomato",
        zorder=3,
        s=4,
        label=f"{len(df_trials)} Trials (all)",
        alpha=0.4,
    )

    # The study's own Pareto-frontier, ordered by ascending accuracy.
    pareto_mask = get_pareto_mask(df_trials)
    df_pareto = df_trials[pareto_mask]
    df_pareto = df_pareto.sort_values(by="values_0", ascending=True)
    ax.step(
        df_pareto["values_1"] * COST_SCALE,
        df_pareto["values_0"],
        where="post",
        marker="o",
        color="tomato",
        label="Pareto-Frontier",
        zorder=1,
        markersize=0,
        alpha=0.6,
    )

    # The silver-bullet flows as they performed in this particular study.
    df_silver = df[
        (df["user_attrs_flow"].isin(df_sb["user_attrs_flow"]))
        & (df["study_name"] == study_name)].copy()

    # NOTE(review): RANK_CORR_METHOD is not passed to get_rank_correlation, yet
    # the result is indexed by it below - presumably all methods are returned; confirm.
    correlation_0 = get_rank_correlation(df_sb, df_silver, rank_by=["values_0_normalized", "values_0"])
    # correlation_1 = get_rank_correlation(df_sb, df_silver, method=RANK_CORR_METHOD, rank_by=["values_1_normalized", "values_1"])

    # Looked up outside the f-string: reusing the enclosing quote character
    # inside an f-string replacement field is a SyntaxError before Python 3.12.
    rank_corr = correlation_0[RANK_CORR_METHOD]["correlation"]

    ax.scatter(
        df_silver["values_1"] * COST_SCALE,
        df_silver["values_0"],
        edgecolors="black",
        facecolors="none",
        zorder=4,
        s=10,
        label=f"{len(df_silver)} Silver Bullets ({SYMBOL}={rank_corr:.2f})",
        alpha=1,
        marker="o",
        linewidths=1,
    )

    # Frontier formed by the silver bullets alone, ordered by ascending accuracy.
    pareto_mask = get_pareto_mask(df_silver)
    df_silver_pareto = df_silver[pareto_mask]
    df_silver_pareto = df_silver_pareto.sort_values(
        by="values_0",
        ascending=True,
    )

    # Sample both step functions on the union of their cost values so the gap
    # between them can be shaded. Where a frontier has no point at a cost, its
    # previous accuracy is carried forward (0 before its first point).
    x_pareto = list(df_pareto["values_1"])
    x_silver = list(df_silver_pareto["values_1"])
    x = sorted(set(x_pareto + x_silver))
    y = []
    y2 = []
    y_last = 0
    y2_last = 0
    for x_val in x:
        if x_val in x_pareto:
            y.append(df_pareto[df_pareto["values_1"] == x_val]["values_0"].values[0])
            if x_val in x_silver:
                y2.append(df_silver_pareto[df_silver_pareto["values_1"] == x_val]["values_0"].values[0])
            else:
                y2.append(y2_last)
        else:
            # Not on the study frontier, so the cost comes from the silver frontier.
            y.append(y_last)
            y2.append(df_silver_pareto[df_silver_pareto["values_1"] == x_val]["values_0"].values[0])
        y_last = y[-1]
        y2_last = y2[-1]

    ax.fill_between(
        [xx * COST_SCALE for xx in x],
        y,
        y2=y2,
        step="post",
        color="tomato",
        alpha=0.2,
        zorder=1,
    )

    ax.step(
        df_silver_pareto["values_1"] * COST_SCALE,
        df_silver_pareto["values_0"],
        where="post",
        marker="o",
        color="black",
        label="Silver Bullet-Frontier",
        zorder=2,
        markersize=0,
        linestyle=":",
        linewidth=1.5,
    )

    # Shade the area under the silver-bullet frontier; when the study frontier
    # reaches a higher cost, the last point of that frontier is appended.
    area_pct = get_relative_area(df_pareto, df_silver_pareto)
    x = list(df_silver_pareto["values_1"] * COST_SCALE)
    y = list(df_silver_pareto["values_0"])
    if df_pareto["values_1"].iloc[-1] > df_silver_pareto["values_1"].iloc[-1]:
        x.append(df_pareto["values_1"].iloc[-1] * COST_SCALE)
        y.append(df_pareto["values_0"].iloc[-1])
    ax.fill_between(
        x,
        y,
        y2=0,
        step="post",
        color="lightgray",
        alpha=0.4,
        zorder=1,
        label=f"Pareto-fraction: {area_pct:.2%}",
    )
    pareto_areas.append(area_pct)

    # Axis labels only on the first column and from the last grid row onwards.
    if plot_idx % N_COL == 0:
        ax.set_ylabel(OBJ_NAME[obj1_name])
    if plot_idx >= (n_rows - 1) * N_COL:
        ax.set_xlabel(OBJ_NAME[obj2_name])

    ax.set_xscale('log')
    # Accuracy is stored as a fraction; show it as a percentage.
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda value, _: f'{100*value:.0f}%'))
    ax.set_ylim(df_silver["values_0"].min() - 0.01, df_pareto["values_0"].max() + 0.01)

    # The legend title doubles as the panel title.
    legend = ax.legend(
        loc="lower right",
        framealpha=0.8,
        title=TITLE.get(study_name, study_name),
        fontsize=7,
    )
    legend.get_title().set_fontweight('bold')
    legend.get_title().set_fontsize(8)

    ax.yaxis.set_minor_locator(AutoMinorLocator())
    ax.yaxis.grid(which='major', linestyle='--', linewidth=0.5)
    ax.yaxis.grid(which='minor', linestyle=':', linewidth=0.5)

    plot_idx += 1

# Remove the grid cells that received no study.
for k in range(plot_idx, n_rows * N_COL):
    fig.delaxes(axes[k // N_COL][k % N_COL])

plt.tight_layout()

# Save the figure next to the other results before displaying it.
_pdf_path = cfg.paths.results_dir / "silver-bullets.pdf"
plt.savefig(_pdf_path, bbox_inches="tight", dpi=300, transparent=False)
plt.show()

_mean_area = sum(pareto_areas) / len(pareto_areas)
print(f"Average Pareto-fraction: {_mean_area:.2%}")

In [None]:
# Maps every silver-bullet flow (JSON string) to the studies in which it lies
# on that study's own Pareto-frontier.
sb_flows_on_pareto = {f: [] for f in df_sb["user_attrs_flow"].values}

for study_name in study_names:
    if study_name not in TITLE:
        continue

    df_trials = df[df["study_name"] == study_name]
    pareto_mask = get_pareto_mask(df_trials)
    df_pareto = df_trials[pareto_mask]

    # Flows are already stored as canonical JSON strings, so they can be
    # compared directly; a set gives constant-time membership tests.
    pareto_flows = set(df_pareto["user_attrs_flow"])

    for sb_flow, studies in sb_flows_on_pareto.items():
        if sb_flow in pareto_flows:
            studies.append(study_name)

print("Silver Bullet Flows (low-cost to high-accuracy):")
print("-------------------------------------------------")
for rank, (flow, studies) in enumerate(sb_flows_on_pareto.items(), start=1):
    print(f"Silver bullet {rank} is {len(studies)} times Pareto-optimal: {flow}")

In [None]:
# Column headers derived from the objective names shared by all studies.
_acc_col = OBJ_NAME[obj1_name]
_cost_col = OBJ_NAME[obj2_name]

# Empty table with one row per silver-bullet flow; cells are filled in below.
df_table = pd.DataFrame(
    index=sb_flows_on_pareto.keys(),
    columns=[_acc_col, _cost_col, "RAG mode", "Response Synthesizer", "Embedding Model", "Num. Dominating"],
)

# Index the silver bullets by flow string for direct lookups.
df_sb_flow = df_sb.set_index("user_attrs_flow", drop=True)

for flow_str in df_sb_flow.index:
    assert flow_str in sb_flows_on_pareto, f"Flow {flow_str} is not in: {sb_flows_on_pareto.keys()}"
    flow = json.loads(flow_str)
    _cells = {
        _acc_col: df_sb_flow.loc[flow_str, "values_0_normalized"],
        _cost_col: df_sb_flow.loc[flow_str, "values_1_normalized"],
        "RAG mode": flow["rag_mode"],
        "Response Synthesizer": flow.get("response_synthesizer_llm", "None"),
        "Embedding Model": flow.get("rag_embedding_model", "None"),
        # Number of studies in which this flow is on the study's Pareto-frontier.
        "Num. Dominating": len(sb_flows_on_pareto[flow_str]),
    }
    for _column, _cell in _cells.items():
        df_table.loc[flow_str, _column] = _cell

# Drop the flow strings, order by best accuracy then lowest cost, and give the
# objective columns their final headers.
df_table = (
    df_table.reset_index(drop=True)
    .sort_values(by=[_acc_col, _cost_col], ascending=[False, True])
    .reset_index(drop=True)
    .rename(
        columns={
            _acc_col: f"{_acc_col} (Avg Norm.)",
            _cost_col: f"{_cost_col} (Avg Norm.)",
        }
    )
)

df_table

In [None]:
# Render the summary table as LaTeX: no index column, no escaping,
# floats with two decimals.
_latex_options = dict(
    index=False,
    escape=False,
    float_format="%.2f",
    column_format="rrlllr",
)
latex_str = df_table.to_latex(**_latex_options)
print(latex_str)