## Baselines
This notebook extracts baselines used in our syftr paper.

In [None]:
# Notebook setup: (re)load IPython's autoreload extension and use mode 2,
# which reloads imported modules before each cell executes, so edits to the
# project code are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

from IPython.core import ultratb

# Override the background colour IPython uses to highlight lines in verbose
# tracebacks (dark purple).
ultratb.VerboseTB.tb_highlight = "bg:#3e0054"

In [None]:
from syftr.optuna_helper import get_completed_trials

# Names of the studies analysed in this notebook (presumably one
# "small-models" study per benchmark dataset -- confirm against the study
# configuration).
STUDY_NAMES = [
    "bench14--small-models--crag-music",
    "bench14--small-models--crag-sports",
    "bench14--small-models--drdocs",
    "bench14--small-models--financebench",
    "bench14--small-models--hotpot-train-hard",
    "bench14--small-models--infinitebench",
]

# Load the trials of all listed studies into a single DataFrame. The next cell
# relies on its "study_name", "params_*", "values_0"/"values_1" and
# "user_attrs_*" columns.
df_all = get_completed_trials(STUDY_NAMES)
# Last expression of the cell: display the available column names.
df_all.columns

In [None]:
import pandas as pd
from syftr.configuration import cfg


# Build one result row per dataset comparing a fixed "baseline" RAG
# configuration against the best trials found in the same study.
#
# Throughout this cell values_0 is treated as higher-is-better and values_1 as
# lower-is-better (presumably accuracy and cost -- confirm against the study
# objectives).
datasets = list(df_all["user_attrs_dataset"].unique())

df_results = pd.DataFrame(index=datasets)

# Tolerances used when looking for trials "at" the baseline: values_1 may be up
# to 1% above the baseline, values_0 up to 1% below it.
values_1_slack = 1.01
values_0_slack = 0.99

for study_name in STUDY_NAMES:
    df = df_all[df_all["study_name"] == study_name].copy()

    # Trials that ran the baseline configuration. The `== False` comparisons
    # are intentional: they are elementwise pandas comparisons, not Python
    # truthiness checks.
    baseline_mask = (
        (df["params_rag_template_name"] == "default")
        & (df["params_rag_mode"] == "rag")
        & (df["params_splitter_chunk_size"] == 1024)
        & (df["params_splitter_method"] == "token")
        & (df["params_rag_method"] == "dense")
        & (df["params_reranker_enabled"] == False)  # noqa: E712
        & (df["params_hyde_enabled"] == False)  # noqa: E712
        & (df["params_additional_context_enabled"] == False)  # noqa: E712
        & (df["params_rag_response_synthesizer_llm"] == "gpt-4o-mini")
        & (df["params_rag_embedding_model"] == "BAAI/bge-small-en-v1.5")
    )
    df_baseline = df.loc[baseline_mask].copy()
    if df_baseline.empty:
        # Without this guard the idxmax() below fails with an opaque
        # "argmax of an empty sequence" ValueError.
        raise ValueError(
            f"No trial matching the baseline configuration in study {study_name!r}"
        )

    df_baseline["primary_llm"] = df_baseline["params_rag_response_synthesizer_llm"]

    # Rank LLMs: 2 for "gpt-4o-std", 1 for any other "gpt*" model, 0 otherwise.
    # The casts matter: adding two boolean Series is a logical OR in pandas,
    # which would collapse the rank to True/False.
    df_baseline["preferred_llm"] = (
        (df_baseline["primary_llm"] == "gpt-4o-std").astype(int)
        + df_baseline["primary_llm"].str.startswith("gpt").astype(int)
    )
    df_baseline = df_baseline.sort_values(
        by=["preferred_llm", "values_0", "values_1"],
        ascending=[False, False, True],
    )

    # Pick the baseline trial with the most successful evaluations. idxmax()
    # returns the first maximum, so the sort above only acts as a tie-breaker.
    # `baseline` is a single row (a Series), assuming unique index labels.
    baseline = df_baseline.loc[
        df_baseline["user_attrs_metric_num_success"].idxmax(), :
    ].copy()

    dataset = baseline["user_attrs_dataset"]
    values_0_baseline = baseline["values_0"]
    values_1_baseline = baseline["values_1"]

    # Best values_0 among trials whose values_1 is at most ~the baseline's.
    within_values_1 = df["values_1"] <= values_1_slack * values_1_baseline
    values_0_at_baseline = df.loc[within_values_1, "values_0"].max()

    # Lowest values_1 among trials whose values_0 is at least ~the baseline's.
    within_values_0 = df["values_0"] >= values_0_slack * values_0_baseline
    values_1_at_baseline = df.loc[within_values_0, "values_1"].min()

    # Best value of each objective over the whole study, taken independently.
    values_0_best = df["values_0"].max()
    values_1_best = df["values_1"].min()

    # Key order defines the column order of the CSV written below.
    row = {
        "values_0_baseline": values_0_baseline,
        "values_1_baseline": values_1_baseline,
        "flow": baseline["user_attrs_flow"],
        "values_0_pareto_at_baseline": values_0_at_baseline,
        "values_1_pareto_at_baseline": values_1_at_baseline,
        "values_0_pareto": values_0_best,
        "values_1_pareto": values_1_best,
        # Improvements are signed so that positive means "better than baseline".
        "values_0_improvement_at_baseline": values_0_at_baseline - values_0_baseline,
        "values_1_improvement_at_baseline": values_1_baseline - values_1_at_baseline,
        "values_0_improvement_max": values_0_best - values_0_baseline,
        "values_1_improvement_max": values_1_baseline - values_1_best,
    }
    for column, value in row.items():
        df_results.loc[dataset, column] = value

df_results.to_csv(cfg.paths.results_dir / "small-models--improvements-from-baseline.csv")

In [None]:
from syftr.optuna_helper import get_pareto_df

# Pareto trials of the infinitebench study, best values_0 first and, among
# equal values_0, lowest values_1 first.
df_pareto = (
    get_pareto_df("bench14--small-models--infinitebench")
    # transform cost to be in Cents per 100 calls
    .assign(values_1=lambda frame: frame["values_1"] * 10000)
    .sort_values(by=["values_0", "values_1"], ascending=[False, True])
)
# Last expression of the cell: display the table.
df_pareto