## Analysis

This notebook visualizes the results of an optimization study as plots (Pareto frontier, comparison with baselines) and as an animation of the search progress.

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported before each cell is executed.
%reload_ext autoreload
%autoreload 2

from IPython.core import ultratb

# Background colour (dark purple) used by IPython's verbose tracebacks for highlighting.
ultratb.VerboseTB.tb_highlight = "bg:#3e0054"

In [None]:
import matplotlib.pyplot as plt

# Toggle for the dark matplotlib theme. Restart the kernel after changing this
# flag: the style is only ever switched on here, nothing resets it once applied.
# The colour constants derived from this flag are computed in the configuration cell.
USE_DARK_STYLE = False

if USE_DARK_STYLE:
    plt.style.use('dark_background')

In [None]:
from pathlib import Path

import pandas as pd

from syftr.configuration import cfg
from syftr.sota import get_sota

# --- Study selection ---------------------------------------------------------
STUDY_NAME = "rank0--rag-and-agents--financebench_hf"
SUCCESS_RATE = 0.9
SHOW_SOTA = False
# USE_LOG_SCALE = False
USE_COSTS = True

# --- Objective labels ----------------------------------------------------------
# Objective 1 is accuracy; objective 2 is labelled as cost or latency
# depending on USE_COSTS.
OBJ1_NAME = cfg.plotting.target_accuracy_name
OBJ1_UNIT = cfg.plotting.target_accuracy_unit
OBJ2_NAME, OBJ2_UNIT = (
    (cfg.plotting.target_cost_name, cfg.plotting.target_cost_unit)
    if USE_COSTS
    else (cfg.plotting.target_latency_name, cfg.plotting.target_latency_unit)
)

# Percentiles used by set_lim() to crop the axes.
OBJ1_CUT_BELOW_PERCENTILE = 20
OBJ2_CUT_ABOVE_PERCENTILE = 70

# --- Storage -------------------------------------------------------------------
STORAGE = cfg.postgres.get_optuna_storage()

# --- Colours -------------------------------------------------------------------
DATAROBOT_PURPLE = cfg.plotting.datarobot_purple
DATAROBOT_GREEN = cfg.plotting.datarobot_green

COLOR_PARETO = COLOR_ACCURACY = COLOR_IMPROVED = DATAROBOT_PURPLE
COLOR_RUN = COLOR_BASELINE = "gray"
COLOR_LATENCY = DATAROBOT_GREEN
COLOR_KNEE = "white"
# Foreground / background colours that follow the selected matplotlib theme.
COLOR_BW, COLOR_BW_INVERSE = (
    ("white", "black") if USE_DARK_STYLE else ("black", "white")
)

# --- pandas display ------------------------------------------------------------
# Show data frames without truncating rows or columns.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# --- Output directory ----------------------------------------------------------
# Create the results directory if needed, then keep its resolved path.
RESULTS_DIR: Path = cfg.paths.results_dir
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
RESULTS_DIR = RESULTS_DIR.resolve()

# State-of-the-art reference for this study (falsy when unavailable, see the
# `SHOW_SOTA and SOTA` check in the plotting cell).
SOTA = get_sota(STUDY_NAME)

In [None]:
import optuna

from syftr.optuna_helper import get_completed_trials

# Load the study from the configured Optuna storage and fetch its completed
# trials as a data frame (SUCCESS_RATE is passed through to the helper;
# presumably a minimum success-rate filter - confirm in syftr.optuna_helper).
study = optuna.load_study(study_name=STUDY_NAME, storage=STORAGE)
df_trials: pd.DataFrame = get_completed_trials(study, SUCCESS_RATE)

# The rescaling below is a currency conversion, so it must only be applied when
# the second objective is a cost. With USE_COSTS disabled the column is labelled
# as latency and has to keep its original values.
if USE_COSTS:
    df_trials["values_1"] *= 10000  # convert from $ per call to cents per 100 calls

print(f"The study '{STUDY_NAME}' has {len(df_trials)} completed trials")

In [None]:
# Preview the first five trials.
df_trials.head(5)

In [None]:
from kneed import KneeLocator
from paretoset import paretoset

# Boolean mask of the Pareto-optimal trials: maximise values_0, minimise values_1.
pareto_mask = paretoset(df_trials[["values_0", "values_1"]], sense=["max", "min"])

# Pareto-optimal trials ordered by the first objective, re-indexed from zero and
# without the bookkeeping columns.
df_pareto = (
    df_trials[pareto_mask]
    .sort_values("values_0")
    .reset_index(drop=True)
    .drop(columns=["datetime_start", "datetime_complete", "duration", "state"])
)
df_pareto

In [None]:
# KneeLocator receives the second objective as x and the first objective as y,
# so order the Pareto points by x before handing them over.
df_knee = df_pareto.sort_values(by="values_1")

knee = KneeLocator(
    df_knee["values_1"],
    df_knee["values_0"],
    curve="concave",
    direction="increasing",
)

knee_point = knee.knee
# Compare with None explicitly: a knee located at x == 0 is falsy but valid.
if knee_point is not None:
    # The knee is reported as an x value taken from the input, so look up the
    # trial number of the (first) Pareto point with exactly that x value.
    knee_trial_num = df_knee[df_knee["values_1"] == knee_point]["number"].values[0]
else:
    knee_trial_num = None

In [None]:
import pandas as pd


class Baseline:
    """A reference configuration looked up among the rows of a trials frame.

    Subclasses describe one baseline through class attributes:

    * ``CONSTRAINTS`` - mapping of column name to required value. The first row
      of ``data`` that satisfies every constraint supplies the objectives.
    * ``NAME``, ``SHORT_NAME``, ``PERFORMANCE_LABEL``, ``COLOR``, ``MARKER`` -
      presentation attributes returned by the properties of the same name.
      They are not defined on this base class, so reading a property whose
      attribute the subclass did not set raises ``AttributeError``.
    """

    # Column -> value filters; None (or an empty mapping) disables the lookup.
    CONSTRAINTS = None

    def __init__(self, data):
        """Find the baseline row in ``data`` and remember its objective values.

        Args:
            data: ``pandas.DataFrame`` containing the constraint columns plus
                ``values_0`` and ``values_1``.

        Raises:
            KeyError: if a constraint column is missing or no row satisfies
                all constraints.
        """
        self._data = data
        # Defined up front so obj1/obj2 return None instead of failing when the
        # class declares no constraints.
        self._obj1 = None
        self._obj2 = None
        if self.CONSTRAINTS:
            # Build the mask on the frame's own index: a default RangeIndex
            # would not align with frames whose index is not 0..n-1.
            condition = pd.Series(True, index=data.index)
            for column, value in self.CONSTRAINTS.items():
                condition &= data[column] == value
            matching = data[condition]
            if matching.empty:
                raise KeyError(
                    f"No trial matches the constraints of {type(self).__name__}"
                )
            # Several rows may match; the first one is used.
            self._obj1 = matching["values_0"].iloc[0]
            self._obj2 = matching["values_1"].iloc[0]

    @property
    def name(self):
        """Display name (``NAME`` of the subclass)."""
        return self.NAME

    @property
    def short_name(self):
        """Abbreviated display name (``SHORT_NAME`` of the subclass)."""
        return self.SHORT_NAME

    @property
    def performance_label(self):
        """Label describing the baseline's performance (``PERFORMANCE_LABEL``)."""
        return self.PERFORMANCE_LABEL

    @property
    def obj1(self):
        """``values_0`` of the matched row, or None without constraints."""
        return self._obj1

    @property
    def obj2(self):
        """``values_1`` of the matched row, or None without constraints."""
        return self._obj2

    @property
    def color(self):
        """Plot colour (``COLOR`` of the subclass)."""
        return self.COLOR

    @property
    def marker(self):
        """Plot marker (``MARKER`` of the subclass)."""
        return self.MARKER


# Baselines found among the trials of the loaded study, in registration order.
baselines = []


class TokenSplitterBaseline(Baseline):
    """Sparse RAG baseline using a token splitter."""

    NAME = "Phi-4 + RAG (Token Splitter)"
    PERFORMANCE_LABEL = "LlamaIndex Pipeline Performance"
    CONSTRAINTS = {
        "params_additional_context_enabled": False,
        "params_few_shot_embedding_model": "Linq-AI-Research/Linq-Embed-Mistral",
        "params_few_shot_enabled": True,
        "params_few_shot_top_k": 16,
        "params_hyde_enabled": False,
        "params_rag_method": "sparse",
        "params_rag_mode": "rag",
        "params_rag_query_decomposition_enabled": False,
        "params_rag_top_k": 9,
        "params_reranker_enabled": False,
        "params_response_synthesizer_llm": "microsoft/Phi-4-multimodal-instruct",
        "params_splitter_chunk_exp": 9,
        "params_splitter_chunk_overlap_frac": 0.5,
        "params_splitter_method": "token",
        "params_template_name": "default",
    }
    COLOR = "pink"
    MARKER = "o"


class SentenceSplitterBaseline(Baseline):
    """Dense RAG baseline using a sentence splitter."""

    NAME = "Llama-3.3-Nemotron-Super-49B + RAG (Sentence Splitter)"
    PERFORMANCE_LABEL = "LlamaIndex Pipeline Performance"
    CONSTRAINTS = {
        "params_additional_context_enabled": True,
        "params_additional_context_num_nodes": 2,
        "params_hyde_enabled": False,
        "params_rag_embedding_model": "Linq-AI-Research/Linq-Embed-Mistral",
        "params_rag_method": "dense",
        "params_rag_mode": "rag",
        "params_rag_query_decomposition_enabled": False,
        "params_rag_top_k": 5,
        "params_reranker_enabled": False,
        "params_response_synthesizer_llm": "nvidia/Llama-3_3-Nemotron-Super-49B",
        "params_splitter_chunk_exp": 10,
        "params_splitter_chunk_overlap_frac": 0.25,
        "params_splitter_method": "sentence",
        "params_template_name": "default",
    }
    COLOR = "cyan"
    MARKER = "o"


# A baseline is skipped (best effort) when the study has no matching trial or
# lacks one of the constraint columns. Catch Exception rather than using a bare
# except so that KeyboardInterrupt / SystemExit still propagate, and report the
# actual class name together with the reason.
try:
    baseline_token = TokenSplitterBaseline(df_trials)
    baselines.append(baseline_token)
except Exception as exc:
    print(f"Cannot add {TokenSplitterBaseline.__name__}: {exc!r}")

try:
    baseline_sentence = SentenceSplitterBaseline(df_trials)
    baselines.append(baseline_sentence)
except Exception as exc:
    print(f"Cannot add {SentenceSplitterBaseline.__name__}: {exc!r}")

# Baseline used by the comparison chart: the last one that could be added,
# or None when no baseline is available.
baseline = baselines[-1] if baselines else None

In [None]:
import numpy as np


def set_lim(ax, lat, acc, sota_acc=0):
    """Crop the axes of ``ax`` to the configured percentile window.

    Args:
        ax: Object providing ``set_xlim`` / ``set_ylim`` (a matplotlib axes).
        lat: Values plotted on the x-axis.
        acc: Values plotted on the y-axis; must provide ``.max()``.
        sota_acc: Extra y value that has to stay visible (defaults to 0).
    """
    # x-axis: fixed lower bound, upper bound at the configured percentile.
    x_upper = np.percentile(lat, OBJ2_CUT_ABOVE_PERCENTILE)
    ax.set_xlim(0.01, x_upper)

    # y-axis: start at the configured percentile and leave 10% headroom above
    # the larger of the best observed value and ``sota_acc``.
    y_lower = np.percentile(acc, OBJ1_CUT_BELOW_PERCENTILE)
    y_upper = 1.1 * max(acc.max(), sota_acc)
    ax.set_ylim(y_lower, y_upper)

In [None]:
import pandas as pd

# Index labels (not positions) of the Pareto-optimal trials in df_trials.
pareto_indices = df_trials[pareto_mask].index

# Switches for the individual layers of the figure.
show_data = True
show_pareto = True
show_baselines = True
show_knee = True

_, ax = plt.subplots(figsize=(10, 6))

# set_lim(ax, df_trials["values_1"], df_trials["values_0"])

# Layer 1: every completed trial.
if show_data:
    ax.scatter(
        df_trials["values_1"],
        df_trials["values_0"],
        c=COLOR_RUN,
        zorder=1,
        s=10,
        label=f"{len(df_trials)} Trials",
        alpha=0.5,
    )

# Select the Pareto rows with the boolean mask itself. Feeding the index labels
# to .iloc would pick wrong rows (or fail) whenever df_trials does not carry a
# default 0..n-1 index.
df_pareto: pd.DataFrame = df_trials[pareto_mask].copy()
df_pareto = df_pareto.sort_values(by="values_0")

# Optional horizontal reference line for the state of the art.
if SHOW_SOTA and SOTA:
    ax.axhline(
        y=SOTA.accuracy,
        color="darkgray",
        linestyle="--",
        label=f"SOTA Accuracy: {SOTA.accuracy:.2f} ({SOTA.method_name})",
        linewidth=1,
    )

# Layer 2: the Pareto frontier, connected in order of increasing accuracy.
if show_pareto:
    ax.plot(
        df_pareto["values_1"],  # latency or cost
        df_pareto["values_0"],  # accuracy
        marker="o",
        color=COLOR_PARETO,
        label="Pareto Frontier",
        zorder=2,
        markersize=5,
    )

# Layer 3: the knee point. Test against None explicitly because trial number 0
# is a valid (but falsy) trial number.
if show_knee and knee_trial_num is not None:
    df_knee = df_pareto[df_pareto["number"] == knee_trial_num]
    ax.scatter(
        df_knee["values_1"],
        df_knee["values_0"],
        color=COLOR_KNEE,
        marker="*",
        label="Knee Point",
        zorder=3,
        s=80,
        edgecolors=COLOR_BW,
        linewidths=0.5,
    )

# Layer 4: one marker per baseline found in the study.
if show_baselines:
    for b in baselines:
        ax.scatter(
            b.obj2,
            b.obj1,
            color=b.color,
            marker=b.marker,
            label=b.name,
            zorder=4,
            s=30,
            edgecolors=COLOR_BW,
            linewidths=0.5,
        )

ax.set_xlabel(OBJ2_UNIT)
ax.set_ylabel(OBJ1_UNIT)
ax.set_title(f"Optimization of a GenAI Pipeline ({STUDY_NAME})")

ax.set_xscale('log')

ax.legend(loc='lower right', framealpha=1)
plt.tight_layout()
# Save before plt.show() so the written file contains the rendered figure.
plt.savefig(RESULTS_DIR / f"{STUDY_NAME}-pareto-frontier.jpg")
plt.show()

In [None]:
# Number of the trial with the highest first objective (accuracy).
acc_improved_trial_num = df_trials.at[df_trials["values_0"].idxmax(), "number"]
# Number of the trial with the lowest second objective (values_1).
lat_improved_trial_num = df_trials.at[df_trials["values_1"].idxmin(), "number"]

In [None]:
# Grouped bar chart comparing the selected baseline with the best trial per
# objective. Objective 1 is drawn on the left y-axis, objective 2 on a twin
# right y-axis because the two use different units.
if baselines:

    title = f"Comparison with {baseline.name} ({STUDY_NAME})"

    # Baseline exposes its objective values as obj1 / obj2.
    base = [
        baseline.obj1,
        baseline.obj2,
    ]

    # Take scalars from the matching rows so every bar gets a single height.
    acc_row = df_trials[df_trials["number"] == acc_improved_trial_num].iloc[0]
    lat_row = df_trials[df_trials["number"] == lat_improved_trial_num].iloc[0]
    acc_optimal = [acc_row["values_0"], acc_row["values_1"]]
    lat_optimal = [lat_row["values_0"], lat_row["values_1"]]

    # Name the groups after the configured objectives so the chart stays
    # correct when the second objective is cost rather than latency.
    labels = [OBJ1_NAME, OBJ2_NAME]
    x = np.arange(len(labels))

    width = 0.3
    gap = 0.1
    # Horizontal offset of the outer bars relative to the group centre.
    offset = 2 * width / 3 + gap / 3

    fig, ax1 = plt.subplots(figsize=(8, 6))
    ax1.tick_params(axis="y", labelcolor=COLOR_BW)
    ax2 = ax1.twinx()

    # Group 0: first objective of baseline / obj2-optimal / obj1-optimal trial.
    # Only these bars carry labels, so each legend entry appears once.
    ax1.bar(
        0 - offset,
        base[0],
        width - gap,
        label=baseline.name,  # .replace(" ", "\n")
        color=COLOR_BASELINE,
    )
    ax1.bar(
        0,
        lat_optimal[0],
        width - gap,
        label=f"{OBJ2_NAME}\nImproved",
        color=COLOR_LATENCY,
    )
    ax1.bar(
        0 + offset,
        acc_optimal[0],
        width - gap,
        label=f"{OBJ1_NAME}\nImproved",
        color=COLOR_ACCURACY,
    )

    # Group 1: second objective of the same three configurations.
    ax2.bar(
        1 - offset,
        base[1],
        width - gap,
        color=COLOR_BASELINE,
    )
    ax2.bar(
        1,
        lat_optimal[1],
        width - gap,
        color=COLOR_LATENCY,
    )
    ax2.bar(
        1 + offset,
        acc_optimal[1],
        width - gap,
        color=COLOR_ACCURACY,
    )

    ax2.tick_params(axis="y", labelcolor=COLOR_BW)

    ax1.set_title(title)
    ax1.set_xticks(x)
    ax1.set_xticklabels(labels)

    # Axis labels reuse the objective units from the configuration cell, as the
    # other figures in this notebook do.
    ax1.set_ylabel(OBJ1_UNIT)
    ax2.set_ylabel(OBJ2_UNIT)

    fig.legend(
        loc="upper left",
        bbox_to_anchor=(0, 1),
        bbox_transform=ax1.transAxes,
    )
    plt.tight_layout()
    plt.savefig(RESULTS_DIR / f"{STUDY_NAME}-improvements-over-baseline.jpg")
    plt.show()

In [None]:
import matplotlib.animation as animation
from matplotlib.colors import LinearSegmentedColormap, Normalize

fig, ax = plt.subplots(figsize=(10, 6))


def generate_white_to_blue_cmap(N):
    """Return ``N`` RGBA colours fading from COLOR_BW_INVERSE to COLOR_BW.

    Despite its name the gradient runs between the theme's background and
    foreground colours, so earlier trials are drawn fainter than later ones.
    """
    cmap = LinearSegmentedColormap.from_list("white_to_blue", [COLOR_BW_INVERSE, COLOR_BW], N=N)
    return cmap(np.linspace(0, 1, N))


# One colour per trial, in trial order.
colors = generate_white_to_blue_cmap(len(df_trials))

# Continuous version of the same gradient, used only for the colour bar.
cmap = LinearSegmentedColormap.from_list("white_to_blue", [COLOR_BW_INVERSE, COLOR_BW])

norm = Normalize(vmin=0, vmax=len(df_trials))
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, ax=ax)
cbar.set_label("Number of Trials")

# Artists that are updated on every frame.
scat_data = ax.scatter(
    [],
    [],
    edgecolor=COLOR_BW,
    linewidths=0.5,
    s=20,
)
(scat_pareto,) = ax.plot(
    # Positional access: the first row need not carry the index label 0.
    [df_trials["values_1"].iloc[0]],
    [df_trials["values_0"].iloc[0]],
    marker="o",
    color=COLOR_PARETO,
    label="Pareto Frontier",
    markersize=5,
)

ax.set_xlabel(OBJ2_UNIT)
ax.set_ylabel(OBJ1_UNIT)
ax.set_title(f"Optimization of a GenAI Pipeline ({STUDY_NAME})")
set_lim(ax, df_trials["values_1"], df_trials["values_0"])

# ax.set_xscale('log')

text = ax.text(0.02, 0.96, "", transform=ax.transAxes, horizontalalignment='left')


ax.legend(loc="upper right")
plt.tight_layout()


def init():
    """Reset all animated artists to an empty state."""
    scat_data.set_offsets(np.empty((0, 2)))
    scat_pareto.set_data([], [])
    text.set_text("")
    return scat_data, scat_pareto, text


def update(frame):
    """Draw the first ``frame + 2`` trials and their Pareto frontier."""
    text.set_text(f"Trial: {frame+1}")

    # Slice by position so the animation does not depend on the index labels
    # of df_trials.
    n_shown = frame + 2
    df_tmp = df_trials.iloc[:n_shown]

    mask = paretoset(df_tmp[["values_0", "values_1"]], sense=["max", "min"])

    scat_data.set_offsets(np.column_stack((df_tmp["values_1"], df_tmp["values_0"])))
    scat_data.set_color(colors[:n_shown])
    scat_data.set_edgecolor("gray")
    scat_data.set_linewidth(1)

    cbar.update_normal(sm)

    # Apply the boolean mask directly instead of converting it to index labels
    # and passing those to .iloc.
    df_pareto: pd.DataFrame = df_tmp[mask].sort_values(by="values_0")

    scat_pareto.set_data(df_pareto["values_1"], df_pareto["values_0"])

    return scat_data, scat_pareto, text


# Frame k shows k + 2 trials, so len(df_trials) - 1 frames are needed for the
# final frame to include the last trial.
ani = animation.FuncAnimation(
    fig, update, frames=max(len(df_trials) - 1, 1), init_func=init, blit=True
)

# requires ffmpeg
ani.save(RESULTS_DIR / f"{STUDY_NAME}.mp4", writer="ffmpeg", fps=20)

In [None]:
!ffmpeg -y -i {RESULTS_DIR}/{STUDY_NAME}.mp4 -vf "fps=50,scale=640:-1:flags=lanczos" -gifflags -transdiff {RESULTS_DIR}/{STUDY_NAME}.gif