In [None]:
import os
import sys
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from typing import Optional

sys.path.append('../')
sys.path.append('../../')
sys.path.append('../../../')
sys.path.append('../../src')
sys.path.append('../../prompts')
sys.path.append('../../src/llmperf')

from benchmarking.utils import read_perf_eval_json_files
from synthetic_performance_eval_script import load_requests_with_switching

# Load data from bundled model runs

In [None]:
# Directory whose JSON result files are analyzed by the cells below.
# Alternative location (tracking tests), kept for quick switching:
# results_dir = '../data/results/tracking_tests'
results_dir = '../../data/bundle_tests/aa_tests/20260113-110355.785523'

# Analyze metrics through models

## Read summary json files

In [None]:
# Read the summary files of the run and index them by workload shape, so each
# row is identified by (input tokens, output tokens, concurrent requests).
df = (
    read_perf_eval_json_files(results_dir, type='summary')
    .set_index(['num_input_tokens', 'num_output_tokens', 'num_concurrent_requests'])
)
df.head(10)

In [None]:
def plot_benchmarking_data(df, ycol, ylabel):
    """Plot one summary metric against concurrency, one line per (in, out) token pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Summary results. ``num_input_tokens``, ``num_output_tokens`` and
        ``num_concurrent_requests`` must be available either as columns or as
        index levels (the index is reset before plotting).
    ycol : str
        Name of the column drawn on the y-axis.
    ylabel : str
        Label shown on the y-axis.

    Notes
    -----
    When ``ycol`` is the server output-throughput median
    (``server_output_token_per_s_p50``, or the older un-prefixed
    ``output_token_per_s_p50``) and the frame also contains
    ``client_output_token_per_s_p50``, the client metric is overlaid as a
    dashed line and the title says so. The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    df = df.reset_index()

    # Legend label combining input and output token counts, e.g. "(1000, 100)".
    # Built column-wise so integer counts are not upcast to floats the way a
    # row-wise apply over mixed numeric columns would.
    df['in_out'] = (
        "(" + df['num_input_tokens'].astype(str)
        + ", " + df['num_output_tokens'].astype(str) + ")"
    )

    # The client overlay only makes sense for the server throughput metric and
    # only when the client column is actually present.
    server_throughput_cols = ('output_token_per_s_p50', 'server_output_token_per_s_p50')
    client_col = 'client_output_token_per_s_p50'
    overlay_client = ycol in server_throughput_cols and client_col in df.columns

    plt.figure(figsize=(8, 5))

    # Main performance metric (solid lines)
    sns.lineplot(data=df, x='num_concurrent_requests', y=ycol, hue='in_out', marker='o', linewidth=2)

    # Client-side throughput as dashed lines; legend=False avoids duplicating
    # the (in, out) entries already created by the first plot.
    if overlay_client:
        sns.lineplot(data=df, x='num_concurrent_requests', y=client_col, hue='in_out',
                     marker='o', linestyle='dashed', alpha=0.6, legend=False)

    # Concurrency values are typically powers of two, so use a log2 x-axis
    plt.xscale("log", base=2)

    # Show the real concurrency values as ticks instead of 2^n notation
    xticks = sorted(df['num_concurrent_requests'].unique())
    plt.xticks(xticks, labels=[str(x) for x in xticks])

    # Labels and title
    plt.xlabel("Concurrency [log2 scale]")
    plt.ylabel(ylabel)
    if overlay_client:
        plt.title("Performance for Different (in, out) Token Combinations\n(Solid = Server, Dashed = Client)")
    else:
        plt.title("Performance for Different (in, out) Token Combinations")

    plt.legend(title="(in, out)", bbox_to_anchor=(1.05, 1), loc="upper left")
    plt.grid(True, which="both", linestyle="--")

    plt.show()

In [None]:
# One chart per metric of interest: (summary column, y-axis label).
for metric_col, axis_label in [
    ('server_output_token_per_s_p50', 'Output Tokens per Second per Request (median)'),
    ('server_ttft_s_p50', 'Server Time to First Token'),
    ('client_ttft_s_p50', 'Client Time to First Token'),
    ('client_total_output_throughput', 'E2E Token Throughput per Sec'),
]:
    plot_benchmarking_data(df, metric_col, axis_label)


# Analyze switching time

__Note:__ It's recommended that you warm up your environment beforehand by running a first round of the models with their available sequence size and batch size combinations, so that switching time is more likely to show up in this analysis.

In [None]:
# Load the per-request records of the run through the project helper.
df_requests = load_requests_with_switching(
    read_perf_eval_json_files_fn=read_perf_eval_json_files,
    output_files_dir=results_dir,
)

# The model name is taken as the third underscore-separated token of the file name.
df_requests['model'] = df_requests['filename'].map(lambda fname: fname.split('_')[2])

# Columns shown in the per-request switching overview.
cols = [
    "uuid", "model",
    "server_number_input_tokens", "server_number_output_tokens",
    "start_time", "server_ttft_s",
    "requests_batching_per_request", "switching_time",
]

# Chronological view of the selected columns.
df_switching_per_request = df_requests.loc[:, cols].sort_values(by="start_time")
df_switching_per_request

In [None]:
def _parse_start_time(df: pd.DataFrame, col: str = "start_time") -> pd.Series:
    """
    Parse a start-time column into timezone-aware (UTC) timestamps.

    Supports:
      - ISO timestamps / datetime-like strings
      - Epoch seconds given as strings
      - Epoch seconds given as a numeric (int/float) column

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing the column to parse.
    col : str
        Name of the column holding the start times.

    Returns
    -------
    pd.Series
        UTC datetimes, one per input row.

    Raises
    ------
    ValueError
        If the column can be parsed neither as datetimes nor as epoch
        seconds, or if any individual value fails to parse.
    """
    values = df[col]

    is_plain_numeric = (
        pd.api.types.is_numeric_dtype(values)
        and not pd.api.types.is_bool_dtype(values)
    )

    if is_plain_numeric:
        # Without an explicit unit, pd.to_datetime reads numbers as
        # nanoseconds since the epoch, which would silently place every
        # request in 1970. Numeric input is treated as epoch seconds.
        parsed = pd.to_datetime(values, unit="s", utc=True, errors="coerce")
    else:
        # Try datetime parsing first
        parsed = pd.to_datetime(values, errors="coerce", utc=True)

        if parsed.isna().all():
            # Fallback: epoch seconds stored as strings
            try:
                parsed = pd.to_datetime(
                    values.astype(float),
                    unit="s",
                    utc=True,
                )
            except Exception as e:
                raise ValueError(
                    f"Unable to parse '{col}' as datetime or epoch seconds"
                ) from e

    # A partially parsed column would skew the relative time axis, so fail loudly.
    if parsed.isna().any():
        raise ValueError(
            f"Some values in '{col}' could not be parsed. "
            "Please check the input format."
        )

    return parsed


def plot_ttft_switching_scatter(
    df_requests: pd.DataFrame,
    save_html: bool = False,
    output_path: Optional[str] = None,
    title: str = "TTFT vs Request Start Time (Switching Effects)",
):
    """
    Interactive Plotly scatter plot of server TTFT over request start time:
      - x: request start time (seconds relative to the earliest request)
      - y: server TTFT
      - color: model + sequence size
      - symbol: batch size

    Parameters
    ----------
    df_requests : pd.DataFrame
        Per-request records. Must contain the columns ``uuid``, ``model``,
        ``server_number_input_tokens``, ``server_number_output_tokens``,
        ``start_time``, ``server_ttft_s``, ``requests_batching_per_request``
        and ``switching_time``. The frame is copied, not modified.
    save_html : bool, optional
        Whether to also write the figure to an HTML file, by default False.
    output_path : Optional[str], optional
        Destination of the HTML file; ``~`` is expanded and missing parent
        directories are created. Required if save_html=True.
    title : str, optional
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        The generated figure.

    Raises
    ------
    ValueError
        If ``df_requests`` is empty, or if ``save_html`` is True and no
        ``output_path`` is given.
    """

    if df_requests.empty:
        raise ValueError("df_requests is empty")

    # Validate the export arguments before doing any work, so a missing path
    # is reported without first building the whole figure.
    if save_html and not output_path:
        raise ValueError("output_path must be provided when save_html=True")

    df = df_requests.copy()

    # --------------------------------------------------
    # Parse start_time safely and express it relative to the first request
    # --------------------------------------------------
    df["_start_dt"] = _parse_start_time(df, "start_time")

    t0 = df["_start_dt"].min()
    df["start_time_rel_s"] = (df["_start_dt"] - t0).dt.total_seconds()

    # --------------------------------------------------
    # Derived visualization fields
    # --------------------------------------------------
    df["sequence"] = (
        df["server_number_input_tokens"].astype(str)
        + "→"
        + df["server_number_output_tokens"].astype(str)
    )

    df["model_sequence"] = df["model"] + " | " + df["sequence"]

    # --------------------------------------------------
    # Plotly scatter
    # --------------------------------------------------
    fig = px.scatter(
        df,
        x="start_time_rel_s",
        y="server_ttft_s",
        color="model_sequence",
        symbol="requests_batching_per_request",
        title=title,
        labels={
            "start_time_rel_s": "Request start time (relative, s)",
            "server_ttft_s": "Server TTFT (s)",
            "model_sequence": "Model | Sequence",
            "requests_batching_per_request": "Batch size",
        },
        hover_data={
            "uuid": True,
            "model": True,
            "sequence": True,
            "requests_batching_per_request": True,
            "switching_time": True,
            "start_time": True,  # original value preserved for reference
        },
    )

    fig.update_traces(marker=dict(size=8, opacity=0.75))
    fig.update_layout(
        template="plotly_white",
        height=600,
        legend_title_text="Model | Sequence",
    )

    # --------------------------------------------------
    # Optional HTML export
    # --------------------------------------------------
    if save_html:
        output_path = os.path.expanduser(output_path)

        # A bare filename has an empty dirname, and os.makedirs('') raises;
        # only create the parent directory when there is one.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        fig.write_html(
            output_path,
            full_html=True,
            include_plotlyjs="cdn",
        )

        print(f"✅ Plot saved to {output_path}")

    return fig


In [None]:
# Render the interactive TTFT scatter for all loaded requests (no HTML export).
fig = plot_ttft_switching_scatter(df_requests)
fig.show()

In [None]:
# Path of the consolidated workbook for this run: it sits next to the run
# directory, under consolidated_results/, and is named after the run.
parent_dir, run_name = os.path.split(results_dir)
consolidation_file = os.path.join(parent_dir, f'consolidated_results/{run_name}.xlsx')

In [None]:
def plot_model_performance_dashboard(
    consolidation_file: str,
    save_html: bool = False,
    output_path: Optional[str] = None,
    title: str = "Model Performance Dashboard (Tokens × Concurrency)"
):
    """
    Create a per-model performance dashboard with one row per model and
    three panels per row:
      - Switching Time
      - Server TTFT p50
      - Output Tokens/s p50

    X-axis is a composite of (num_input_tokens × num_concurrent_requests),
    ordered independently per model.

    Parameters
    ----------
    consolidation_file : str
        Path to the Excel file containing consolidated benchmarking results.
        It must provide the columns ``model``, ``num_input_tokens``,
        ``num_concurrent_requests``, ``switching_time``,
        ``server_ttft_s_p50`` and ``server_output_token_per_s_p50``.
    save_html : bool, optional
        Whether to save the figure as an HTML file, by default False.
    output_path : Optional[str], optional
        Path where the HTML file will be saved; ``~`` is expanded and missing
        parent directories are created. Required if save_html=True.
    title : str, optional
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        The generated Plotly figure.

    Raises
    ------
    ValueError
        If ``save_html`` is True without an ``output_path``, or if the
        consolidation file contains no rows.
    """

    # Validate the export arguments before reading anything from disk.
    if save_html and not output_path:
        raise ValueError("output_path must be provided when save_html=True")

    # =========================
    # Load data
    # =========================
    df = pd.read_excel(consolidation_file)

    # With no rows there are no models, and a zero-row subplot grid is invalid.
    if df.empty:
        raise ValueError(f"No rows found in consolidation file: {consolidation_file}")

    # =========================
    # Prepare composite X axis
    # =========================
    df = df.sort_values(
        by=["model", "num_input_tokens", "num_concurrent_requests"]
    )

    df["x_label"] = (
        "tokens=" + df["num_input_tokens"].astype(str)
        + " | conc=" + df["num_concurrent_requests"].astype(str)
    )

    # Single source of truth for the panels, left to right:
    # (data column, subplot title suffix).
    metrics = [
        ("switching_time", "Switching Time"),
        ("server_ttft_s_p50", "Server TTFT p50"),
        ("server_output_token_per_s_p50", "Output Tokens/s p50"),
    ]

    models = df["model"].unique()
    rows = len(models)
    cols = len(metrics)

    # =========================
    # Subplot titles (row-major: all panels of a model, then the next model)
    # =========================
    subplot_titles = [
        f"{model} - {label}" for model in models for _, label in metrics
    ]

    fig = make_subplots(
        rows=rows,
        cols=cols,
        subplot_titles=subplot_titles,
    )

    # =========================
    # Add traces + per-model x-axis
    # =========================
    for row_idx, model in enumerate(models, start=1):
        model_df = df[df["model"] == model]

        # Model-specific x-axis order (follows the sort applied above)
        model_x_order = model_df["x_label"].unique().tolist()

        for col_idx, (metric_col, _) in enumerate(metrics, start=1):
            fig.add_trace(
                go.Scatter(
                    x=model_df["x_label"],
                    y=model_df[metric_col],
                    mode="markers+lines",
                ),
                row=row_idx,
                col=col_idx,
            )

            # Apply the category ordering only to this model's panel
            fig.update_xaxes(
                categoryorder="array",
                categoryarray=model_x_order,
                tickangle=45,
                row=row_idx,
                col=col_idx,
            )

    # =========================
    # Layout
    # =========================
    fig.update_layout(
        height=350 * rows,
        width=1600,
        title=title,
        template="plotly_white",
        showlegend=False,
    )

    # =========================
    # Output handling
    # =========================
    if save_html:
        output_path = os.path.expanduser(output_path)

        # Create the destination directory when the path has one; a bare
        # filename has an empty dirname, which os.makedirs would reject.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        fig.write_html(output_path)

    return fig


In [None]:
# Build the dashboard from the consolidated workbook and also export it as an
# HTML file in the current working directory.
fig = plot_model_performance_dashboard(
    consolidation_file,
    save_html=True,
    output_path="model_performance_dashboard.html",
    title="Model Performance Dashboard (Tokens × Concurrency)",
)
fig.show()