In [20]:

import os
import pandas as pd
import numpy as np
from pathlib import Path
import numpy as np
import pandas as pd
import plotly.express as px
import json 
import re
from typing import Optional
from datetime import datetime
import pandas as pd
import plotly.graph_objects as go
import plotly.colors as pc
from plotly.subplots import make_subplots
import pandas as pd
import re
import math

# Using batch sizes from grafana

In [21]:
def plot_ttft_scatter(df):
    """
    Show a scatter plot of time-to-first-token (TTFT) per request.

    Points are coloured by the "batch size - model" combination and use one
    marker symbol per model. A uniform random jitter of up to +/-200 ms is
    added to the x position so requests with near-identical start times do
    not sit exactly on top of each other (the jitter is not seeded, so the
    exact x positions differ between calls).

    Args:
        df: DataFrame with the columns ``batch_size``, ``model``,
            ``time_to_first_token`` and ``start_time_formatted``. The latter
            must support adding a timedelta (i.e. be datetime-like).

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Work on a copy: the re-typed / helper columns created below are plotting
    # details and must not leak into the caller's DataFrame.
    df = df.copy()

    # Ensure batch_size is string for consistent categorical handling
    df["batch_size"] = df["batch_size"].astype(str)

    # Sort labels numerically. float() (rather than int()) also accepts labels
    # such as "1.0", which appear when pandas parsed the column as float.
    batch_order = sorted(df["batch_size"].unique(), key=float)

    # Make batch_size categorical with proper order
    df["batch_size"] = pd.Categorical(df["batch_size"], categories=batch_order, ordered=True)

    # Add jitter (in milliseconds)
    df["time_jittered"] = df["start_time_formatted"] + pd.to_timedelta(
        np.random.uniform(-200, 200, size=len(df)), unit="ms"
    )

    # Legend order: batch size major, model minor
    model_order = sorted(df["model"].unique())
    combo_order = [f"{b} - {m}" for b in batch_order for m in model_order]

    # New column: batch_size + model (must use the same format as combo_order)
    df["batch_model"] = df["batch_size"].astype(str) + " - " + df["model"]

    # Three qualitative palettes concatenated so there are enough distinct
    # colours for the batch_size x model combinations
    palette = px.colors.qualitative.Safe + px.colors.qualitative.Set2 + px.colors.qualitative.Dark24

    fig = px.scatter(
        df,
        x="time_jittered",
        y="time_to_first_token",
        color="batch_model",              # color by batch_size + model combo
        symbol="model",                   # marker shape by model
        category_orders={
            "batch_model": combo_order,
            "model": model_order
        },
        color_discrete_sequence=palette,  # categorical palette
        title="Time to First Token per Request",
        labels={
            "time_jittered": "Time",
            "time_to_first_token": "Time to First Token (s)",
            "batch_size": "Batch Size",
            "model": "Model",
            "batch_model": "Batch Size - Model"
        },
        hover_data=["batch_size", "model"]
    )

    # Style markers
    fig.update_traces(
        mode="markers",
        marker=dict(size=8, opacity=0.7, line=dict(width=0.5, color="black"))
    )

    # Clicking a legend entry toggles only that entry, not its whole group
    fig.update_layout(
        legend=dict(
            title="Legend",
            groupclick="toggleitem"
        )
    )

    fig.show()
    
def find_uuid(file_name: str) -> Optional[str]:
    """
    Return the first UUID-shaped token (8-4-4-4-12 lowercase hex digits)
    contained in ``file_name``.

    Raises:
        ValueError: if ``file_name`` contains no such token.
    """
    found = re.search(r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}', file_name)
    if found is None:
        raise ValueError(f"UUID not found in filename {file_name}")

    return found.group()


def read_json_files_to_df(directory: str) -> pd.DataFrame:
    """
    Collect the successful requests from every ``*individual_responses.json``
    file directly inside ``directory`` (no recursion).

    A record counts as successful when its ``error_code`` is null. For each
    such record the start/end time strings are parsed with the
    ``%H:%M:%S.%f`` format (time of day only, so the parsed datetimes carry
    no real date), and the model name and UUID taken from the file name are
    attached.

    Args:
        directory: Folder containing the JSON response files.

    Returns:
        DataFrame with the columns ``start_time``, ``end_time``,
        ``client_end_to_end_latency_s``, ``client_ttft_s``, ``model_name``
        and ``uuid``. The columns are present even when no record was found,
        so callers can sort/select on them without a KeyError.

    Raises:
        ValueError: if a file with at least one successful record has no UUID
            in its name (raised by ``find_uuid``).
    """
    columns = [
        'start_time',
        'end_time',
        'client_end_to_end_latency_s',
        'client_ttft_s',
        'model_name',
        'uuid',
    ]
    data = []

    for filename in os.listdir(directory):
        if not filename.endswith('individual_responses.json'):
            continue

        # Third underscore-separated token of the file name ('' if there are
        # fewer than three tokens)
        model_name = '_'.join(filename.split('_')[2:3])
        file_path = os.path.join(directory, filename)
        with open(file_path, 'r') as file:
            json_data = json.load(file)

        successful = [item for item in json_data if pd.isnull(item['error_code'])]
        if not successful:
            continue

        # The UUID depends only on the file name: resolve it once per file,
        # and only when there is something to attach it to.
        file_uuid = find_uuid(filename)
        for item in successful:
            data.append(
                {
                    'start_time': datetime.strptime(item['start_time'], "%H:%M:%S.%f"),
                    'end_time': datetime.strptime(item['end_time'], "%H:%M:%S.%f"),
                    'client_end_to_end_latency_s': item['client_end_to_end_latency_s'],
                    'client_ttft_s': item['client_ttft_s'],
                    'model_name': model_name,
                    'uuid': file_uuid,
                }
            )

    # Explicit columns keep the schema stable for an empty result
    return pd.DataFrame(data, columns=columns)

def rgb_to_hex(rgb_str: str) -> str:
    """Turn an 'rgb(r,g,b)' string into its '#rrggbb' hex form.

    The channels are the runs of digits found in ``rgb_str``; the first three
    are formatted as two-digit lowercase hex.
    """
    channels = [int(hit.group()) for hit in re.finditer(r"\d+", rgb_str)]
    return "#{:02x}{:02x}{:02x}".format(*channels)

def lighten_color(color: str, factor: float = 0.6) -> str:
    """
    Blend a color toward white and return it as a '#rrggbb' hex string.

    ``color`` may be a hex string or an 'rgb(...)' string (converted with
    ``rgb_to_hex`` first). ``factor`` is the interpolation weight toward
    white: 0 leaves each channel unchanged, 1 yields white.
    """
    hex_color = rgb_to_hex(color) if color.startswith("rgb") else color
    r, g, b = pc.hex_to_rgb(hex_color)
    # Move every channel the same fraction of its distance to 255
    blended = tuple(int(channel + (255 - channel) * factor) for channel in (r, g, b))
    return "#{:02x}{:02x}{:02x}".format(*blended)

def plot_requests_gantt_chart(df_user: pd.DataFrame, output_dir: str, file_name: str):
    """
    Plot a Gantt-style chart of request timings, show it and save it as HTML.

    Every request is one horizontal row (y = index label + 1). Two bars are
    drawn per request, both starting at its ``start_time``: the end-to-end
    latency in a lightened model colour, and the TTFT in the model's base
    colour, added afterwards so it is drawn on top.

    Args:
        df_user: DataFrame with the columns ``model_name``, ``start_time``,
            ``client_end_to_end_latency_s`` and ``client_ttft_s``. The row
            shading assumes a 0..n-1 index (as produced by
            ``reset_index(drop=True)``).
        output_dir: Directory for the HTML file. An empty string means the
            current working directory.
        file_name: Suffix of the output file,
            ``llms_across_time-{file_name}.html``.

    Returns:
        The plotly figure.
    """
    fig = go.Figure()

    # Assign a base color per model (palette is cycled if there are more
    # models than colors)
    unique_models = df_user["model_name"].unique()
    palette = pc.qualitative.Set2
    color_map = {model: palette[i % len(palette)] for i, model in enumerate(unique_models)}

    # Add bars for each metric and model
    for model in unique_models:
        model_df = df_user[df_user["model_name"] == model]

        # End-to-end latency → lighter color.
        # On a date axis the bar length is given in milliseconds, hence the
        # factor 1000 applied to the latency in seconds.
        fig.add_trace(
            go.Bar(
                y=model_df.index + 1,
                x=1000 * model_df["client_end_to_end_latency_s"],
                base=[str(x) for x in model_df["start_time"]],
                name=f"End-to-end latency - {model}",
                orientation="h",
                marker_color=lighten_color(color_map[model], factor=0.5),
                legendgroup=model,
                offsetgroup=f"{model}-latency",
            )
        )

        # TTFT → normal color (plotted after to be on top)
        fig.add_trace(
            go.Bar(
                y=model_df.index + 1,
                x=1000 * model_df["client_ttft_s"],
                base=[str(x) for x in model_df["start_time"]],
                name=f"TTFT - {model}",
                orientation="h",
                marker_color=lighten_color(color_map[model], factor=0.0),  # keep base color
                legendgroup=model,
                offsetgroup=f"{model}-ttft",
            )
        )

    # Alternate row shading (every other request row gets a light grey band)
    for i in range(0, len(df_user.index), 2):
        fig.add_hrect(y0=i + 0.5, y1=i + 1.5, line_width=0, fillcolor="grey", opacity=0.1)

    fig.update_xaxes(
        type="date",
        tickformat="%H:%M:%S",
        hoverformat="%H:%M:%S.%2f",
    )
    fig.update_layout(
        barmode="overlay",  # ensures TTFT overlays latency
        title_text="LLM requests across time",
        xaxis_title="Time stamp",
        yaxis_title="Request index",
    )

    fig.show()
    # os.path.join keeps the path relative when output_dir is "" (an f-string
    # "{output_dir}/..." would then point at the filesystem root).
    fig.write_html(
        os.path.join(output_dir, f"llms_across_time-{file_name}.html"),
        include_plotlyjs="cdn",
    )

    return fig

def get_reports(grafana_path, jsons_path, file_name):
    """
    Produce the two per-run reports: the TTFT scatter plot from the Grafana
    CSV and the request Gantt chart from the individual-response JSON files.

    Args:
        grafana_path: CSV file with ``start_time`` / ``end_time`` columns
            (interpreted as seconds when converted to datetimes) plus the
            columns ``plot_ttft_scatter`` needs.
        jsons_path: Directory passed to ``read_json_files_to_df``.
        file_name: Suffix for the Gantt chart HTML file, which is written
            next to ``grafana_path``.
    """
    # --- TTFT scatter, requests ordered by start time ---
    grafana_df = (
        pd.read_csv(grafana_path)
        .sort_values('start_time')
        .reset_index(drop=True)
    )
    for raw_col, formatted_col in (
        ('start_time', 'start_time_formatted'),
        ('end_time', 'end_time_formatted'),
    ):
        grafana_df[formatted_col] = pd.to_datetime(grafana_df[raw_col], unit="s")

    plot_ttft_scatter(grafana_df)

    # --- Gantt chart, requests ordered by end time ---
    responses_df = read_json_files_to_df(jsons_path)
    responses_df = responses_df.sort_values('end_time').reset_index(drop=True)

    plot_requests_gantt_chart(responses_df, os.path.dirname(grafana_path), file_name=file_name)


In [22]:
# for testing
# filename = '5_ckpts-100_reqs-unevenly_qps_spread'
# grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}-test.csv'

def create_summary(grafana_path):
    """
    Build per-(model, prompt tokens, completion tokens) summary statistics
    from a Grafana CSV export.

    Steps:
      1. Sort requests by ``start_time`` (ties: larger TTFT first).
      2. Split the sorted rows into runs of consecutive rows sharing the same
         ``model``, ``prompt_tokens_count`` and ``batch_size``.
      3. Mark the first row of every run as ``candidate_switch``.
      4. Mark that row as ``switch_hit`` when the run has more than one row,
         not every row has ``batch_size == 1``, and the first row's TTFT is
         greater than the median TTFT of the remaining rows of the run.
      5. Aggregate median/min/max/std of tokens/s, latency, TTFT and decoding
         time, plus the number of candidate switches and switch hits.

    Args:
        grafana_path: CSV with the columns ``start_time``, ``end_time``,
            ``total_latency``, ``time_to_first_token``, ``model``,
            ``prompt_tokens_count``, ``completion_tokens_count``,
            ``batch_size`` and ``completion_tokens_after_first_per_sec``.

    Returns:
        DataFrame indexed by (model, prompt_tokens_count,
        completion_tokens_count) with flattened ``<Metric>_<stat>`` columns
        and ``LRU Hit Rate`` = (candidate switches - switch hits) /
        candidate switches.
    """
    df = (
        pd.read_csv(grafana_path)
        .sort_values(['start_time', 'time_to_first_token'], ascending=[True, False])
        .reset_index(drop=True)
    )
    df['start_time_formatted'] = pd.to_datetime(df['start_time'], unit="s")
    df['end_time_formatted'] = pd.to_datetime(df['end_time'], unit="s")

    # Time spent after the first token was produced
    df['decoding_time'] = df['total_latency'] - df['time_to_first_token']

    # A new run starts whenever any key column differs from the previous row;
    # the cumulative sum of those change flags numbers the runs 1, 2, 3, ...
    run_keys = df[["model", "prompt_tokens_count", "batch_size"]]
    df["group_ids"] = run_keys.ne(run_keys.shift()).any(axis=1).cumsum()

    # Single pass over the runs (instead of re-filtering the whole frame for
    # every run). Row labels are collected first and the flag columns are
    # written afterwards, so the frame is not modified while it is iterated.
    candidate_rows = []
    hit_rows = []
    for _, run_rows in df.groupby("group_ids"):
        first_row_idx = run_rows.index[0]
        candidate_rows.append(first_row_idx)

        ttft = run_rows["time_to_first_token"]
        # Only runs with >1 row, and not made purely of batch_size == 1 rows
        if len(ttft) > 1 and not run_rows["batch_size"].eq(1).all():
            if ttft.iloc[0] > ttft.iloc[1:].median():
                hit_rows.append(first_row_idx)

    df["candidate_switch"] = df.index.isin(candidate_rows)
    df["switch_hit"] = df.index.isin(hit_rows)

    # Summary statistics
    stats = ['median', 'min', 'max', 'std']
    df_summary = df.groupby(['model', 'prompt_tokens_count', 'completion_tokens_count']).agg({
        'completion_tokens_after_first_per_sec': stats,
        'total_latency': stats,
        'time_to_first_token': stats,
        'decoding_time': stats,
        'candidate_switch': 'sum',
        'switch_hit': 'sum',
    })
    # Flatten the (column, stat) MultiIndex into "column_stat" names
    df_summary.columns = ['_'.join(col) for col in df_summary.columns]
    df_summary = df_summary.rename(columns=lambda c: c
                   .replace("completion_tokens_after_first_per_sec", "Tokens/s")
                   .replace("total_latency", "Latency")
                   .replace("time_to_first_token", "TTFT")
                   .replace("decoding_time", "Decoding Time")
                   )

    # Share of candidate switches that were not flagged as a switch hit
    df_summary['LRU Hit Rate'] = (
        (df_summary['candidate_switch_sum'] - df_summary['switch_hit_sum'])
        / df_summary['candidate_switch_sum']
    )

    return df_summary

In [23]:
def plot_batch_size_histogram_grid(grafana_path: str, file_name: str = "batch_size_histogram_grid"):
    """
    Plot one batch-size histogram per model in a 3-column grid, show it and
    save it as ``batch_size_histogram-{file_name}.html`` next to the CSV.

    Each batch size keeps the same color in every subplot. Legend entries are
    named ``{batch_size}-{model}`` and ordered by batch size, then model.
    Extra horizontal spacing avoids overlapping subplot titles.

    Args:
        grafana_path: CSV with ``batch_size`` and ``model`` columns.
        file_name: Suffix used in the output HTML file name.

    Returns:
        The plotly figure.
    """

    df_grafana = pd.read_csv(grafana_path)

    # all unique batch sizes and assign consistent colors (palette is cycled)
    unique_batch_sizes = sorted(df_grafana["batch_size"].unique())
    batch_palette = pc.qualitative.Set3
    batch_color_map = {
        bs: batch_palette[i % len(batch_palette)] for i, bs in enumerate(unique_batch_sizes)
    }

    # models
    unique_models = sorted(df_grafana["model"].unique())
    n_models = len(unique_models)

    # grid layout: 3 columns, enough rows to fit all models
    ncols = 3
    nrows = math.ceil(n_models / ncols)

    fig = make_subplots(
        rows=nrows,
        cols=ncols,
        subplot_titles=[f"{m}" for m in unique_models],
        horizontal_spacing=0.25   # <-- more space between columns
    )

    # collect traces first so we can sort them
    traces = []
    for idx, model in enumerate(unique_models):
        # subplot cells are filled row by row, 1-based
        row = idx // ncols + 1
        col = idx % ncols + 1
        model_df = df_grafana[df_grafana["model"] == model]
        model_batch_sizes = sorted(model_df["batch_size"].unique())

        for bs in model_batch_sizes:
            bs_df = model_df[model_df["batch_size"] == bs]
            legend_name = f"{bs}-{model}"
            traces.append((
                bs, model, row, col,  # sorting helpers
                go.Histogram(
                    x=bs_df["batch_size"],
                    name=legend_name,
                    marker=dict(color=batch_color_map[bs]),
                    opacity=0.75,
                    showlegend=True
                )
            ))

        # X-axis: only existing batch sizes for that model
        fig.update_xaxes(
            tickmode="array",
            tickvals=model_batch_sizes,
            ticktext=[str(bs) for bs in model_batch_sizes],
            row=row, col=col
        )

    # sort traces by batch_size, then model (this fixes the legend order)
    traces = sorted(traces, key=lambda t: (t[0], t[1]))

    # add sorted traces
    for _, _, row, col, trace in traces:
        fig.add_trace(trace, row=row, col=col)

    # layout
    # NOTE(review): xaxis_title / yaxis_title address the first subplot's axes
    # only; the other subplots stay unlabelled.
    fig.update_layout(
        title="Batch size distributions per model",
        xaxis_title="Batch size",
        yaxis_title="Count",
        barmode="overlay",
        bargap=0.2,
        hoverlabel=dict(font_color="black"),
        height=300 * nrows,
        width=400 * ncols,   # widen figure to match spacing
        legend_title="Batch size - Model"
    )

    fig.show()
    # os.path.join keeps the path relative when grafana_path has no directory
    # part (dirname == ""), where "{output_dir}/..." would point at "/".
    output_dir = os.path.dirname(grafana_path)
    fig.write_html(
        os.path.join(output_dir, f"batch_size_histogram-{file_name}.html"),
        include_plotlyjs="cdn",
    )

    return fig


In [24]:
def plot_total_batch_size_histogram(grafana_path: str, file_name: str = "batch_size_histogram_total"):
    """
    Plot the batch-size histogram over all models, show it and save it as
    ``{file_name}.html`` next to the CSV.

    Every batch size gets its own trace and color; the x-axis is categorical
    so batch sizes are evenly spaced in ascending order.

    Args:
        grafana_path: CSV with a ``batch_size`` column.
        file_name: Output HTML file name without extension.

    Returns:
        The plotly figure.
    """

    df_grafana = pd.read_csv(grafana_path)

    # all unique batch sizes and assign consistent colors (palette is cycled)
    unique_batch_sizes = sorted(df_grafana["batch_size"].unique())
    batch_palette = pc.qualitative.Set3
    batch_color_map = {
        bs: batch_palette[i % len(batch_palette)] for i, bs in enumerate(unique_batch_sizes)
    }

    # convert to categorical (string labels) to avoid gaps on the x-axis;
    # unique_batch_sizes keeps the original values for sorting and color lookup
    df_grafana["batch_size"] = df_grafana["batch_size"].astype(str)

    # collect traces for each batch_size
    traces = []
    for bs in unique_batch_sizes:
        bs_df = df_grafana[df_grafana["batch_size"] == str(bs)]
        traces.append(
            go.Histogram(
                x=bs_df["batch_size"],
                name=f"{bs}",
                marker=dict(color=batch_color_map[bs]),
                opacity=0.75,
                showlegend=True
            )
        )

    # build figure
    fig = go.Figure(data=traces)

    fig.update_layout(
        title="Total Batch Size Distribution (All Models)",
        xaxis_title="Batch size",
        yaxis_title="Count",
        barmode="overlay",
        bargap=0.2,
        hoverlabel=dict(font_color="black"),
        legend_title="Batch size",
        xaxis=dict(
            type="category",  # categorical spacing, not numeric
            categoryorder="array",
            categoryarray=[str(bs) for bs in unique_batch_sizes]
        )
    )

    # write output
    fig.show()
    # os.path.join keeps the path relative when grafana_path has no directory
    # part (dirname == ""), where "{output_dir}/..." would point at "/".
    output_dir = os.path.dirname(grafana_path)
    fig.write_html(os.path.join(output_dir, f"{file_name}.html"), include_plotlyjs="cdn")

    return fig


## Compare batch sizes

In [25]:
import pandas as pd
import plotly.graph_objects as go

def comparative_bar_plots(datasets):
    """
    Show a grouped bar chart comparing the batch-size distributions of
    several datasets.

    For each dataset the share of rows per batch size is computed (as a
    percentage rounded to two decimals) and drawn as one bar series; batch
    sizes missing from a dataset count as 0.

    Parameters:
        datasets (dict): Mapping of {series_name: csv_path} where each CSV
                         contains a 'batch_size' column.
    """
    labels = list(datasets.keys())
    count_frames = []
    row_totals = []

    # One count table per CSV; count columns are suffixed with the dataset position
    for pos, csv_path in enumerate(datasets.values()):
        run_df = pd.read_csv(csv_path)
        counts = run_df.groupby("batch_size").size().to_frame(name=f"count_{pos}").reset_index()
        count_frames.append(counts)
        row_totals.append(run_df.shape[0])

    # Outer-join all count tables on batch_size; absent combinations become 0
    merged = count_frames[0]
    for other in count_frames[1:]:
        merged = merged.merge(other, on="batch_size", how="outer")
    merged = merged.fillna(0)

    # Counts -> percentage of that dataset's rows
    for pos, n_rows in enumerate(row_totals):
        merged[f"percent_{pos}"] = (merged[f"count_{pos}"] / n_rows * 100).round(2)

    # Numeric ordering, string labels (categorical axis, no gaps)
    merged = merged.sort_values("batch_size", key=lambda col: col.astype(int))
    x_values = merged["batch_size"].astype(str)

    fig = go.Figure()
    for pos, label in enumerate(labels):
        # back to a fraction, the axis formats it as a percentage
        fig.add_trace(go.Bar(name=label, x=x_values, y=merged[f"percent_{pos}"] / 100))

    fig.update_layout(
        barmode="group",
        title="Batch Size Distribution Comparison",
        xaxis_title="Batch Size",
        yaxis_title="Percentage",
        xaxis=dict(type="category"),
        yaxis=dict(tickformat=".0%")
    )

    fig.show()


In [26]:
def load_json_logs_to_df(dir_path: str) -> pd.DataFrame:
    """
    Load every ``*.json`` file below ``dir_path`` (recursively) whose
    top-level value is a list, and combine all list items into one DataFrame.

    Directories and files are visited in sorted order, so the row order of
    the result does not depend on the filesystem's enumeration order.

    Args:
        dir_path (str): Path to directory containing JSON files.

    Returns:
        pd.DataFrame: Combined DataFrame with all records. Files that are not
        valid JSON are reported on stdout and skipped; files whose top-level
        value is not a list are skipped silently.
    """
    all_records = []

    for root, dirs, files in os.walk(dir_path):
        # Sorting in place makes os.walk descend into sub-directories in a
        # deterministic order.
        dirs.sort()
        for filename in sorted(files):
            if not filename.endswith(".json"):  # adjust if your extension differs
                continue

            file_path = os.path.join(root, filename)
            with open(file_path, "r") as f:
                try:
                    data = json.load(f)  # expects list[dict]
                except json.JSONDecodeError as e:
                    print(f"Error reading {file_path}: {e}")
                    continue

            # Deliberately best-effort: other JSON shapes (e.g. a summary
            # object) are not request logs and are ignored.
            if isinstance(data, list):
                all_records.extend(data)

    return pd.DataFrame(all_records)

import plotly.graph_objects as go
from plotly.subplots import make_subplots

def comparative_dist_plots(kit_paths: dict):
    """
    Plot comparative TTFT histograms for multiple datasets.

    - Each dataset in its own subplot (stacked vertically)
    - All subplots share the same x-axis
    - Histograms are normalized to probability density, so datasets with
      different numbers of requests are directly comparable
    - Subplot titles identify the datasets (traces are hidden from the legend)

    Args:
        kit_paths: Mapping of {title: directory}. Each directory is loaded
            with ``load_json_logs_to_df`` and must yield a ``client_ttft_s``
            column.
    """
    n_rows = len(kit_paths)

    # one row per dataset
    fig = make_subplots(
        rows=n_rows,
        cols=1,
        shared_xaxes=True,
        subplot_titles=list(kit_paths.keys())
    )

    # loop through datasets
    for i, (title, path) in enumerate(kit_paths.items(), start=1):
        df = load_json_logs_to_df(path)
        fig.add_trace(
            go.Histogram(
                x=df['client_ttft_s'],
                nbinsx=50,
                histnorm="probability density",  # the normalization the docstring promises
                name=title,
                opacity=1,
                showlegend=False  # we already use subplot titles
            ),
            row=i,
            col=1
        )

    fig.update_layout(
        title='Comparative TTFT Distributions',
        bargap=0.0,
        height=300 * n_rows,  # scale height with number of plots
    )

    # With shared x-axes only the bottom subplot shows tick labels, so the
    # axis title belongs there (layout-level xaxis_title would label the top
    # subplot instead).
    fig.update_xaxes(title_text='Time to First Token (s)', row=n_rows, col=1)
    fig.update_yaxes(title_text='Density')

    fig.show()


In [27]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

def comparative_dist_plots_v2(kit_paths: dict):
    """
    Plot comparative TTFT histograms for multiple datasets.

    - Each dataset in its own subplot (stacked vertically)
    - Each subplot has its own x-axis (not shared)
    - Histograms are normalized to probability density, matching the
      "Density" y-axis label
    - Subplot titles identify the datasets (traces are hidden from the legend)

    Args:
        kit_paths: Mapping of {title: directory}. Each directory is loaded
            with ``load_json_logs_to_df`` and must yield a ``client_ttft_s``
            column.
    """
    n_rows = len(kit_paths)

    # one row per dataset
    fig = make_subplots(
        rows=n_rows,
        cols=1,
        shared_xaxes=False,  # every subplot keeps its own x range
        subplot_titles=list(kit_paths.keys())
    )

    # loop through datasets
    for i, (title, path) in enumerate(kit_paths.items(), start=1):
        df = load_json_logs_to_df(path)
        fig.add_trace(
            go.Histogram(
                x=df['client_ttft_s'],
                nbinsx=50,
                histnorm="probability density",  # without this the bars are raw counts
                name=title,
                opacity=1,
                showlegend=False  # titles already label subplots
            ),
            row=i,
            col=1
        )

    # Configure layout
    fig.update_layout(
        title='Comparative TTFT Distributions',
        height=300 * n_rows,
        bargap=0.0,
    )

    # Give each subplot its own axis labels
    for i in range(1, n_rows + 1):
        fig.update_xaxes(title_text='Time to First Token (s)', row=i, col=1)
        fig.update_yaxes(title_text='Density', row=i, col=1)

    fig.show()


### all runs

In [None]:
# Batch-size comparison for the qps=50 runs. Every CSV lives at
# <run>/grafana-<run>.csv; the "uniform usage" runs are named with qps/5
# (written as an int when it is >= 1).
qps=50
datasets = {
    label: f'../data/bundle_tests/amit_tests_v2/{run_name}/grafana-{run_name}.csv'
    for per_model_qps in [int(qps/5) if qps/5 >= 1 else qps/5]
    for label, run_name in [
        ("Single model (100)", f'1_ckpts-100_reqs-{qps}_qps_exponential'),
        ("Single model (1,000)", f'1_ckpts-1000_reqs-{qps}_qps_exponential'),
        ("Five models with uniform usage (100 reqs)", f'5_ckpts-100_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with uniform usage (1,000 reqs)", f'5_ckpts-1000_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with a dominant model (100 reqs)", f'5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread'),
        ("Five models with a dominant model (1,000 reqs)", f'5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread'),
    ]
}
comparative_bar_plots(datasets)

In [None]:
# Batch-size comparison for the qps=25 runs. Every CSV lives at
# <run>/grafana-<run>.csv; the "uniform usage" runs are named with qps/5
# (written as an int when it is >= 1).
qps=25
datasets = {
    label: f'../data/bundle_tests/amit_tests_v2/{run_name}/grafana-{run_name}.csv'
    for per_model_qps in [int(qps/5) if qps/5 >= 1 else qps/5]
    for label, run_name in [
        ("Single model (100)", f'1_ckpts-100_reqs-{qps}_qps_exponential'),
        ("Single model (1,000)", f'1_ckpts-1000_reqs-{qps}_qps_exponential'),
        ("Five models with uniform usage (100 reqs)", f'5_ckpts-100_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with uniform usage (1,000 reqs)", f'5_ckpts-1000_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with a dominant model (100 reqs)", f'5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread'),
        ("Five models with a dominant model (1,000 reqs)", f'5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread'),
    ]
}
comparative_bar_plots(datasets)

In [None]:
# Batch-size comparison for the qps=5 runs. Every CSV lives at
# <run>/grafana-<run>.csv; the "uniform usage" runs are named with qps/5
# (written as an int when it is >= 1).
qps=5
datasets = {
    label: f'../data/bundle_tests/amit_tests_v2/{run_name}/grafana-{run_name}.csv'
    for per_model_qps in [int(qps/5) if qps/5 >= 1 else qps/5]
    for label, run_name in [
        ("Single model (100)", f'1_ckpts-100_reqs-{qps}_qps_exponential'),
        ("Single model (1,000)", f'1_ckpts-1000_reqs-{qps}_qps_exponential'),
        ("Five models with uniform usage (100 reqs)", f'5_ckpts-100_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with uniform usage (1,000 reqs)", f'5_ckpts-1000_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with a dominant model (100 reqs)", f'5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread'),
        ("Five models with a dominant model (1,000 reqs)", f'5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread'),
    ]
}
comparative_bar_plots(datasets)

In [None]:
# Batch-size comparison for the qps=2.5 runs. Every CSV lives at
# <run>/grafana-<run>.csv; the "uniform usage" runs are named with qps/5
# (written as an int when it is >= 1).
qps=2.5
datasets = {
    label: f'../data/bundle_tests/amit_tests_v2/{run_name}/grafana-{run_name}.csv'
    for per_model_qps in [int(qps/5) if qps/5 >= 1 else qps/5]
    for label, run_name in [
        ("Single model (100)", f'1_ckpts-100_reqs-{qps}_qps_exponential'),
        ("Single model (1,000)", f'1_ckpts-1000_reqs-{qps}_qps_exponential'),
        ("Five models with uniform usage (100 reqs)", f'5_ckpts-100_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with uniform usage (1,000 reqs)", f'5_ckpts-1000_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with a dominant model (100 reqs)", f'5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread'),
        ("Five models with a dominant model (1,000 reqs)", f'5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread'),
    ]
}
comparative_bar_plots(datasets)

In [None]:
# Batch-size comparison for the qps=0.5 runs. Every CSV lives at
# <run>/grafana-<run>.csv; the "uniform usage" runs are named with qps/5
# (written as an int when it is >= 1).
qps=0.5
datasets = {
    label: f'../data/bundle_tests/amit_tests_v2/{run_name}/grafana-{run_name}.csv'
    for per_model_qps in [int(qps/5) if qps/5 >= 1 else qps/5]
    for label, run_name in [
        ("Single model (100)", f'1_ckpts-100_reqs-{qps}_qps_exponential'),
        ("Single model (1,000)", f'1_ckpts-1000_reqs-{qps}_qps_exponential'),
        ("Five models with uniform usage (100 reqs)", f'5_ckpts-100_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with uniform usage (1,000 reqs)", f'5_ckpts-1000_reqs-{per_model_qps}_qps_exponential_equally_spread'),
        ("Five models with a dominant model (100 reqs)", f'5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread'),
        ("Five models with a dominant model (1,000 reqs)", f'5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread'),
    ]
}
comparative_bar_plots(datasets)

### comparing all 1ks

In [28]:

def calculate_total_tokens_per_s(title: str, path: str) -> pd.DataFrame:
    """
    Compute the overall token throughput of one run.

    The logs under ``path`` are loaded with ``load_json_logs_to_df``. The run
    duration is the span from the earliest ``start_time`` to the latest
    ``end_time``; throughput is (input + output tokens) / duration.

    NOTE(review): timestamps are parsed as time-of-day only
    (``%H:%M:%S.%f``), so a run crossing midnight would presumably yield a
    wrong duration - confirm against the log producer.

    Returns:
        One-row DataFrame indexed by ``title`` with the token counts, the
        first start / last end time, the duration in seconds and the
        tokens-per-second figure.
    """
    logs = load_json_logs_to_df(path)

    time_format = "%H:%M:%S.%f"
    logs["start_time_dt"] = pd.to_datetime(logs["start_time"], format=time_format)
    logs["end_time_dt"] = pd.to_datetime(logs["end_time"], format=time_format)

    run_start = logs["start_time_dt"].min()
    run_end = logs["end_time_dt"].max()
    elapsed_s = (run_end - run_start).total_seconds()

    tokens_in = logs["number_input_tokens"].sum()
    tokens_out = logs["number_output_tokens"].sum()
    tokens_all = tokens_in + tokens_out

    row = {
        'number_input_tokens': tokens_in,
        'number_output_tokens': tokens_out,
        'number_total_tokens': tokens_all,
        'first_starting_time': run_start,
        'last_end_time': run_end,
        'total_duration_s': elapsed_s,
        'total_tokens_per_s': tokens_all / elapsed_s,
    }

    return pd.DataFrame.from_dict({title: row}, orient='index')

def get_tok_s_summary(kit_paths: dict) -> pd.DataFrame:
    """
    Stack the one-row throughput summaries of several runs.

    Args:
        kit_paths: Mapping of {title: log directory}; each entry is passed to
            ``calculate_total_tokens_per_s``.

    Returns:
        DataFrame with one row per title, in the mapping's order.
    """
    per_run = [
        calculate_total_tokens_per_s(title, path)
        for title, path in kit_paths.items()
    ]
    return pd.concat(per_run)



In [29]:
import pandas as pd
import plotly.graph_objects as go

def plot_bar_line_comparison(summary_df: pd.DataFrame):
    """Show tokens/sec as bars with run duration as an overlaid line.

    Args:
        summary_df: One row per run, indexed by the run title, with the
            columns ``total_tokens_per_s`` and ``total_duration_s``.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    titles = summary_df.index.tolist()
    tokens_per_s = summary_df["total_tokens_per_s"].tolist()
    duration_s = summary_df["total_duration_s"].tolist()

    fig = go.Figure()

    # Bars (tokens/sec): one trace per title so each gets its own colour and
    # legend entry.
    for t, tok_s in zip(titles, tokens_per_s):
        fig.add_trace(go.Bar(
            x=[t],
            y=[tok_s],
            name=f"{t} - Tokens/sec",   # legend shows the title
            yaxis="y1",
            opacity=0.8,
            text=[f"{tok_s:,.0f}"],     # rounded, thousands separator
            textposition="outside"
        ))

    # Line (duration), added last so it overlays the bars.
    # "text" must be part of the mode for the labels (and textposition) to be
    # drawn on the chart; without it they only appear on hover.
    fig.add_trace(go.Scatter(
        x=titles,
        y=duration_s,
        name="Duration (s)",
        mode="lines+markers+text",
        text=[f"{d:,.0f}" for d in duration_s],
        textposition="top center",
        yaxis="y2",
        line=dict(width=3, color="black")
    ))

    # Headroom for the bar labels. Only finite values are considered so an
    # empty summary or a NaN/inf rate cannot produce an invalid axis range;
    # in that case the range is left for plotly to pick.
    finite_rates = [v for v in tokens_per_s if math.isfinite(v)]
    peak_tok_per_s = max(finite_rates, default=0)

    left_axis = dict(
        title="Tokens per Second",
        side="left",
    )
    if peak_tok_per_s > 0:
        left_axis["range"] = [0, peak_tok_per_s * 1.2]

    # Layout with secondary axis + hidden x labels + legend below the chart.
    fig.update_layout(
        title="Comparative Tokens/sec (bars) vs Duration (line)",
        xaxis=dict(
            title="",
            showticklabels=False
        ),
        yaxis=left_axis,
        yaxis2=dict(
            title="Duration (s)",
            overlaying="y",
            side="right",
        ),
        barmode="group",
        legend=dict(
            orientation="h",
            yanchor="top",
            y=-0.2,          # position below chart
            xanchor="center",
            x=0.5
        )
    )

    fig.show()


In [14]:
qps = 250

# Every scenario's run-folder name is written once here; both path dicts
# below are derived from it so the folder, the Grafana file name and the kit
# path cannot drift apart.
bundle_root = '../data/bundle_tests/5_node_samsung'
scenario_runs = {
    "Single model": f'1_ckpts-5000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage": f'5_ckpts-equally_spread-5000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model": f'5_ckpts-dominant_spread-5000_reqs-total_{qps}_qps_exponential',
}

# Grafana CSV export inside each run folder: <run>/grafana-<run>.csv
datasets = {
    label: f'{bundle_root}/{run}/grafana-{run}.csv'
    for label, run in scenario_runs.items()
}
comparative_bar_plots(datasets)

# The run folders themselves, used for the distribution plots and the
# tokens/sec summary.
kit_datasets = {
    label: f'{bundle_root}/{run}'
    for label, run in scenario_runs.items()
}
comparative_dist_plots(kit_datasets)

summary_df = get_tok_s_summary(kit_datasets)
plot_bar_line_comparison(summary_df)

In [31]:
qps = 125

# Every scenario's run-folder name is written once here; both path dicts
# below are derived from it so the folder, the Grafana file name and the kit
# path cannot drift apart.
bundle_root = '../data/bundle_tests/5_node_samsung'
scenario_runs = {
    "Single model": f'1_ckpts-5000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage": f'5_ckpts-equally_spread-5000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model": f'5_ckpts-dominant_spread-5000_reqs-total_{qps}_qps_exponential',
}

# Grafana CSV export inside each run folder: <run>/grafana-<run>.csv
datasets = {
    label: f'{bundle_root}/{run}/grafana-{run}.csv'
    for label, run in scenario_runs.items()
}
comparative_bar_plots(datasets)

# The run folders themselves, used for the distribution plots and the
# tokens/sec summary.
kit_datasets = {
    label: f'{bundle_root}/{run}'
    for label, run in scenario_runs.items()
}
comparative_dist_plots(kit_datasets)

summary_df = get_tok_s_summary(kit_datasets)
plot_bar_line_comparison(summary_df)

In [32]:
# NOTE(review): `kit_datasets` is not defined in this cell; it is whatever an
# earlier cell left in the kernel (in file order, the qps = 125 cell above).
comparative_dist_plots_v2(kit_datasets)

In [16]:
qps = 25

# Every scenario's run-folder name is written once here; both path dicts
# below are derived from it so the folder, the Grafana file name and the kit
# path cannot drift apart.
bundle_root = '../data/bundle_tests/5_node_samsung'
scenario_runs = {
    "Single model": f'1_ckpts-5000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage": f'5_ckpts-equally_spread-5000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model": f'5_ckpts-dominant_spread-5000_reqs-total_{qps}_qps_exponential',
}

# Grafana CSV export inside each run folder: <run>/grafana-<run>.csv
datasets = {
    label: f'{bundle_root}/{run}/grafana-{run}.csv'
    for label, run in scenario_runs.items()
}
comparative_bar_plots(datasets)

# The run folders themselves, used for the distribution plots and the
# tokens/sec summary.
kit_datasets = {
    label: f'{bundle_root}/{run}'
    for label, run in scenario_runs.items()
}
comparative_dist_plots(kit_datasets)

summary_df = get_tok_s_summary(kit_datasets)
plot_bar_line_comparison(summary_df)

In [17]:
qps = 12.5

# Every scenario's run-folder name is written once here; both path dicts
# below are derived from it so the folder, the Grafana file name and the kit
# path cannot drift apart.
bundle_root = '../data/bundle_tests/5_node_samsung'
scenario_runs = {
    "Single model": f'1_ckpts-5000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage": f'5_ckpts-equally_spread-5000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model": f'5_ckpts-dominant_spread-5000_reqs-total_{qps}_qps_exponential',
}

# Grafana CSV export inside each run folder: <run>/grafana-<run>.csv
datasets = {
    label: f'{bundle_root}/{run}/grafana-{run}.csv'
    for label, run in scenario_runs.items()
}
comparative_bar_plots(datasets)

# The run folders themselves, used for the distribution plots and the
# tokens/sec summary.
kit_datasets = {
    label: f'{bundle_root}/{run}'
    for label, run in scenario_runs.items()
}
comparative_dist_plots(kit_datasets)

summary_df = get_tok_s_summary(kit_datasets)
plot_bar_line_comparison(summary_df)

In [18]:
qps = 2.5

# Every scenario's run-folder name is written once here; both path dicts
# below are derived from it so the folder, the Grafana file name and the kit
# path cannot drift apart.
bundle_root = '../data/bundle_tests/5_node_samsung'
scenario_runs = {
    "Single model": f'1_ckpts-5000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage": f'5_ckpts-equally_spread-5000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model": f'5_ckpts-dominant_spread-5000_reqs-total_{qps}_qps_exponential',
}

# Grafana CSV export inside each run folder: <run>/grafana-<run>.csv
datasets = {
    label: f'{bundle_root}/{run}/grafana-{run}.csv'
    for label, run in scenario_runs.items()
}
comparative_bar_plots(datasets)

# The run folders themselves, used for the distribution plots and the
# tokens/sec summary.
kit_datasets = {
    label: f'{bundle_root}/{run}'
    for label, run in scenario_runs.items()
}
comparative_dist_plots(kit_datasets)

summary_df = get_tok_s_summary(kit_datasets)
plot_bar_line_comparison(summary_df)