In [1]:

import os
import pandas as pd
import numpy as np
from pathlib import Path
import numpy as np
import pandas as pd
import plotly.express as px
import json 
import re
from typing import Optional
from datetime import datetime
import pandas as pd
import plotly.graph_objects as go
import plotly.colors as pc
from plotly.subplots import make_subplots
import pandas as pd
import re
import math

# Using workaround for batch sizes

In [2]:
def assign_groups(df, threshold=0.3):
    df = df.sort_values("start_time").reset_index(drop=True)
    group_ids = []
    current_ttft = None
    current_group = 0

    for _, row in df.iterrows():
        ttft = row["server_ttft_s"]
        if current_ttft is None:
            # first group
            current_group += 1
            current_ttft = ttft
        elif abs(ttft - current_ttft) > threshold:
            # start new group
            current_group += 1
            current_ttft = ttft
        group_ids.append(current_group)

    df["group_id"] = group_ids
    return df

In [3]:
tests_dirs = os.listdir('../data/bundle_tests/amit_tests/')
# tests_dirs = tests_dirs[:1]
tests_dirs

['test_1', 'summary_bundle_tests.xlsx', 'test_3', 'test_4', 'test_2']

In [4]:
df_summary = pd.DataFrame()

for test_dir in tests_dirs:
    print(f"\n--- Analyzing test: {test_dir} ---")
    
    folder = Path(f"../data/bundle_tests/amit_tests/{test_dir}")
    json_files = list(folder.rglob("*.json"))
    run_files = [f for f in json_files if f.suffix == ".json" and "individual_responses" in f.name]
    # run_files = run_files[:1]
    
    for run_file in run_files:
        print(f"\nRun file: {run_file}")

        df = pd.read_json(str(run_file))
        df = df[df['error_code'].isnull()]
        df["start_time"] = pd.to_datetime(df["start_time"], format="%H:%M:%S.%f")
        
        prefill_time_s_median = df['server_ttft_s'].median()
        total_decode_time_s_median = df['server_end_to_end_latency_s'].median() - prefill_time_s_median
        
        # Apply grouping
        df_with_groups = assign_groups(df, threshold=0.3)
        
        # Create summary DataFrame
        # summary = (
        #     df_with_groups.groupby("group_id")
        #     .agg(
        #         group_size=("server_ttft_s", "size"),
        #         median_ttft=("server_ttft_s", "median"),
        #         min_ttft=("server_ttft_s", "min"),
        #         max_ttft=("server_ttft_s", "max"),
        #         min_start=("start_time", "min"),
        #         max_end=("end_time", "max")
        #     )
        #     .reset_index()
        # )
        
        # ---- Count model switches ----
        model_switches = 0
        model_switch_hit = 0

        grouped = df_with_groups.groupby("group_id")

        # Sort groups by ID so we can walk sequentially
        for gid in sorted(grouped.groups.keys()):
            if gid == 1:
                continue  # no switch before the first group
            model_switches += 1

            group_df = grouped.get_group(gid).sort_values("start_time")

            if len(group_df) > 1:
                first_ttft = group_df.iloc[0]["server_ttft_s"]
                median_rest = np.median(group_df.iloc[1:]["server_ttft_s"]  )
                if first_ttft - median_rest > 0.05:  # 50 ms
                    model_switch_hit += 1
        
        lru_hit_rate = (model_switches - model_switch_hit)/model_switches if model_switches > 0 else np.nan
        
        df_summary = pd.concat([pd.DataFrame({
            "test_dir": [test_dir],
            "run_file": [run_file.name],
            "num_requests": [len(df)],
            "num_groups": [df_with_groups["group_id"].nunique()],
            "median_prefill_time_s": [prefill_time_s_median],
            "median_total_decode_time_s": [total_decode_time_s_median],
            "model_switches": [model_switches],
            "model_switch_hits": [model_switch_hit],
            "lru_hit_rate": [lru_hit_rate]
        }),df_summary])
            
    


--- Analyzing test: test_1 ---

Run file: ../data/bundle_tests/amit_tests/test_1/20250822-171526.267781/synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_100_40_stream_91973997-cc8c-4735-8952-ac6a6fe18743_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_1/20250822-171526.267781/synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_100_60_stream_66b6cbc0-2c6a-46ac-9789-13d7a2d907b3_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_1/20250822-171526.267781/synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_100_10_stream_77f21779-0ff5-4399-9049-f7711e34df85_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_1/20250822-171526.267781/synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_100_90_stream_8059cbec-22dc-412c-9184-0fa7d9c8ae9a_individual_responses.json

--- Analyzing test: summary_bundle_tests.xlsx ---

--- Analyzing test: test_3 ---

Run file: ../data/bundle_tests/amit_tests/test_3/20250822-171614.329885/synthetic_0_Llama-3-1-OpenScholar-8B

  arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)



Run file: ../data/bundle_tests/amit_tests/test_4/20250822-163616.524981/synthetic_0_Llama-3-1-EIRAI-8B_3900_100_40_stream_2c692d3d-5a87-434a-821d-b1c10c22623b_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_4/20250822-163616.524981/synthetic_0_Gazal-R1-32B-sft-merged-preview_8000_100_90_stream_4ca78361-0798-4f55-b776-cf38c8ae611b_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_4/20250822-163616.524981/synthetic_0_UIGEN-T3-32B-Preview_8000_100_60_stream_7a27932a-f9f5-4ed1-ae84-7782587847e6_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_4/20250822-163616.524981/synthetic_0_Llama-3-1-EIRAI-8B_3900_100_10_stream_8ac52616-c440-4d84-9905-1d2024686eec_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_4/20250822-163616.524981/synthetic_0_Qwen3-32B-abliterated_8000_100_90_stream_fae8cfba-3519-4c4b-8921-21174f6038d9_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_4/20

In [5]:
df_summary.sort_values(["test_dir", "run_file"], inplace=True)
df_summary

Unnamed: 0,test_dir,run_file,num_requests,num_groups,median_prefill_time_s,median_total_decode_time_s,model_switches,model_switch_hits,lru_hit_rate
0,test_1,synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_10...,10,5,0.596143,0.139529,4,0,1.0
0,test_1,synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_10...,40,13,1.177537,0.202860,12,0,1.0
0,test_1,synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_10...,60,4,1.176736,0.202699,3,0,1.0
0,test_1,synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_10...,90,20,1.176889,0.202706,19,0,1.0
0,test_2,synthetic_0_Hermes-3-Llama-3-1-8B_3900_100_10_...,10,4,0.659311,0.140115,3,0,1.0
...,...,...,...,...,...,...,...,...,...
0,test_4,synthetic_0_narrativAIV2_3900_100_90_stream_b0...,90,19,1.179694,0.201901,18,0,1.0
0,test_4,synthetic_0_natsumura-assistant-1-0-llama-3-1-...,10,2,1.253460,0.200806,1,0,1.0
0,test_4,synthetic_0_natsumura-assistant-1-0-llama-3-1-...,40,6,1.178718,0.202185,5,0,1.0
0,test_4,synthetic_0_natsumura-assistant-1-0-llama-3-1-...,60,3,1.180109,0.200830,2,0,1.0


In [26]:
df_summary.to_excel("../data/bundle_tests/amit_tests/summary_bundle_tests.xlsx", index=False)

# Using batch sizes from grafana

In [2]:
def plot_ttft_scatter(df):
    # Ensure batch_size is string for consistent categorical handling
    df["batch_size"] = df["batch_size"].astype(str)

    # Sort unique batch sizes numerically (cast to int for sorting, back to str for labels)
    batch_order = sorted(df["batch_size"].unique(), key=lambda x: int(x))

    # Make batch_size categorical with proper order
    df["batch_size"] = pd.Categorical(df["batch_size"], categories=batch_order, ordered=True)

    # Add jitter (in milliseconds)
    # df["time"] = pd.to_datetime(df["time"])
    df["time_jittered"] = df["start_time_formatted"] + pd.to_timedelta(
        np.random.uniform(-200, 200, size=len(df)), unit="ms"
    )

    # Define orders
    model_order = sorted(df["model"].unique())
    combo_order = [f"{b} - {m}" for b in batch_order for m in model_order]

    # New column: batch_size + model
    df["batch_model"] = df["batch_size"].astype(str) + " - " + df["model"]

    # Palette (large enough for batch_size x model combos)
    palette = px.colors.qualitative.Safe + px.colors.qualitative.Set2 + px.colors.qualitative.Dark24

    fig = px.scatter(
        df,
        x="time_jittered",
        y="time_to_first_token",
        color="batch_model",              # color by batch_size + model combo
        symbol="model",                   # marker shape by model
        category_orders={
            "batch_model": combo_order,
            "model": model_order
        },
        color_discrete_sequence=palette,  # categorical palette
        title="Time to First Token per Request",
        labels={
            "time_jittered": "Time",
            "time_to_first_token": "Time to First Token (s)",
            "batch_size": "Batch Size",
            "model": "Model",
            "batch_model": "Batch Size - Model"
        },
        hover_data=["batch_size", "model"]
    )

    # Style markers
    fig.update_traces(
        mode="markers",
        marker=dict(size=8, opacity=0.7, line=dict(width=0.5, color="black"))
    )

    # Make legends clearer (color vs symbol)
    fig.update_layout(
        legend=dict(
            title="Legend",
            groupclick="toggleitem"
        )
    )

    fig.show()
    
def find_uuid(file_name: str) -> Optional[str]:
    match = re.search(r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}', file_name)
    uuid = None
    if match:
        uuid = match.group()
    else:
        raise ValueError(f"UUID not found in filename {file_name}")
        
    return uuid


def read_json_files_to_df(directory: str) -> pd.DataFrame:
    data = []

    # Iterate through all files in the directory
    for filename in os.listdir(directory):
        if filename.endswith('individual_responses.json'):
            model_name = '_'.join(filename.split('_')[2:3])
            file_path = os.path.join(directory, filename)
            # Read the JSON file
            with open(file_path, 'r') as file:
                json_data = json.load(file)
                # Extract relevant fields from each JSON object and append to the data list
                for item in json_data:
                    if pd.isnull(item['error_code']):
                        data.append(
                            {
                                'start_time': datetime.strptime(item['start_time'], "%H:%M:%S.%f"),
                                'end_time': datetime.strptime(item['end_time'], "%H:%M:%S.%f"),
                                'client_end_to_end_latency_s': item['client_end_to_end_latency_s'],
                                'client_ttft_s': item['client_ttft_s'],
                                'model_name': model_name,
                                'uuid': find_uuid(filename),
                            }
                        )
    # Create a DataFrame from the data list
    df = pd.DataFrame(data)
    return df

def rgb_to_hex(rgb_str: str) -> str:
    """Convert 'rgb(r,g,b)' string to hex string like '#rrggbb'."""
    nums = list(map(int, re.findall(r"\d+", rgb_str)))
    return "#{:02x}{:02x}{:02x}".format(*nums)

def lighten_color(color: str, factor: float = 0.6) -> str:
    """
    Lightens a color (hex or rgb string) by interpolating toward white.
    Always returns a hex string.
    """
    if color.startswith("rgb"):
        color = rgb_to_hex(color)
    r, g, b = pc.hex_to_rgb(color)
    lighter_rgb = (
        int(r + (255 - r) * factor),
        int(g + (255 - g) * factor),
        int(b + (255 - b) * factor),
    )
    return "#{:02x}{:02x}{:02x}".format(*lighter_rgb)

def plot_requests_gantt_chart(df_user: pd.DataFrame, output_dir: str, file_name: str):
    """
    Plots a Gantt chart of response timings across all requests,
    with per-model metric colors and layered TTFT vs latency.
    """    
    requests = df_user.index + 1
    fig = go.Figure()

    # Assign a base color per model
    unique_models = df_user["model_name"].unique()
    palette = pc.qualitative.Set2
    color_map = {model: palette[i % len(palette)] for i, model in enumerate(unique_models)}

    # Add bars for each metric and model
    for model in unique_models:
        model_df = df_user[df_user["model_name"] == model]

        # End-to-end latency → lighter color
        fig.add_trace(
            go.Bar(
                y=model_df.index + 1,
                x=1000 * model_df["client_end_to_end_latency_s"],
                base=[str(x) for x in model_df["start_time"]],
                name=f"End-to-end latency - {model}",
                orientation="h",
                marker_color=lighten_color(color_map[model], factor=0.5),
                legendgroup=model,
                offsetgroup=f"{model}-latency",
            )
        )

        # TTFT → normal color (plotted after to be on top)
        fig.add_trace(
            go.Bar(
                y=model_df.index + 1,
                x=1000 * model_df["client_ttft_s"],
                base=[str(x) for x in model_df["start_time"]],
                name=f"TTFT - {model}",
                orientation="h",
                marker_color=lighten_color(color_map[model], factor=0.0),  # keep base color
                legendgroup=model,
                offsetgroup=f"{model}-ttft",
            )
        )

    # Alternate row shading
    for i in range(0, len(df_user.index), 2):
        fig.add_hrect(y0=i + 0.5, y1=i + 1.5, line_width=0, fillcolor="grey", opacity=0.1)

    fig.update_xaxes(
        type="date",
        tickformat="%H:%M:%S",
        hoverformat="%H:%M:%S.%2f",
    )
    fig.update_layout(
        barmode="overlay",  # ensures TTFT overlays latency
        title_text="LLM requests across time",
        xaxis_title="Time stamp",
        yaxis_title="Request index",
    )

    fig.show()
    fig.write_html(f"{output_dir}/llms_across_time-{file_name}.html", include_plotlyjs="cdn")

    return fig

def get_reports(grafana_path, jsons_path, file_name):
    
    # ttft plot
    df = pd.read_csv(grafana_path)

    df.sort_values('start_time', inplace=True)
    df.reset_index(drop=True, inplace=True)
    df['start_time_formatted'] = pd.to_datetime(df['start_time'], unit="s")
    df['end_time_formatted'] = pd.to_datetime(df['end_time'], unit="s")
    
    plot_ttft_scatter(df)
    
    # llm over time plot
    df = read_json_files_to_df(jsons_path)
    df = df.sort_values('end_time').reset_index(drop=True)
    
    output_dir = os.path.dirname(grafana_path)
    plot_requests_gantt_chart(df, output_dir, file_name=file_name)


In [3]:
# for testing
# filename = '5_ckpts-100_reqs-unevenly_qps_spread'
# grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}-test.csv'

def create_summary(grafana_path):

    # Read CSV
    df = pd.read_csv(grafana_path)
    # print(df.columns)
    df.sort_values(['start_time','time_to_first_token'], ascending=[True, False], inplace=True)
    df.reset_index(drop=True, inplace=True)
    df['start_time_formatted'] = pd.to_datetime(df['start_time'], unit="s")
    df['end_time_formatted'] = pd.to_datetime(df['end_time'], unit="s")
    
    # Calculate decoding_time
    df['decoding_time'] = df['total_latency'] - df['time_to_first_token']
    
    # Compute group_ids
    df["group_ids"] = (
        df[["model", "prompt_tokens_count", "batch_size"]]
        .ne(df[["model", "prompt_tokens_count", "batch_size"]].shift())
        .any(axis=1)
        .cumsum()
    )
    # df[['group_ids','start_time_formatted','end_time_formatted','model','prompt_tokens_count','batch_size','time_to_first_token']]
    
    # Create markers
    df["candidate_switch"] = False
    df["switch_hit"] = False

    # Identify candidate switches and switch hits 
    unique_groups = sorted(df["group_ids"].unique())

    for group in unique_groups:  # include first group
        # Find the first row of this group
        first_row_idx = df[df["group_ids"] == group].index[0]
        df.loc[first_row_idx, "candidate_switch"] = True

        # Check switch_hit condition (only if group has >1 row)
        group_rows = df[df["group_ids"] == group][["time_to_first_token","batch_size"]].reset_index(drop=True)
        if len(group_rows["time_to_first_token"]) > 1 and not (group_rows["batch_size"].eq(1).all()):
            first_val = group_rows["time_to_first_token"].iloc[0]
            rest_vals = group_rows["time_to_first_token"].iloc[1:]
            median_rest = rest_vals.median()
            if first_val > median_rest:
                df.loc[first_row_idx, "switch_hit"] = True
     
    # with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also           
    #     display(df[['group_ids','start_time_formatted','end_time_formatted','model','prompt_tokens_count','batch_size','time_to_first_token','candidate_switch','switch_hit']])
    # print(df["candidate_switch"].sum(), df["switch_hit"].sum())
    
    # Summary statistics
    df_summary = df.groupby(['model','prompt_tokens_count','completion_tokens_count']).agg({'completion_tokens_after_first_per_sec': ['median','min','max','std'], 'total_latency': ['median','min','max','std'], 'time_to_first_token': ['median','min','max','std'], 'decoding_time': ['median','min','max','std'], 'candidate_switch': 'sum', 'switch_hit': 'sum'})
    df_summary.columns = ['_'.join(col) for col in df_summary.columns]
    df_summary = df_summary.rename(columns=lambda c: c
                   .replace("completion_tokens_after_first_per_sec", "Tokens/s")
                   .replace("total_latency", "Latency")
                   .replace("time_to_first_token", "TTFT")
                   .replace("decoding_time", "Decoding Time")
                   )
    
    # Calculate LRU hit rate
    df_summary['LRU Hit Rate'] = (df_summary['candidate_switch_sum'] - df_summary['switch_hit_sum'])/df_summary['candidate_switch_sum']
    
    return df_summary

In [4]:
def plot_batch_size_histogram_grid(grafana_path: str, file_name: str = "batch_size_histogram_grid"):
    """
    Plots batch size histogram distributions per model in a grid.
    Legend shows batch_size-model combinations, sorted by batch_size then model.
    Adds extra horizontal spacing to avoid overlapping subplot titles.
    """

    df_grafana = pd.read_csv(grafana_path)

    # all unique batch sizes and assign consistent colors
    unique_batch_sizes = sorted(df_grafana["batch_size"].unique())
    batch_palette = pc.qualitative.Set3
    batch_color_map = {
        bs: batch_palette[i % len(batch_palette)] for i, bs in enumerate(unique_batch_sizes)
    }

    # models
    unique_models = sorted(df_grafana["model"].unique())
    n_models = len(unique_models)

    # grid layout: 3 columns, enough rows to fit all models
    ncols = 3
    nrows = math.ceil(n_models / ncols)

    fig = make_subplots(
        rows=nrows,
        cols=ncols,
        subplot_titles=[f"{m}" for m in unique_models],
        horizontal_spacing=0.25   # <-- more space between columns
    )

    # collect traces first so we can sort them
    traces = []
    for idx, model in enumerate(unique_models):
        row = idx // ncols + 1
        col = idx % ncols + 1
        model_df = df_grafana[df_grafana["model"] == model]

        for bs in sorted(model_df["batch_size"].unique()):
            bs_df = model_df[model_df["batch_size"] == bs]
            legend_name = f"{bs}-{model}"
            traces.append((
                bs, model, row, col,  # sorting helpers
                go.Histogram(
                    x=bs_df["batch_size"],
                    name=legend_name,
                    marker=dict(color=batch_color_map[bs]),
                    opacity=0.75,
                    showlegend=True
                )
            ))

        # X-axis: only existing batch sizes for that model
        fig.update_xaxes(
            tickmode="array",
            tickvals=sorted(model_df["batch_size"].unique()),
            ticktext=[str(bs) for bs in sorted(model_df["batch_size"].unique())],
            row=row, col=col
        )

    # sort traces by batch_size, then model
    traces = sorted(traces, key=lambda t: (t[0], t[1]))

    # add sorted traces
    for _, _, row, col, trace in traces:
        fig.add_trace(trace, row=row, col=col)

    # layout
    fig.update_layout(
        title="Batch size distributions per model",
        xaxis_title="Batch size",
        yaxis_title="Count",
        barmode="overlay",
        bargap=0.2,
        hoverlabel=dict(font_color="black"),
        height=300 * nrows,
        width=400 * ncols,   # widen figure to match spacing
        legend_title="Batch size - Model"
    )

    fig.show()
    output_dir = os.path.dirname(grafana_path)
    fig.write_html(f"{output_dir}/batch_size_histogram-{file_name}.html", include_plotlyjs="cdn")

    return fig


In [5]:
def plot_total_batch_size_histogram(grafana_path: str, file_name: str = "batch_size_histogram_total"):
    """
    Plots a total batch size histogram across all models.
    Each batch_size is consistently colored.
    """

    df_grafana = pd.read_csv(grafana_path)

    # all unique batch sizes and assign consistent colors
    unique_batch_sizes = sorted(df_grafana["batch_size"].unique())
    batch_palette = pc.qualitative.Set3
    batch_color_map = {
        bs: batch_palette[i % len(batch_palette)] for i, bs in enumerate(unique_batch_sizes)
    }

    # convert to categorical to avoid gaps
    df_grafana["batch_size"] = df_grafana["batch_size"].astype(str)

    # collect traces for each batch_size
    traces = []
    for bs in unique_batch_sizes:
        bs_df = df_grafana[df_grafana["batch_size"] == str(bs)]
        traces.append(
            go.Histogram(
                x=bs_df["batch_size"],
                name=f"{bs}",
                marker=dict(color=batch_color_map[bs]),
                opacity=0.75,
                showlegend=True
            )
        )

    # build figure
    fig = go.Figure(data=traces)

    fig.update_layout(
        title="Total Batch Size Distribution (All Models)",
        xaxis_title="Batch size",
        yaxis_title="Count",
        barmode="overlay",
        bargap=0.2,
        hoverlabel=dict(font_color="black"),
        legend_title="Batch size",
        xaxis=dict(
            type="category",  # categorical spacing, not numeric
            categoryorder="array",
            categoryarray=[str(bs) for bs in unique_batch_sizes]
        )
    )

    # write output
    fig.show()
    output_dir = os.path.dirname(grafana_path)
    fig.write_html(f"{output_dir}/{file_name}.html", include_plotlyjs="cdn")

    return fig


## 2_ckpts-100_conc_reqs-50_50

In [5]:
df_test1 = pd.read_csv("../data/bundle_tests/amit_tests_v2/2_ckpts-100_conc_reqs-50_50/grafana_test1.csv")

df_test1.sort_values('start_time', inplace=True)
df_test1.reset_index(drop=True, inplace=True)
df_test1['start_time_formatted'] = pd.to_datetime(df_test1['start_time'], unit="s")
df_test1['end_time_formatted'] = pd.to_datetime(df_test1['end_time'], unit="s")
# df_test1[['@timestamp','time','start_time_formatted','end_time_formatted']].head()

In [7]:
plot_ttft_scatter(df_test1)


In [9]:
df_test1 = read_json_files_to_df('../data/bundle_tests/amit_tests_v2/2_ckpts-100_conc_reqs-50_50/20250915-180848.094978')
df_test1 = df_test1.sort_values('end_time').reset_index(drop=True)

In [11]:
plot_requests_gantt_chart(df_test1, file_name="test1")

## 2_ckpts-100_conc_reqs-1_req_for_all

In [12]:
import pandas as pd

df_test2 = pd.read_csv("../data/bundle_tests/amit_tests_v2/2_ckpts-100_conc_reqs-1_req_for_all/grafana_test2.csv")

df_test2.sort_values('start_time', inplace=True)
df_test2.reset_index(drop=True, inplace=True)
df_test2['start_time_formatted'] = pd.to_datetime(df_test2['start_time'], unit="s")
df_test2['end_time_formatted'] = pd.to_datetime(df_test2['end_time'], unit="s")

In [13]:
plot_ttft_scatter(df_test2)

In [16]:
df_test2 = read_json_files_to_df('../data/bundle_tests/amit_tests_v2/2_ckpts-100_conc_reqs-1_req_for_all/20250915-181140.549877')
df_test2 = df_test2.sort_values('end_time').reset_index(drop=True)
# df_test2.head(30)

In [17]:
plot_requests_gantt_chart(df_test2, file_name="test2")

## 5_ckpts-100_conc_reqs-equally_spread

In [None]:
filename = '5_ckpts-100_conc_reqs-equally_spread'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/20250917-152115.104458'

In [22]:
get_reports(grafana_path, jsons_path, filename)

## 5_ckpts-100_conc_reqs-unequally_spread

In [None]:
filename = '5_ckpts-100_conc_reqs-unequally_spread'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/20250917-150135.148944'

In [12]:
get_reports(grafana_path, jsons_path, filename)

FileNotFoundError: [Errno 2] No such file or directory: '../data/bundle_tests/amit_tests_v2/10_ckpts-100_conc_reqs-unequally_spread/grafana-10_ckpts-100_conc_reqs-unequally_spread.csv'

## 5_ckpts-100_reqs-equally_qps_spread

In [13]:
filename = '5_ckpts-100_reqs-equally_qps_spread'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/20250918-164058.322378'

In [14]:
get_reports(grafana_path, jsons_path, filename)

## 5_ckpts-100_reqs-unevenly_qps_spread

In [98]:
filename = '5_ckpts-100_reqs-unevenly_qps_spread'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/20250918-164145.670015'

In [99]:
get_reports(grafana_path, jsons_path, filename)

## 5_ckpts-100_reqs-0.1_qps_equally_spread

In [115]:
filename = '5_ckpts-100_reqs-0.1_qps_equally_spread'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/20250919-145830.434496'

get_reports(grafana_path, jsons_path, filename)

In [116]:
df_summary = create_summary(grafana_path)
display(df_summary)
df_summary.to_csv(f'model_ss_summary-{filename}.csv')

Unnamed: 0,group_ids,start_time_formatted,end_time_formatted,model,prompt_tokens_count,batch_size,time_to_first_token,candidate_switch,switch_hit
0,1,2025-09-19 19:58:34.314850048,2025-09-19 19:58:34.486889984,naps-llama-3_1_instruct-v0.6.0,3934,1,0.083142,True,False
1,2,2025-09-19 19:58:34.538019840,2025-09-19 19:58:34.708349952,Meta-Llama-3.1-8B-Instruct,3934,1,0.081514,True,False
2,3,2025-09-19 19:58:34.760369920,2025-09-19 19:58:34.930350080,LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,1,0.081343,True,False
3,4,2025-09-19 19:58:34.984800000,2025-09-19 19:58:35.157159936,Hermes-3-Llama-3.1-8B,3919,1,0.083402,True,False
4,5,2025-09-19 19:58:35.209809920,2025-09-19 19:58:35.380310016,Llama-3.1-EIRAI-8B,3934,1,0.081887,True,False
5,6,2025-09-19 19:58:44.308160000,2025-09-19 19:58:44.479460096,naps-llama-3_1_instruct-v0.6.0,3934,1,0.082601,True,False
6,7,2025-09-19 19:58:44.531089920,2025-09-19 19:58:44.701740032,Hermes-3-Llama-3.1-8B,3919,1,0.082396,True,False
7,8,2025-09-19 19:58:44.757289984,2025-09-19 19:58:44.927200000,Meta-Llama-3.1-8B-Instruct,3934,1,0.08142,True,False
8,9,2025-09-19 19:58:44.980019968,2025-09-19 19:58:45.150180096,LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,1,0.082319,True,False
9,10,2025-09-19 19:58:45.204070144,2025-09-19 19:58:45.374439936,Llama-3.1-EIRAI-8B,3934,1,0.08209,True,False


96 1


Unnamed: 0_level_0,Unnamed: 1_level_0,time_to_first_token_median,time_to_first_token_min,time_to_first_token_max,time_to_first_token_std,decoding_time_median,decoding_time_min,decoding_time_max,decoding_time_std,candidate_switch_sum,switch_hit_sum,lru_hit_rate
model,prompt_tokens_count,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Hermes-3-Llama-3.1-8B,3919,0.081978,0.081396,0.083402,0.000498,0.088381,0.088141,0.089061,0.000232,20,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,0.082106,0.081343,0.083335,0.000573,0.088503,0.087846,0.088907,0.000243,19,1,0.947368
Llama-3.1-EIRAI-8B,3934,0.082325,0.081439,0.082824,0.000451,0.088578,0.087899,0.088878,0.000243,18,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,0.082007,0.081332,0.083267,0.00056,0.088459,0.088219,0.089453,0.000285,19,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,0.082193,0.081364,0.083142,0.000527,0.088565,0.088201,0.089206,0.000266,20,0,1.0


## 5_ckpts-100_reqs-1_qps_equally_spread

In [108]:
filename = '5_ckpts-100_reqs-1_qps_equally_spread'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/20250919-145750.655883'

get_reports(grafana_path, jsons_path, filename)

In [109]:
df_summary = create_summary(grafana_path)
display(df_summary)
df_summary.to_csv(f'model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,time_to_first_token_median,time_to_first_token_min,time_to_first_token_max,time_to_first_token_std,decoding_time_median,decoding_time_min,decoding_time_max,decoding_time_std,candidate_switch_sum,switch_hit_sum,lru_hit_rate
model,prompt_tokens_count,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Hermes-3-Llama-3.1-8B,3919,0.082608,0.081486,0.153214,0.035342,0.088856,0.088156,0.097037,0.003911,16,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,0.082712,0.08186,0.153044,0.032985,0.088637,0.088066,0.097537,0.004037,17,0,1.0
Llama-3.1-EIRAI-8B,3934,0.082196,0.081469,0.083529,0.000641,0.088541,0.088156,0.08953,0.000299,20,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,0.11774,0.081311,0.152794,0.036112,0.092434,0.088115,0.096881,0.004134,15,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,0.117569,0.08152,0.152986,0.036077,0.092693,0.088117,0.096838,0.004021,15,0,1.0


## 5_ckpts-100_reqs-0.1_qps_exponential_equally_spread , concurrent process

In [127]:
filename = '5_ckpts-100_reqs-0.1_qps_exponential_equally_spread'
runname = '20250919-173252.021015-conc'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}-{runname}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [128]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [129]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [130]:
df_summary = create_summary(grafana_path)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,1116.460226,1111.017424,1126.757614,4.181556,0.171488,0.169922,0.173646,0.000727,0.082774,0.08206,0.084995,0.000631,0.088673,0.087863,0.089108,0.000332,16,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,1111.560283,1101.489205,1124.828584,6.524432,0.171737,0.17083,0.172849,0.000607,0.082759,0.081806,0.083651,0.000457,0.089064,0.088013,0.089878,0.000521,18,0,1.0
Llama-3.1-EIRAI-8B,3934,100,1113.733673,1019.044302,1122.050472,29.185875,0.171808,0.170627,0.24985,0.024017,0.082914,0.082029,0.1527,0.021508,0.08889,0.088231,0.09715,0.002541,17,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,1109.912752,1105.342011,1125.182151,5.127944,0.172101,0.170536,0.173813,0.000669,0.083027,0.0822,0.084507,0.000509,0.089196,0.087986,0.089565,0.000409,17,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,100,1114.445148,1105.083143,1124.560509,4.331601,0.171631,0.170477,0.172721,0.00068,0.082868,0.081993,0.083829,0.000488,0.088833,0.088034,0.089586,0.000345,15,0,1.0


## 5_ckpts-100_reqs-0.1_qps_exponential_equally_spread , normal process

In [8]:
filename = '5_ckpts-100_reqs-0.1_qps_exponential_equally_spread'
runname = '20250919-173934.901597'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}-{runname}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [None]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,1113.947282,1097.796656,1124.828584,7.584925,0.171904,0.169542,0.173525,0.000917,0.082838,0.081348,0.083849,0.000553,0.088873,0.088013,0.090181,0.000608,1,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,1112.986107,1098.086967,1121.977708,6.167165,0.172113,0.1709,0.173324,0.000761,0.082968,0.082083,0.083771,0.000411,0.08895,0.088237,0.090157,0.000495,1,0,1.0
Llama-3.1-EIRAI-8B,3934,100,1112.471901,1104.980217,1120.279549,4.647445,0.17206,0.17099,0.173471,0.000778,0.08296,0.08197,0.084206,0.000607,0.088991,0.088371,0.089594,0.000372,1,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,1109.093089,1081.163806,1122.563114,9.176696,0.172528,0.169992,0.175357,0.001516,0.083131,0.081485,0.085071,0.000907,0.089262,0.088191,0.091568,0.000746,1,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,100,1111.122972,1097.152713,1120.724024,6.09754,0.172302,0.171057,0.174254,0.000831,0.08308,0.082491,0.0846,0.00053,0.089099,0.088336,0.090234,0.00049,1,0,1.0


## 5_ckpts-100_reqs-0.5_qps_exponential_equally_spread

In [123]:
filename = '5_ckpts-100_reqs-0.5_qps_exponential_equally_spread'
runname = '20250922-191338.582608'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [124]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [125]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [126]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,1114.799631,1014.265152,1126.112851,31.258571,0.171608,0.169931,0.250556,0.024346,0.082863,0.081334,0.152948,0.021649,0.088805,0.087913,0.097608,0.002729,17,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,1115.740185,1004.818197,1122.396227,42.724315,0.171357,0.170798,0.337438,0.05403,0.082938,0.08206,0.238913,0.050451,0.08873,0.088204,0.098525,0.00374,15,0,1.0
Llama-3.1-EIRAI-8B,3934,100,1114.274063,1011.352605,1122.884893,41.072822,0.171823,0.170814,0.252228,0.032674,0.083051,0.082288,0.15434,0.029105,0.088847,0.088166,0.097889,0.003587,16,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,1114.654465,1104.28377,1119.41278,3.81403,0.171541,0.170795,0.173054,0.000536,0.082739,0.081991,0.08354,0.000441,0.088817,0.088439,0.089651,0.000305,13,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,100,1113.667012,1023.697059,1125.136419,28.516574,0.171844,0.169626,0.249005,0.02381,0.082896,0.08155,0.152297,0.021394,0.088896,0.087989,0.096708,0.002464,19,0,1.0


## 5_ckpts-100_reqs-1_qps_exponential_equally_spread

In [119]:
filename = '5_ckpts-100_reqs-1_qps_exponential_equally_spread'
runname = '20250922-190941.709402'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [120]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [121]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [122]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,1108.624284,901.374294,1124.058212,78.918702,0.172897,0.170582,0.455189,0.102448,0.083633,0.082037,0.345356,0.094822,0.0893,0.088074,0.109832,0.007671,12,1,0.916667
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,1113.862313,899.608938,1120.049891,78.597587,0.171991,0.171257,0.505164,0.121647,0.08313,0.08213,0.395116,0.113962,0.08888,0.088389,0.110048,0.007758,14,0,1.0
Llama-3.1-EIRAI-8B,3934,100,1038.738735,1014.123956,1125.816691,48.300561,0.248558,0.169997,0.341115,0.05505,0.152977,0.081452,0.244512,0.051841,0.095323,0.087936,0.097621,0.004199,14,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,1025.468017,893.217122,1118.70104,91.48937,0.249853,0.17073,0.508838,0.12605,0.153271,0.081753,0.398003,0.117353,0.096541,0.088495,0.110835,0.009066,13,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,100,1113.427549,900.924487,1121.841302,79.747623,0.171819,0.170365,0.508119,0.125548,0.083104,0.08168,0.398232,0.117843,0.088915,0.088248,0.109887,0.007782,15,0,1.0


## 5_ckpts-100_reqs-5_qps_exponential_equally_spread

In [99]:
filename = '5_ckpts-100_reqs-5_qps_exponential_equally_spread'
runname = '20250922-191817.887916'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [100]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [None]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [None]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,492.292706,491.457753,1113.99508,159.838433,1.416263,0.172328,1.416605,0.377812,1.215163,0.083459,1.215163,0.34242,0.2011,0.088869,0.201442,0.035421,3,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,490.384305,489.208882,709.889023,107.252221,1.471621,0.825301,1.472106,0.316297,1.269738,0.685843,1.269738,0.285736,0.201882,0.139458,0.202368,0.030562,2,0,1.0
Llama-3.1-EIRAI-8B,3934,100,493.400038,492.85189,894.758824,164.582922,1.464832,0.504747,1.465055,0.394007,1.264183,0.394103,1.264183,0.357073,0.200649,0.110644,0.200872,0.036934,2,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,492.336191,491.368774,707.467626,95.551871,1.462266,0.828412,1.462662,0.281618,1.261184,0.688476,1.261184,0.254432,0.201082,0.139936,0.201478,0.027187,2,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,100,489.290164,488.655052,884.972328,162.267904,1.471488,0.417345,1.47175,0.432611,1.269154,0.305477,1.269154,0.395484,0.202334,0.111868,0.202597,0.037127,2,0,1.0


## 5_ckpts-100_reqs-10_qps_exponential_equally_spread

In [114]:
filename = '5_ckpts-100_reqs-10_qps_exponential_equally_spread'
runname = '20250922-192002.553201'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [115]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [None]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [118]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,487.235925,486.349656,887.39693,164.105931,1.388197,0.420734,1.388567,0.397042,1.18501,0.309172,1.18501,0.359436,0.203187,0.111562,0.203557,0.037606,2,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,490.665635,490.08305,701.699168,105.892105,1.382668,0.739652,1.382908,0.323185,1.180901,0.598566,1.180901,0.292696,0.201767,0.141086,0.202007,0.03049,2,0,1.0
Llama-3.1-EIRAI-8B,3934,100,487.962807,487.356014,1115.716194,196.639122,1.383165,0.171983,1.383417,0.424263,1.18028,0.083251,1.18028,0.38364,0.202884,0.088732,0.203137,0.040627,3,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,491.225197,490.605342,897.582652,166.614929,1.38166,0.417041,1.381915,0.395868,1.180124,0.306745,1.180124,0.358427,0.201537,0.110296,0.201792,0.037441,2,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,100,488.867629,488.290164,712.35027,109.230532,1.384232,0.809268,1.384471,0.281361,1.181723,0.670291,1.181723,0.250274,0.202509,0.138977,0.202748,0.031087,2,0,1.0


## 5_ckpts-100_reqs-0.1_10_qps_exponential_unevenly_spread

In [110]:
filename = '5_ckpts-100_reqs-0.1_10_qps_exponential_unevenly_spread'
runname = '20250922-192400.872431'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [111]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [112]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [113]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,490.396757,489.788232,1088.972479,156.372623,1.382766,0.17417,1.383016,0.37464,1.180888,0.083259,1.180888,0.339138,0.201877,0.090911,0.202128,0.035521,3,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,1108.410581,898.01359,1116.421138,77.331343,0.172716,0.17129,0.416469,0.089362,0.083251,0.082379,0.306226,0.081692,0.089317,0.088676,0.110243,0.007679,9,0,1.0
Llama-3.1-EIRAI-8B,3934,100,1111.528976,1097.634148,1117.4276,5.782075,0.172357,0.171586,0.174122,0.00083,0.083237,0.082634,0.084256,0.000538,0.089067,0.088596,0.090194,0.000467,6,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,490.448886,489.901493,707.259159,101.814198,1.383434,0.737164,1.383659,0.303845,1.181578,0.597187,1.181578,0.274759,0.201856,0.139977,0.202081,0.029086,2,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,100,1104.386589,709.790732,1117.538872,174.346276,0.172748,0.171291,0.738256,0.247949,0.083284,0.082233,0.598778,0.225983,0.089643,0.088588,0.139478,0.02197,12,0,1.0


## 1_ckpts-100_reqs-50_qps_exponential

In [6]:
filename = '1_ckpts-100_reqs-50_qps_exponential'
runname = '20250923-184316.283625'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [7]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [8]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [9]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Meta-Llama-3.1-8B-Instruct,3934,100,489.084997,486.12589,1011.776464,86.838178,1.383016,0.283651,1.384583,0.206019,1.180563,0.185803,1.180933,0.186129,0.202419,0.097848,0.203651,0.019903,3,0,1.0


## 1_ckpts-100_reqs-25_qps_exponential

In [10]:
filename = '1_ckpts-100_reqs-25_qps_exponential'
runname = '20250923-181827.441700'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [11]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [12]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [13]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Meta-Llama-3.1-8B-Instruct,3934,100,489.837055,486.508639,1031.46309,91.072238,1.381816,0.249421,1.383168,0.217257,1.178899,0.153441,1.181184,0.196505,0.202108,0.09598,0.203491,0.020779,3,0,1.0


## 1_ckpts-100_reqs-5_qps_exponential

In [14]:
filename = '1_ckpts-100_reqs-5_qps_exponential'
runname = '20250923-182056.258588'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [15]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [16]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [17]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Meta-Llama-3.1-8B-Instruct,3934,100,906.458798,704.688172,1122.581323,115.872813,0.415104,0.170382,0.73741,0.161993,0.305983,0.08192,0.597315,0.14777,0.109217,0.08819,0.140488,0.014255,24,5,0.791667


## 1_ckpts-100_reqs-2.5_qps_exponential

In [18]:
filename = '1_ckpts-100_reqs-2.5_qps_exponential'
runname = '20250923-182510.750952'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [19]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [20]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [21]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Meta-Llama-3.1-8B-Instruct,3934,100,1113.745587,708.034601,1126.08842,141.240664,0.171444,0.169332,0.737383,0.192508,0.082693,0.081385,0.597559,0.175468,0.088889,0.087915,0.139824,0.017052,20,1,0.95


## 1_ckpts-100_reqs-0.5_qps_exponential

In [22]:
filename = '1_ckpts-100_reqs-0.5_qps_exponential'
runname = '20250923-184316.283625'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [23]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [24]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [25]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Meta-Llama-3.1-8B-Instruct,3934,100,1114.952262,1018.539377,1128.365089,18.501186,0.171566,0.170203,0.249856,0.015372,0.082771,0.081662,0.153055,0.013804,0.088793,0.087738,0.097198,0.001599,5,0,1.0


## 5_ckpts-100_reqs-50_qps_exponential_dominant_spread

In [26]:
filename = '5_ckpts-100_reqs-50_qps_exponential_dominant_spread'
runname = '20250925-190940.559474'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [27]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [28]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [29]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,892.631478,892.245948,1113.490231,98.800949,0.511572,0.246237,0.51162,0.118665,0.400664,0.157327,0.400664,0.108824,0.110908,0.08891,0.110956,0.009842,2,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,711.844697,711.609252,712.165787,0.215694,0.823162,0.823099,0.823208,4.2e-05,0.684087,0.684087,0.684087,0.0,0.139075,0.139013,0.139121,4.2e-05,1,0,1.0
Llama-3.1-EIRAI-8B,3934,100,717.145924,716.893345,717.490412,0.230888,0.823211,0.823145,0.82326,4.4e-05,0.685164,0.685164,0.685164,0.0,0.138047,0.137981,0.138096,4.4e-05,1,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,487.540268,486.739604,1112.332899,69.768741,1.383544,0.193079,1.417546,0.134511,1.180327,0.104076,1.214181,0.121845,0.20306,0.089002,0.203394,0.012727,3,1,0.666667
naps-llama-3_1_instruct-v0.6.0,3934,100,707.233863,706.99905,707.498967,0.19547,0.82719,0.827138,0.827237,3.9e-05,0.687208,0.687208,0.687208,0.0,0.139982,0.13993,0.140028,3.9e-05,1,0,1.0


## 5_ckpts-100_reqs-total_25_qps_exponential_dominant_spread

In [33]:
filename = '5_ckpts-100_reqs-25_qps_exponential_dominant_spread'
runname = '20250925-191056.151048'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [34]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [35]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [36]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,711.449531,711.200948,711.732445,0.20854,0.825823,0.825767,0.825871,4.1e-05,0.68667,0.68667,0.68667,0.0,0.139153,0.139097,0.139201,4.1e-05,1,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,897.254872,896.789798,1111.933761,96.049128,0.514374,0.254821,0.514431,0.116081,0.404037,0.165787,0.404037,0.106549,0.110337,0.089034,0.110394,0.009532,2,0,1.0
Llama-3.1-EIRAI-8B,3934,100,902.720527,902.057686,1029.989374,69.779089,0.50475,0.336047,0.504831,0.092415,0.395082,0.23993,0.395082,0.08498,0.109668,0.096117,0.109749,0.007435,2,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,487.40178,484.144928,707.37121,62.485888,1.382647,0.771339,1.384484,0.173762,1.17934,0.631384,1.181189,0.155818,0.203118,0.139955,0.204484,0.01797,4,1,0.75
naps-llama-3_1_instruct-v0.6.0,3934,100,899.396547,898.950657,1111.85635,95.05579,0.507159,0.244048,0.507214,0.117672,0.397085,0.155007,0.397085,0.108261,0.110074,0.08904,0.110128,0.009411,2,0,1.0


## 5_ckpts-100_reqs-5_qps_exponential_dominant_spread

In [37]:
filename = '5_ckpts-100_reqs-5_qps_exponential_dominant_spread'
runname = '20250925-191204.961342'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [38]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [39]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [40]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,1090.241964,1005.44349,1114.249171,54.753264,0.17352,0.171633,0.342901,0.093438,0.082873,0.082317,0.244437,0.088623,0.090806,0.088849,0.098464,0.004866,4,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,1110.925279,1106.776809,1126.855462,7.93948,0.171848,0.170214,0.17223,0.000874,0.082733,0.081928,0.082922,0.000402,0.089115,0.087855,0.089449,0.000629,5,0,1.0
Llama-3.1-EIRAI-8B,3934,100,1112.905299,1021.885688,1120.137512,51.149938,0.240521,0.171204,0.25019,0.041513,0.152139,0.082279,0.153311,0.038544,0.088956,0.088382,0.09688,0.00444,4,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,1025.913054,489.895713,1125.865531,206.86374,0.249335,0.170188,1.380838,0.403238,0.152855,0.08182,1.178754,0.365557,0.0965,0.087932,0.202084,0.037703,25,1,0.96
naps-llama-3_1_instruct-v0.6.0,3934,100,1108.830878,1018.919274,1119.778049,53.164465,0.171738,0.170619,0.338471,0.09163,0.083328,0.081792,0.241309,0.087021,0.089283,0.08841,0.097162,0.004623,3,0,1.0


## 5_ckpts-100_reqs-2.5_qps_exponential_dominant_spread

In [41]:
filename = '5_ckpts-100_reqs-2.5_qps_exponential_dominant_spread'
runname = '20250925-191320.107037'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [42]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [43]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [44]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,1118.637751,1106.496628,1120.545587,5.781108,0.17139,0.170916,0.172718,0.000726,0.082786,0.082524,0.083789,0.000518,0.0885,0.08835,0.089472,0.000462,4,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,1023.596119,1016.880652,1120.775449,44.963021,0.249636,0.170227,0.343772,0.073572,0.152918,0.081895,0.246416,0.070424,0.096718,0.088332,0.097357,0.003894,3,0,1.0
Llama-3.1-EIRAI-8B,3934,100,1117.385502,1102.068045,1120.158665,7.140463,0.171494,0.170972,0.171998,0.000413,0.082894,0.082101,0.083119,0.000395,0.0886,0.08838,0.089831,0.000573,4,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,1113.972666,833.335533,1122.147504,85.012187,0.171743,0.170066,0.425145,0.088015,0.082847,0.081605,0.306746,0.079899,0.088871,0.088224,0.1188,0.008529,23,1,0.956522
naps-llama-3_1_instruct-v0.6.0,3934,100,1116.007063,1114.781643,1122.144472,2.956806,0.171155,0.16971,0.172093,0.000904,0.082532,0.081486,0.083384,0.000723,0.088709,0.088224,0.088807,0.000234,5,0,1.0


## 5_ckpts-100_reqs-0.5_qps_exponential_dominant_spread

In [45]:
filename = '5_ckpts-100_reqs-0.5_qps_exponential_dominant_spread'
runname = '20250925-191421.733354'
grafana_path = f'../data/bundle_tests/amit_tests_v2/{filename}/grafana-{filename}.csv'
jsons_path = f'../data/bundle_tests/amit_tests_v2/{filename}/{runname}'

get_reports(grafana_path, jsons_path, filename)

In [46]:
_ = plot_batch_size_histogram_grid(grafana_path, file_name=filename)

In [47]:
_ = plot_total_batch_size_histogram(grafana_path, file_name=filename)

In [48]:
df_summary = create_summary(grafana_path)
# df_summary.reset_index(inplace=True)
display(df_summary)
output_dir = os.path.dirname(grafana_path)
df_summary.to_csv(f'{output_dir}/model_ss_summary-{filename}.csv')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Tokens/s_median,Tokens/s_min,Tokens/s_max,Tokens/s_std,Latency_median,Latency_min,Latency_max,Latency_std,TTFT_median,TTFT_min,TTFT_max,TTFT_std,Decoding Time_median,Decoding Time_min,Decoding Time_max,Decoding Time_std,candidate_switch_sum,switch_hit_sum,LRU Hit Rate
model,prompt_tokens_count,completion_tokens_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hermes-3-Llama-3.1-8B,3919,100,1113.421559,1103.802354,1115.125927,4.491552,0.171644,0.171013,0.172019,0.000365,0.082637,0.082234,0.082783,0.000234,0.088915,0.088779,0.08969,0.000362,5,0,1.0
LLaMa3.1-8B-Legal-ThaiCCL-Combine,3906,100,1115.404503,1109.082618,1121.147662,5.49776,0.170754,0.170622,0.171804,0.000592,0.082452,0.081992,0.082625,0.000253,0.088757,0.088302,0.089263,0.000438,5,0,1.0
Llama-3.1-EIRAI-8B,3934,100,1120.164708,1113.540994,1125.060207,5.19979,0.170695,0.169949,0.171849,0.000711,0.08216,0.08183,0.082944,0.00047,0.08838,0.087995,0.088906,0.000411,5,0,1.0
Meta-Llama-3.1-8B-Instruct,3934,100,1115.184327,1032.327273,1129.122955,13.833845,0.171413,0.169638,0.247786,0.012026,0.082625,0.081317,0.151886,0.010898,0.088775,0.087679,0.0959,0.001179,17,0,1.0
naps-llama-3_1_instruct-v0.6.0,3934,100,1118.764336,1111.514081,1122.812021,4.357639,0.171077,0.170586,0.172012,0.000571,0.082609,0.082238,0.082944,0.000265,0.08849,0.088171,0.089068,0.000346,5,0,1.0


## Compare batch sizes

### all runs

In [7]:
import pandas as pd
import plotly.graph_objects as go

def comparative_bar_plots(datasets):
    """
    Plot comparative batch size distributions for N datasets.

    Parameters:
        datasets (dict): Mapping of {series_name: csv_path} where each CSV 
                         contains a 'batch_size' column.
    """
    dfs = []
    totals = []
    series_names = list(datasets.keys())

    # Process each dataset into percentage distributions
    for i, (series_name, path) in enumerate(datasets.items()):
        df = pd.read_csv(path)
        batch_size_counts = df.groupby("batch_size").size().to_frame(name=f"count_{i}").reset_index()
        dfs.append(batch_size_counts)
        totals.append(df.shape[0])

    # Merge all datasets on batch_size
    df_merged = dfs[0]
    for i in range(1, len(dfs)):
        df_merged = df_merged.merge(dfs[i], on="batch_size", how="outer")

    df_merged.fillna(0, inplace=True)

    # Normalize counts to percentages
    for i, total in enumerate(totals):
        df_merged[f"percent_{i}"] = (df_merged[f"count_{i}"] / total * 100).round(2)

    # Ensure batch_size is categorical (string) to remove gaps
    df_merged.sort_values("batch_size", key=lambda x: x.astype(int), inplace=True)
    x_values = df_merged["batch_size"].astype(str)

    # Build the figure
    fig = go.Figure()
    for i, series_name in enumerate(series_names):
        y_values = df_merged[f"percent_{i}"] / 100  # convert to fraction
        fig.add_trace(go.Bar(name=series_name, x=x_values, y=y_values))

    # Layout
    fig.update_layout(
        barmode="group",
        title="Batch Size Distribution Comparison",
        xaxis_title="Batch Size",
        yaxis_title="Percentage",
        xaxis=dict(type="category"),
        yaxis=dict(tickformat=".0%")
    )

    fig.show()


In [8]:
def load_json_logs_to_df(dir_path: str) -> pd.DataFrame:
    """
    Load multiple JSON files containing lists of dicts into a single DataFrame.
    
    Args:
        dir_path (str): Path to directory containing JSON files.

    Returns:
        pd.DataFrame: Combined DataFrame with all records.
    """
    all_records = []

    for root, _, files in os.walk(dir_path):
        for filename in files:
            if filename.endswith(".json"):  # adjust if your extension differs
                file_path = os.path.join(root, filename)
                with open(file_path, "r") as f:
                    try:
                        data = json.load(f)  # expects list[dict]
                        if isinstance(data, list):
                            all_records.extend(data)
                        else:
                            pass
                            # print(f"Skipping {file_path}: not a list of dicts")
                    except json.JSONDecodeError as e:
                        print(f"Error reading {file_path}: {e}")

    return pd.DataFrame(all_records)

import plotly.graph_objects as go
from plotly.subplots import make_subplots

def comparative_dist_plots(kit_paths: dict):
    """
    Plot comparative TTFT histograms for multiple datasets.
    - Each dataset in its own subplot (stacked vertically)
    - All share the same x-axis
    - Histograms normalized to density
    - No fill (only colored borders)
    - Legend shows dataset title
    """
    # one row per dataset
    fig = make_subplots(
        rows=len(kit_paths),
        cols=1,
        shared_xaxes=True,
        subplot_titles=list(kit_paths.keys())
    )

    # loop through datasets
    for i, (title, path) in enumerate(kit_paths.items(), start=1):
        df = load_json_logs_to_df(path)
        fig.add_trace(
            go.Histogram(
                x=df['client_ttft_s'],
                nbinsx=50,
                name=title,
                opacity=1,
                showlegend=False  # we already use subplot titles
            ),
            row=i,
            col=1
        )

    # auto-assign distinct colors to outlines
    fig.update_traces(marker_line_color=None)

    fig.update_layout(
        title='Comparative TTFT Distributions',
        xaxis_title='Time to First Token (s)',
        bargap=0.0,
        height=300 * len(kit_paths),  # scale height with number of plots
    )

    fig.show()


In [8]:
qps=50
datasets = {
    "Single model (100)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv',
}
comparative_bar_plots(datasets)

In [9]:
qps=25
datasets = {
    "Single model (100)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv',
}
comparative_bar_plots(datasets)

In [10]:
qps=5
datasets = {
    "Single model (100)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv',
}
comparative_bar_plots(datasets)

In [11]:
qps=2.5
datasets = {
    "Single model (100)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv',
}
comparative_bar_plots(datasets)

In [12]:
qps=0.5
datasets = {
    "Single model (100)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model (100 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv',
}
comparative_bar_plots(datasets)

### comparing all 100s

In [12]:
qps = 50
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

In [13]:
qps = 25
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

In [14]:
qps = 5
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

In [15]:
qps = 2.5
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

In [16]:
qps = 0.5
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-100_reqs-{qps}_qps_exponential/grafana-1_ckpts-100_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-100_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-100_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

### comparing all 1ks

In [9]:

def calculate_total_tokens_per_s(title: str, path: str) -> pd.DataFrame:

    df = load_json_logs_to_df(path)

    df["start_time_dt"] = pd.to_datetime(df["start_time"], format="%H:%M:%S.%f")
    df["end_time_dt"] = pd.to_datetime(df["end_time"], format="%H:%M:%S.%f")

    first_starting_time = df["start_time_dt"].min()
    last_end_time = df["end_time_dt"].max()
    
    number_input_tokens = df["number_input_tokens"].sum()
    number_output_tokens = df["number_output_tokens"].sum()
    number_total_tokens = number_input_tokens + number_output_tokens

    total_tok_s = number_total_tokens / (last_end_time - first_starting_time).total_seconds()
    
    output = {
        title: {
            'number_input_tokens': number_input_tokens,
            'number_output_tokens': number_output_tokens,
            'number_total_tokens': number_total_tokens,
            'first_starting_time': first_starting_time,
            'last_end_time': last_end_time,
            'total_duration_s': (last_end_time - first_starting_time).total_seconds(),
            'total_tokens_per_s': total_tok_s
        }
    }
    
    return pd.DataFrame.from_dict(output, orient='index')

def get_tok_s_summary(kit_paths: dict) -> pd.DataFrame:
    summary_dfs = []
    for title, path in kit_paths.items():
        df_summary = calculate_total_tokens_per_s(title, path)
        summary_dfs.append(df_summary)
    
    return pd.concat(summary_dfs)



In [10]:
import pandas as pd
import plotly.graph_objects as go

def plot_bar_line_comparison(summary_df: pd.DataFrame):
    titles = summary_df.index.tolist()
    tokens_per_s = summary_df["total_tokens_per_s"].tolist()
    duration_s = summary_df["total_duration_s"].tolist()
    
    fig = go.Figure()

    # Bars (tokens/sec), one per title, each in different color
    for t, tok_s in zip(titles, tokens_per_s):
        fig.add_trace(go.Bar(
            x=[t],
            y=[tok_s],
            name=f"{t} - Tokens/sec",   # legend shows the title
            yaxis="y1",
            opacity=0.8,
            text=[f"{tok_s:,.0f}"],     # rounded, thousands separator
            textposition="outside"
        ))

    # Line (duration), added last so it overlays the bars
    fig.add_trace(go.Scatter(
        x=titles,
        y=duration_s,
        name="Duration (s)",
        mode="lines+markers",
        text=[f"{d:,.0f}" for d in duration_s],
        textposition="top center",
        yaxis="y2",
        line=dict(width=3, color="black")
    ))

    # Calculate padding for bar labels
    y_max_tok_per_s = max(tokens_per_s) * 1.2

    # Layout with secondary axis + hide x labels + legend below
    fig.update_layout(
        title="Comparative Tokens/sec (bars) vs Duration (line)",
        xaxis=dict(
            title="",
            showticklabels=False
        ),
        yaxis=dict(
            title="Tokens per Second",
            side="left",
            range=[0, y_max_tok_per_s]   # add headroom for labels
        ),
        yaxis2=dict(
            title="Duration (s)",
            overlaying="y",
            side="right",
        ),
        barmode="group",
        legend=dict(
            orientation="h",
            yanchor="top",
            y=-0.2,          # position below chart
            xanchor="center",
            x=0.5
        )
    )
    
    fig.show()


In [23]:
qps = 50
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

datasets_amit = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets_amit)

kit_paths_1k = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
comparative_dist_plots(kit_paths_1k)

summary_df = get_tok_s_summary(kit_paths_1k)
plot_bar_line_comparison(summary_df)

# request from amit
kit_paths_amit = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
summary_df = get_tok_s_summary(kit_paths_amit)
plot_bar_line_comparison(summary_df)

In [24]:

qps = 25
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

datasets_amit = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets_amit)

kit_paths_1k = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
comparative_dist_plots(kit_paths_1k)

summary_df = get_tok_s_summary(kit_paths_1k)
plot_bar_line_comparison(summary_df)

# request from amit
kit_paths_amit = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
summary_df = get_tok_s_summary(kit_paths_amit)
plot_bar_line_comparison(summary_df)

In [25]:

qps = 5
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

datasets_amit = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets_amit)

kit_paths_1k = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
comparative_dist_plots(kit_paths_1k)

summary_df = get_tok_s_summary(kit_paths_1k)
plot_bar_line_comparison(summary_df)

# request from amit
kit_paths_amit = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
summary_df = get_tok_s_summary(kit_paths_amit)
plot_bar_line_comparison(summary_df)

In [26]:

qps = 2.5
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

datasets_amit = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets_amit)

kit_paths_1k = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
comparative_dist_plots(kit_paths_1k)

summary_df = get_tok_s_summary(kit_paths_1k)
plot_bar_line_comparison(summary_df)

# request from amit
kit_paths_amit = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
summary_df = get_tok_s_summary(kit_paths_amit)
plot_bar_line_comparison(summary_df)

In [27]:

qps = 0.5
datasets = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with uniform usage": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread/grafana-5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets)

datasets_amit = {
    "Single model": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential/grafana-1_ckpts-1000_reqs-{qps}_qps_exponential.csv',
    "Five models with a dominant model": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread/grafana-5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread.csv'

}
comparative_bar_plots(datasets_amit)


kit_paths_1k = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with uniform usage (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{int(qps/5) if qps/5 >= 1 else qps/5}_qps_exponential_equally_spread',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
comparative_dist_plots(kit_paths_1k)

summary_df = get_tok_s_summary(kit_paths_1k)
plot_bar_line_comparison(summary_df)

# request from amit
kit_paths_amit = {
    "Single model (1,000)": f'../data/bundle_tests/amit_tests_v2/1_ckpts-1000_reqs-{qps}_qps_exponential',
    "Five models with a dominant model (1,000 reqs)": f'../data/bundle_tests/amit_tests_v2/5_ckpts-1000_reqs-{qps}_qps_exponential_dominant_spread',
}
summary_df = get_tok_s_summary(kit_paths_amit)
plot_bar_line_comparison(summary_df)