In [1]:

import os
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
def assign_groups(df, threshold=0.3):
    df = df.sort_values("start_time").reset_index(drop=True)
    group_ids = []
    current_ttft = None
    current_group = 0

    for _, row in df.iterrows():
        ttft = row["server_ttft_s"]
        if current_ttft is None:
            # first group
            current_group += 1
            current_ttft = ttft
        elif abs(ttft - current_ttft) > threshold:
            # start new group
            current_group += 1
            current_ttft = ttft
        group_ids.append(current_group)

    df["group_id"] = group_ids
    return df

In [3]:
tests_dirs = os.listdir('../data/bundle_tests/amit_tests/')
# tests_dirs = tests_dirs[:1]
tests_dirs

['test_1', 'summary_bundle_tests.xlsx', 'test_3', 'test_4', 'test_2']

In [None]:
df_summary = pd.DataFrame()

for test_dir in tests_dirs:
    print(f"\n--- Analyzing test: {test_dir} ---")
    
    folder = Path(f"../data/bundle_tests/amit_tests/{test_dir}")
    json_files = list(folder.rglob("*.json"))
    run_files = [f for f in json_files if f.suffix == ".json" and "individual_responses" in f.name]
    # run_files = run_files[:1]
    
    for run_file in run_files:
        print(f"\nRun file: {run_file}")

        df = pd.read_json(str(run_file))
        df = df[df['error_code'].isnull()]
        df["start_time"] = pd.to_datetime(df["start_time"], format="%H:%M:%S.%f")
        
        prefill_time_s_median = df['server_ttft_s'].median()
        total_decode_time_s_median = df['server_end_to_end_latency_s'].median() - prefill_time_s_median
        
        # Apply grouping
        df_with_groups = assign_groups(df, threshold=0.3)
        
        # Create summary DataFrame
        # summary = (
        #     df_with_groups.groupby("group_id")
        #     .agg(
        #         group_size=("server_ttft_s", "size"),
        #         median_ttft=("server_ttft_s", "median"),
        #         min_ttft=("server_ttft_s", "min"),
        #         max_ttft=("server_ttft_s", "max"),
        #         min_start=("start_time", "min"),
        #         max_end=("end_time", "max")
        #     )
        #     .reset_index()
        # )
        
        # ---- Count model switches ----
        model_switches = 0
        model_switch_hit = 0

        grouped = df_with_groups.groupby("group_id")

        # Sort groups by ID so we can walk sequentially
        for gid in sorted(grouped.groups.keys()):
            if gid == 1:
                continue  # no switch before the first group
            model_switches += 1

            group_df = grouped.get_group(gid).sort_values("start_time")

            if len(group_df) > 1:
                first_ttft = group_df.iloc[0]["server_ttft_s"]
                median_rest = np.median(group_df.iloc[1:]["server_ttft_s"]  )
                if first_ttft - median_rest > 0.05:  # 50 ms
                    model_switch_hit += 1
        
        lru_hit_rate = (model_switches - model_switch_hit)/model_switches if model_switches > 0 else np.nan
        
        df_summary = pd.concat([pd.DataFrame({
            "test_dir": [test_dir],
            "run_file": [run_file.name],
            "num_requests": [len(df)],
            "num_groups": [df_with_groups["group_id"].nunique()],
            "median_prefill_time_s": [prefill_time_s_median],
            "median_total_decode_time_s": [total_decode_time_s_median],
            "model_switches": [model_switches],
            "model_switch_hits": [model_switch_hit],
            "lru_hit_rate": [lru_hit_rate]
        }),df_summary])
            
    


--- Analyzing test: test_1 ---

Run file: ../data/bundle_tests/amit_tests/test_1/20250822-171526.267781/synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_100_40_stream_91973997-cc8c-4735-8952-ac6a6fe18743_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_1/20250822-171526.267781/synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_100_60_stream_66b6cbc0-2c6a-46ac-9789-13d7a2d907b3_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_1/20250822-171526.267781/synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_100_10_stream_77f21779-0ff5-4399-9049-f7711e34df85_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_1/20250822-171526.267781/synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_100_90_stream_8059cbec-22dc-412c-9184-0fa7d9c8ae9a_individual_responses.json

--- Analyzing test: test_3 ---

Run file: ../data/bundle_tests/amit_tests/test_3/20250822-171614.329885/synthetic_0_Llama-3-1-OpenScholar-8B_3900_100_90_stream_b9dbe4f8-80d5-4c40-89ff-17b99c2

  arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)



Run file: ../data/bundle_tests/amit_tests/test_3/20250822-171614.329885/synthetic_0_narrativAIV2_3900_100_60_stream_c2257938-eba6-4a45-b700-6d06a072b2af_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_3/20250822-171614.329885/synthetic_0_Qwen3-32B_8000_100_10_stream_d31847a9-af17-471d-acea-c00705a1e08b_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_3/20250822-171614.329885/synthetic_0_natsumura-assistant-1-0-llama-3-1-8b_3900_100_10_stream_56bc1117-4ee0-47f8-a4a0-fc369576e981_individual_responses.json

--- Analyzing test: test_4 ---

Run file: ../data/bundle_tests/amit_tests/test_4/20250822-163616.524981/synthetic_0_Smoothie-Qwen3-32B_8000_100_60_stream_64dc929a-9576-483f-a412-ee174283fe2c_individual_responses.json

Run file: ../data/bundle_tests/amit_tests/test_4/20250822-163616.524981/synthetic_0_EVA-Qwen2-5-14B-v0-0_3900_100_60_stream_ab002736-b3b5-49d8-be51-cbe726bc4e28_individual_responses.json

Run file: ../data/bundle_tests/a

In [25]:
df_summary.sort_values(["test_dir", "run_file"], inplace=True)
df_summary

Unnamed: 0,test_dir,run_file,num_requests,num_groups,median_prefill_time_s,median_total_decode_time_s,model_switches,model_switch_hits,lru_hit_rate
0,test_1,synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_10...,10,5,0.596143,0.139529,4,0,1.0
0,test_1,synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_10...,40,13,1.177537,0.202860,12,0,1.0
0,test_1,synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_10...,60,4,1.176736,0.202699,3,0,1.0
0,test_1,synthetic_0_Meta-Llama-3-1-8B-Instruct_3900_10...,90,20,1.176889,0.202706,19,0,1.0
0,test_2,synthetic_0_Hermes-3-Llama-3-1-8B_3900_100_10_...,10,4,0.659311,0.140115,3,0,1.0
...,...,...,...,...,...,...,...,...,...
0,test_4,synthetic_0_narrativAIV2_3900_100_90_stream_b0...,90,19,1.179694,0.201901,18,0,1.0
0,test_4,synthetic_0_natsumura-assistant-1-0-llama-3-1-...,10,2,1.253460,0.200806,1,0,1.0
0,test_4,synthetic_0_natsumura-assistant-1-0-llama-3-1-...,40,6,1.178718,0.202185,5,0,1.0
0,test_4,synthetic_0_natsumura-assistant-1-0-llama-3-1-...,60,3,1.180109,0.200830,2,0,1.0


In [26]:
df_summary.to_excel("../data/bundle_tests/amit_tests/summary_bundle_tests.xlsx", index=False)