In [9]:
import os
import json
import pandas as pd
import numpy as np

In [10]:
# Load JSON: combine the per-run result files of one directory into a single
# summary table with one row per file.

output_xlsx_file_name = 'combined_results-llama-3.2-1b_tp1_bf16_random.xlsx'
json_files_dir = '../results/llama3.2-1b/llama3.2-1b-tp1-bf16-nocaching-random'

# Fields dropped from every result before it becomes a table row.
unwanted_fields = {"ttfts", "itls", "generated_texts", "errors"}

records = []

# os.listdir() returns names in arbitrary order; sorting keeps the row order of
# the combined table (and of anything exported from it) reproducible.
for json_file in sorted(os.listdir(json_files_dir)):
    if not json_file.endswith('.json'):
        continue

    # Explicit encoding so parsing does not depend on the platform default.
    with open(os.path.join(json_files_dir, json_file), 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Remove unwanted fields
    data = {k: v for k, v in data.items() if k not in unwanted_fields}

    # Replace the input_lens / output_lens lists with summary statistics
    for key, prefix in (("input_lens", "input"), ("output_lens", "output")):
        if key not in data:
            continue

        values = np.array(data.pop(key))  # the original list is not kept
        if values.size == 0:
            # No samples recorded: report NaN explicitly instead of relying on
            # how NumPy reductions treat empty arrays.
            mean = median = std = p99 = np.nan
        else:
            mean = np.mean(values)
            median = np.median(values)
            std = np.std(values)
            p99 = np.percentile(values, 99)

        data[f"mean_{prefix}_lens"] = mean
        data[f"median_{prefix}_lens"] = median
        data[f"std_{prefix}_lens"] = std
        data[f"p99_{prefix}_lens"] = p99

    # Add requested input and output tokens, read from the file name: they are
    # the 4th- and 3rd-from-last underscore-separated parts.
    name_parts = json_file.split('_')
    try:
        data["input_tokens"] = int(name_parts[-4])
        data["output_tokens"] = int(name_parts[-3])
    except (IndexError, ValueError) as exc:
        raise ValueError(
            f"Cannot parse input/output token counts from file name {json_file!r}"
        ) from exc

    records.append(data)

# Fail with a clear message instead of building an empty table.
if not records:
    raise ValueError(f"No .json result files found in {json_files_dir!r}")

# Build the table once from all collected rows
df = pd.DataFrame(records)
df.head()

Unnamed: 0,date,backend,model_id,tokenizer_id,num_prompts,request_rate,burstiness,max_concurrency,duration,completed,...,mean_input_lens,median_input_lens,std_input_lens,p99_input_lens,mean_output_lens,median_output_lens,std_output_lens,p99_output_lens,input_tokens,output_tokens
0,20250624-162606,vllm,meta-llama/Llama-3.2-1B-Instruct,meta-llama/Llama-3.2-1B-Instruct,16,inf,1.0,16,16.292616,16,...,19999.0,19999.0,0.0,19999.0,2000.0,2000.0,0.0,2000.0,20000,2000
1,20250624-162320,vllm,meta-llama/Llama-3.2-1B-Instruct,meta-llama/Llama-3.2-1B-Instruct,8,inf,1.0,8,10.096462,8,...,19999.0,19999.0,0.0,19999.0,2000.0,2000.0,0.0,2000.0,20000,2000
2,20250624-162432,vllm,meta-llama/Llama-3.2-1B-Instruct,meta-llama/Llama-3.2-1B-Instruct,16,inf,1.0,16,2.606409,16,...,999.0,999.0,0.0,999.0,1000.0,1000.0,0.0,1000.0,1000,1000
3,20250624-162657,vllm,meta-llama/Llama-3.2-1B-Instruct,meta-llama/Llama-3.2-1B-Instruct,32,inf,1.0,32,14.114119,32,...,127.0,127.0,0.0,127.0,4096.0,4096.0,0.0,4096.0,128,4096
4,20250624-163136,vllm,meta-llama/Llama-3.2-1B-Instruct,meta-llama/Llama-3.2-1B-Instruct,64,inf,1.0,64,9.78666,64,...,999.0,999.0,0.0,999.0,2000.0,2000.0,0.0,2000.0,1000,2000


In [12]:
# Write the combined table into the directory the JSON files were read from,
# leaving the DataFrame index out of the spreadsheet.
output_xlsx_path = os.path.join(json_files_dir, output_xlsx_file_name)
df.to_excel(output_xlsx_path, index=False)

In [None]:
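# Compare two line charts (draft, currently disabled).
# NOTE(review): the code below references df_tp8 / df_tp4 and a 'concurrency'
# column; the cells above do not define those frames or add that column
# explicitly (the loaded JSON may provide 'max_concurrency' instead) — confirm
# and define them before uncommenting.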
# import matplotlib.pyplot as plt 

# # Group by (input_tokens, output_tokens)
# # Plotting
# plt.figure(figsize=(12, 6))

# df_tp8.sort_values(by='concurrency', inplace=True)
# df_tp4.sort_values(by='concurrency', inplace=True)

# plt.plot(df_tp8['concurrency'], df_tp8['output_throughput'], marker='o', label='Llama 3.3 70B TP8 ShareGPT')
# plt.plot(df_tp4['concurrency'], df_tp4['output_throughput'], marker='o', label='Llama 3.3 70B TP4 ShareGPT')
# plt.title('Output Throughput vs Concurrency (Scatter)')

# # Set x-axis ticks and log scale
# concurrency_ticks = sorted(df['concurrency'].unique())
# plt.xscale('log', base=2)
# plt.xticks(concurrency_ticks, labels=[str(c) for c in concurrency_ticks])
# plt.xlabel('Concurrency (log scale)')
# plt.ylabel('Output Throughput')
# plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# # Add legend
# plt.legend()
# plt.tight_layout()

# plt.show()