# LLM Performance Results Analysis

This notebook reads all `*_summary.json` files from the `result_outputs` folder, loads them into a pandas DataFrame, saves the combined table as a CSV file, and plots latency and throughput comparisons by model.

In [None]:
import glob
import json
import os
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Define the path to the result_outputs folder
results_path = Path('result_outputs')

# Find all files ending with _summary.json.
# Path.glob yields matches in arbitrary order, so sort them to keep the
# listing (and the row order of anything built from it) reproducible.
summary_files = sorted(results_path.glob('*_summary.json'))
print(f"Found {len(summary_files)} summary files:")
for file in summary_files:
    print(f"  - {file.name}")

In [None]:
# Function to read and parse JSON files
def read_summary_json(file_path):
    """Load one JSON file and return its parsed content.

    Args:
        file_path: Location of the JSON file (anything ``open`` accepts,
            e.g. a ``str`` or ``pathlib.Path``).

    Returns:
        The Python object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's default text encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [None]:
# Load every discovered summary file, keeping the ones that parse
all_data = []

for file_path in summary_files:
    try:
        summary = read_summary_json(file_path)
        # Record which file this entry came from
        summary['filename'] = file_path.name
    except Exception as exc:
        # Best effort: report the failure and continue with the next file
        print(f"Error loading {file_path.name}: {exc}")
    else:
        all_data.append(summary)
        print(f"Successfully loaded: {file_path.name}")

print(f"\nLoaded {len(all_data)} files successfully")

In [None]:
# Build the analysis table: one row per loaded summary
df = pd.DataFrame(data=all_data)

# Quick sanity check of the table's size and column names
print(f"DataFrame shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")
df.head()

In [None]:
# cleanup
## the 'version' column is not needed; remove it when present (a missing column is ignored)
df = df.drop(columns=['version'], errors='ignore')

In [None]:
# Clean up model names - keep only the text after the last '/'
df['model'] = df['model'].str.rsplit('/', n=1).str[-1]
print("Cleaned model names:", df['model'].unique(), sep="\n")

In [None]:
# Summarize the DataFrame (printed by df.info()), then preview its first rows
print("DataFrame Info:")
df.info()
print("\nFirst few rows:")
df.head(n=5)

In [None]:
# Save the DataFrame to a CSV file in the result_outputs folder.
# The target is built from `results_path` so the export always lands in the
# same folder the summaries were read from, instead of repeating the folder name.
csv_filename = results_path / 'llm_performance_results.csv'
df.to_csv(csv_filename, index=False)  # index=False: do not write the row index as a column
n_rows, n_cols = df.shape
print(f"DataFrame saved to {csv_filename}")
print(f"Saved {n_rows} rows and {n_cols} columns")

# Plot Time to First Token (TTFT) by model
# `plt` (matplotlib.pyplot) and `sns` (seaborn) come from the notebook's import cell,
# so this cell also runs on a fresh kernel.
df_sorted_ttft = df.sort_values('results_ttft_s_mean', ascending=True)

# Use an explicit figure/axes pair rather than pyplot's implicit current figure
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=df_sorted_ttft, x='model', y='results_ttft_s_mean', hue='model', palette='plasma', legend=False, ax=ax)
ax.set_title('Time to First Token (TTFT) by Model (Sorted Low to High, Lower is Better)')
ax.set_xlabel('Model')
ax.set_ylabel('Mean TTFT (seconds)')
# Rotate the model names and right-align them so long labels do not overlap
plt.setp(ax.get_xticklabels(), rotation=60, ha='right')
fig.tight_layout()
plt.show()

In [None]:
# Plot a bar graph of 'model' vs 'results_num_completed_requests_per_min', sorted by throughput (low to high)
# `plt` and `sns` are provided by the notebook's import cell; they are no longer imported mid-notebook.

# Sort the DataFrame by throughput
df_sorted = df.sort_values('results_num_completed_requests_per_min', ascending=True)

# Use an explicit figure/axes pair rather than pyplot's implicit current figure
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=df_sorted, x='model', y='results_num_completed_requests_per_min', hue='model', palette='viridis', legend=False, ax=ax)
ax.set_title('Throughput: Completed Requests per Minute by Model (Sorted Low to High, Higher is Better)')
ax.set_xlabel('Model')
ax.set_ylabel('Completed Requests per Minute')
# Rotate the model names and right-align them so long labels do not overlap
plt.setp(ax.get_xticklabels(), rotation=60, ha='right')
fig.tight_layout()
plt.show()

## Latency Analysis

In [None]:
# Create a comprehensive latency comparison chart: a 2x2 grid with one panel per view
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. Inter-token Latency (bars sorted low to high)
df_sorted_inter = df.sort_values('results_inter_token_latency_s_mean', ascending=True)
sns.barplot(data=df_sorted_inter, x='model', y='results_inter_token_latency_s_mean', 
           hue='model', palette='viridis', legend=False, ax=axes[0,0])
axes[0,0].set_title('Inter-token Latency by Model (Lower is Better)')
axes[0,0].set_xlabel('Model')
axes[0,0].set_ylabel('Mean Inter-token Latency (seconds)')

# 2. End-to-End Latency (bars sorted low to high)
df_sorted_e2e = df.sort_values('results_end_to_end_latency_s_mean', ascending=True)
sns.barplot(data=df_sorted_e2e, x='model', y='results_end_to_end_latency_s_mean', 
           hue='model', palette='plasma', legend=False, ax=axes[0,1])
axes[0,1].set_title('End-to-End Latency by Model (Lower is Better)')
axes[0,1].set_xlabel('Model')
axes[0,1].set_ylabel('Mean End-to-End Latency (seconds)')

# 3. Latency Distribution (Box plot for TTFT)
# Each box is drawn from the four quantile columns (P25, P50, P75, P90) of a model,
# reshaped to long format so every quantile value becomes one observation.
sns.boxplot(data=df.melt(id_vars=['model'], 
                        value_vars=['results_ttft_s_quantiles_p25', 'results_ttft_s_quantiles_p50', 
                                   'results_ttft_s_quantiles_p75', 'results_ttft_s_quantiles_p90']),
           x='model', y='value', hue='model', palette='Set2', legend=False, ax=axes[1,0])
axes[1,0].set_title('TTFT Distribution (P25, P50, P75, P90)')
axes[1,0].set_xlabel('Model')
axes[1,0].set_ylabel('TTFT (seconds)')

# 4. Combined Latency Comparison
# Explicit display names for the legend: deriving them from the column names with
# .str.title() rendered the acronym "TTFT" as "Ttft".
latency_labels = {
    'results_ttft_s_mean': 'TTFT',
    'results_inter_token_latency_s_mean': 'Inter-token Latency',
    'results_end_to_end_latency_s_mean': 'End-to-End Latency',
}
latency_metrics = list(latency_labels)
df_melted = df.melt(id_vars=['model'], value_vars=latency_metrics, 
                   var_name='latency_type', value_name='latency_seconds')
df_melted['latency_type'] = df_melted['latency_type'].map(latency_labels)

sns.barplot(data=df_melted, x='model', y='latency_seconds', hue='latency_type', ax=axes[1,1])
axes[1,1].set_title('Latency Metrics Comparison')
axes[1,1].set_xlabel('Model')
axes[1,1].set_ylabel('Latency (seconds)')
# Place the legend outside the plotting area, to the right of the panel
axes[1,1].legend(title='Latency Type', bbox_to_anchor=(1.05, 1), loc='upper left')

# Rotate the model names on every panel and right-align them, matching the
# single-chart cells of this notebook (rotation=60, ha='right').
for ax in axes.flat:
    ax.tick_params(axis='x', rotation=60, labelsize=9)
    plt.setp(ax.get_xticklabels(), ha='right')

plt.tight_layout()
plt.show()

## Throughput Analysis

In [None]:
# Create comprehensive throughput analysis: a 2x2 grid with one panel per view
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. Overall Output Throughput (tokens/sec), bars sorted high to low
df_sorted_throughput = df.sort_values('results_mean_output_throughput_token_per_s', ascending=False)
sns.barplot(data=df_sorted_throughput, x='model', y='results_mean_output_throughput_token_per_s', 
           hue='model', palette='viridis', legend=False, ax=axes[0,0])
axes[0,0].set_title('Overall Output Throughput by Model (Higher is Better)')
axes[0,0].set_xlabel('Model')
axes[0,0].set_ylabel('Mean Output Throughput (tokens/sec)')

# 2. Request Throughput (requests/min), bars sorted high to low
df_sorted_requests = df.sort_values('results_num_completed_requests_per_min', ascending=False)
sns.barplot(data=df_sorted_requests, x='model', y='results_num_completed_requests_per_min', 
           hue='model', palette='plasma', legend=False, ax=axes[0,1])
axes[0,1].set_title('Request Throughput by Model (Higher is Better)')
axes[0,1].set_xlabel('Model')
axes[0,1].set_ylabel('Completed Requests per Minute')

# 3. Per-Request Output Throughput Distribution
# Each box is drawn from the four quantile columns (P25, P50, P75, P90) of a model,
# reshaped to long format so every quantile value becomes one observation.
sns.boxplot(data=df.melt(id_vars=['model'], 
                        value_vars=['results_request_output_throughput_token_per_s_quantiles_p25',
                                   'results_request_output_throughput_token_per_s_quantiles_p50',
                                   'results_request_output_throughput_token_per_s_quantiles_p75',
                                   'results_request_output_throughput_token_per_s_quantiles_p90']),
           x='model', y='value', hue='model', palette='tab10', legend=False, ax=axes[1,0])
axes[1,0].set_title('Per-Request Throughput Distribution (P25, P50, P75, P90)')
axes[1,0].set_xlabel('Model')
axes[1,0].set_ylabel('Per-Request Throughput (tokens/sec)')

# Rotate the model names on the three categorical panels and right-align them,
# matching the single-chart cells of this notebook (rotation=60, ha='right').
for ax in (axes[0,0], axes[0,1], axes[1,0]):
    ax.tick_params(axis='x', rotation=60, labelsize=9)
    plt.setp(ax.get_xticklabels(), ha='right')

# 4. Throughput vs Latency Scatter Plot
# Points are colored by their row position in df (c=range(len(df))).
axes[1,1].scatter(df['results_ttft_s_mean'], df['results_mean_output_throughput_token_per_s'], 
                 s=100, alpha=0.7, c=range(len(df)), cmap='viridis')
axes[1,1].set_xlabel('Mean TTFT (seconds)')
axes[1,1].set_ylabel('Mean Output Throughput (tokens/sec)')
axes[1,1].set_title('Throughput vs TTFT Trade-off')

# Add model labels to scatter plot.
# Iterate the three columns in lockstep instead of a per-row df.iloc lookup, and go
# through str() so a missing (NaN) model name is labeled rather than raising on .split.
for model, ttft, throughput in zip(df['model'],
                                   df['results_ttft_s_mean'],
                                   df['results_mean_output_throughput_token_per_s']):
    axes[1,1].annotate(str(model).split('/')[-1],
                      (ttft, throughput),
                      xytext=(5, 5), textcoords='offset points', fontsize=8, rotation=45)

plt.tight_layout()
plt.show()

## Save as HTML

In [None]:
# Run `jupyter nbconvert` in a shell to export the notebooks matching *.ipynb to HTML.
# NOTE(review): `!!` is presumably IPython's capturing shell escape (output returned as a list rather than streamed) — confirm.
!!jupyter nbconvert  --to HTML *.ipynb