In [2]:
import json
import os
import sys

sys.path.append('../')
sys.path.append('../../')
sys.path.append('../src')
sys.path.append('../prompts')
sys.path.append('../src/llmperf')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from benchmarking.src.llmperf import llmperf_utils
from benchmarking.src.performance_evaluation import SyntheticPerformanceEvaluator

  from .autonotebook import tqdm as notebook_tqdm


# Synthetic - Consolidate results


In [3]:
def read_json_files(folder_path):
    data = []
    
    # Iterate through all files in the folder
    for filename in os.listdir(folder_path):
        # Check if the file ends with 'individual_responses.json'
        if filename.endswith('individual_responses.json'):
            file_path = os.path.join(folder_path, filename)
            
            # Open and load the JSON file
            with open(file_path, 'r') as file:
                try:
                    json_data = json.load(file)
                    json_data = [{**request_response, 'filename': filename} for request_response in json_data]
                    data.append(json_data)
                except json.JSONDecodeError as e:
                    print(f"Error reading {file_path}: {e}")
    return data

In [4]:
results_dir = '../data/results/real_workload_tests/test'
all_responses = read_json_files(results_dir)
print(f'Number of files found {len(all_responses[0])}')

Number of files found 5


In [6]:
run_stats = []
for run in all_responses:
    metrics = {
        'filename': [],
        'input_tokens': [],
        'output_tokens': [],
        'concurrent_requests': [], 
        'server_number_input_tokens': [],
        'server_number_output_tokens': [],
        'server_ttft_s': [], 
        'server_output_token_per_s_per_request': [],
        'server_end_to_end_latency_s': [],
        'client_ttft_s': [], 
        'client_output_token_per_s_per_request': [], 
        'client_end_to_end_latency_s': []
    }
    for request_metrics in run:
        metrics['filename'].append(request_metrics['filename'])
        
        # It will skip the requests that have an error message
        if len(request_metrics['error_msg'])==0:
            input_tokens = int(request_metrics['filename'].split('_')[3])
            output_tokens = int(request_metrics['filename'].split('_')[4])
            concurrent_requests = int(request_metrics['filename'].split('_')[5])
            metrics['input_tokens'].append(input_tokens)
            metrics['output_tokens'].append(output_tokens)
            metrics['concurrent_requests'].append(concurrent_requests)
            
            metrics['server_number_input_tokens'].append(request_metrics['server_number_input_tokens'])
            metrics['server_number_output_tokens'].append(request_metrics['server_number_output_tokens'])
            metrics['server_ttft_s'].append(request_metrics['server_ttft_s'])
            metrics['server_output_token_per_s_per_request'].append(request_metrics['server_output_token_per_s_per_request'])
            metrics['server_end_to_end_latency_s'].append(request_metrics['server_end_to_end_latency_s'])
            metrics['client_ttft_s'].append(request_metrics['client_ttft_s'])
            metrics['client_output_token_per_s_per_request'].append(request_metrics['client_output_token_per_s_per_request'])
            metrics['client_end_to_end_latency_s'].append(request_metrics['client_end_to_end_latency_s'])
    df_metrics =  pd.DataFrame(metrics)
    # df_metric_stats = df_metrics.groupby(by='filename').agg(['median','std'])
    df_metric_stats = df_metrics.groupby(by='filename')[['server_ttft_s','server_output_token_per_s_per_request','server_end_to_end_latency_s','client_ttft_s','client_output_token_per_s_per_request','client_end_to_end_latency_s']].agg(['median','std'])
    df_metric_stats.style.format("{:,.3f}")
    df_parameters = df_metrics.groupby(by='filename')[['input_tokens','output_tokens','concurrent_requests']].agg(['first'])    
    df_parameters['request_count'] = df_metrics.shape[0]
    df_parameters.style.format("{:,.0f}")
    df = pd.concat([df_parameters, df_metric_stats], axis=1)
    run_stats.append(df)
results = pd.concat(run_stats)
results.columns = ['_'.join(col).strip() for col in results.columns.values]
results

Unnamed: 0_level_0,input_tokens_first,output_tokens_first,concurrent_requests_first,request_count_,server_ttft_s_median,server_ttft_s_std,server_output_token_per_s_per_request_median,server_output_token_per_s_per_request_std,server_end_to_end_latency_s_median,server_end_to_end_latency_s_std,client_ttft_s_median,client_ttft_s_std,client_output_token_per_s_per_request_median,client_output_token_per_s_per_request_std,client_end_to_end_latency_s_median,client_end_to_end_latency_s_std
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
synthetic_0_Meta-Llama-3-1-8B-Instruct_100_100_1_syntheticdataset_stream_individual_responses.json,100,100,1,5,0.024444,0.000413,1059.051517,3.974886,0.117808,0.000543,0.691354,0.096106,1073.511859,64.155438,0.784492,0.091407
