In [None]:
import sys

sys.path.append('../')
sys.path.append('../../')

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from typing import List

from benchmarking.utils import read_perf_eval_json_files

import warnings
warnings.filterwarnings('ignore')

## Read benchmarking output files
Responses with errors will be skipped.

In [None]:
# Results directory of each provider to compare. The last component of every
# path (the folder name) is what the loading cell uses as the provider's name,
# so it must match the keys of the plotting `palette`.
provider_results_dir_paths = [
    f'../data/results/llmperf/{provider_folder}'
    for provider_folder in ('provider1', 'provider2', 'provider3')
]

In [None]:
# One entry per provider, shaped as
#   {'provider': <folder name>, 'responses': <output of read_perf_eval_json_files>}
# NOTE(review): 'responses' is presumably a pandas DataFrame (later cells call
# .copy() and .columns on it) - confirm against benchmarking.utils.
all_provider_responses: List[dict] = []

for provider_path in provider_results_dir_paths:
    # The provider name is the last path component. Strip any trailing slash
    # first so that '.../provider1/' yields 'provider1' rather than ''.
    provider_identifier = provider_path.rstrip('/').split('/')[-1]
    provider_responses = {
        'provider': provider_identifier,
        'responses': read_perf_eval_json_files(provider_path, type='summary'),
    }
    all_provider_responses.append(provider_responses)

In [None]:
# Columns shown first in the combined table; every other column keeps its
# original relative order after them.
first_columns = ['name','provider','model','num_input_tokens','num_output_tokens','num_concurrent_requests']

run_stats = []
for provider_run in all_provider_responses:
    df = provider_run['responses'].copy()
    # Label the rows with the provider of *this* run. Reading the name from a
    # variable left over from the loading loop would tag every run with the
    # last provider processed there.
    df['provider'] = provider_run['provider']
    following_columns = [column for column in df.columns if column not in first_columns]
    df = df[first_columns + following_columns]
    run_stats.append(df)

# ignore_index gives the combined frame a fresh 0..n-1 index, so rows coming
# from different providers do not share index labels.
results = pd.concat(run_stats, ignore_index=True)
results.head()

## Plot benchmarking charts among providers

You may change the palette colors to whichever best identify each provider; see the [seaborn color palettes tutorial](https://seaborn.pydata.org/tutorial/color_palettes.html) for reference. You will also need to update each figure's suptitle to reflect the model you're showing and any other relevant details.

### TTFT

In [None]:
# Palette for the providers, change colors and provider names as needed.
# The keys must match the values found in results['provider'].
palette = {'provider1': 'orange', 'provider2': 'tab:purple', 'provider3': 'xkcd:blue'}

# Sorted unique concurrency levels; one subplot row is drawn per level
concurrent_values = sorted(results['num_concurrent_requests'].unique())

# Set up the figure and axes.
# squeeze=False makes plt.subplots always return an array of Axes; without it
# a single concurrency level yields a bare Axes object that cannot be iterated.
fig, axes = plt.subplots(
    len(concurrent_values),
    1,
    figsize=(10, 5 * len(concurrent_values)),
    sharex=False,
    squeeze=False,
)
axes = axes.ravel()

# Add a supertitle, it could be the model name
fig.suptitle('<<Model_name>>', fontsize=20)

# Plot each concurrent value: median (p50) client TTFT per input size, grouped by provider
for ax, concurrent in zip(axes, concurrent_values):
    subset = results[results['num_concurrent_requests'] == concurrent]
    sns.barplot(data=subset, x='num_input_tokens', y='client_ttft_s_p50', hue='provider', ax=ax, palette=palette, errorbar=None)
    ax.set_title(f'Client TTFT (s) for Concurrent Requests: {concurrent}')
    ax.set_xlabel('Input Tokens')
    ax.set_ylabel('Client TTFT (s)')
    # Footnote placed below the x-axis (negative y in axes-fraction coordinates)
    ax.annotate(
        'Note: A lower TTFT is better',
        xy=(0.5, -0.2),
        xycoords='axes fraction',
        ha='center',
        va="center",
        fontsize=10,
    )

plt.tight_layout()
plt.show()

### Latency

In [None]:
# Sorted unique concurrency levels; one subplot row is drawn per level
concurrent_values = sorted(results['num_concurrent_requests'].unique())

# Set up the figure and axes.
# squeeze=False makes plt.subplots always return an array of Axes; without it
# a single concurrency level yields a bare Axes object that cannot be iterated.
fig, axes = plt.subplots(
    len(concurrent_values),
    1,
    figsize=(10, 5 * len(concurrent_values)),
    sharex=False,
    squeeze=False,
)
axes = axes.ravel()

# Add a supertitle, it could be the model name
fig.suptitle('<<Model_name>>', fontsize=20)

# Plot each concurrent value: median (p50) client end-to-end latency per input
# size, grouped by provider. `palette` is the one defined in the TTFT cell.
for ax, concurrent in zip(axes, concurrent_values):
    subset = results[results['num_concurrent_requests'] == concurrent]
    sns.barplot(data=subset, x='num_input_tokens', y='client_end_to_end_latency_s_p50', hue='provider', ax=ax, palette=palette, errorbar=None)
    ax.set_title(f'Client E2E Latency (s) for Concurrent Requests: {concurrent}')
    ax.set_xlabel('Input Tokens')
    ax.set_ylabel('Client E2E Latency (s)')
    # Footnote placed below the x-axis (negative y in axes-fraction coordinates)
    ax.annotate(
        'Note: A lower Latency is better',
        xy=(0.5, -0.2),
        xycoords='axes fraction',
        ha='center',
        va="center",
        fontsize=10,
    )


plt.tight_layout()
plt.show()

### Tokens / sec

In [None]:
# Sorted unique concurrency levels; one subplot row is drawn per level
concurrent_values = sorted(results['num_concurrent_requests'].unique())

# Set up the figure and axes.
# squeeze=False makes plt.subplots always return an array of Axes; without it
# a single concurrency level yields a bare Axes object that cannot be iterated.
fig, axes = plt.subplots(
    len(concurrent_values),
    1,
    figsize=(10, 5 * len(concurrent_values)),
    sharex=False,
    squeeze=False,
)
axes = axes.ravel()

# Add a supertitle, it could be the model name
fig.suptitle('<<Model_name>>', fontsize=20)

# Plot each concurrent value: median (p50) per-request output tokens/sec per
# input size, grouped by provider. `palette` is the one defined in the TTFT cell.
for ax, concurrent in zip(axes, concurrent_values):
    subset = results[results['num_concurrent_requests'] == concurrent]
    sns.barplot(data=subset, x='num_input_tokens', y='client_output_token_per_s_p50', hue='provider', ax=ax, palette=palette, errorbar=None)
    ax.set_title(f'Client Tokens/sec per request for Concurrent Requests: {concurrent}')
    ax.set_xlabel('Input Tokens')
    ax.set_ylabel('Client Tokens/sec per request')
    # Footnote placed below the x-axis (negative y in axes-fraction coordinates)
    ax.annotate(
        'Note: higher Tokens/sec is better',
        xy=(0.5, -0.2),
        xycoords='axes fraction',
        ha='center',
        va="center",
        fontsize=10,
    )


plt.tight_layout()
plt.show()

### Throughput

In [None]:
# Sorted unique concurrency levels; one subplot row is drawn per level
concurrent_values = sorted(results['num_concurrent_requests'].unique())

# Set up the figure and axes.
# squeeze=False makes plt.subplots always return an array of Axes; without it
# a single concurrency level yields a bare Axes object that cannot be iterated.
fig, axes = plt.subplots(
    len(concurrent_values),
    1,
    figsize=(10, 5 * len(concurrent_values)),
    sharex=False,
    squeeze=False,
)
axes = axes.ravel()

# Add a supertitle, it could be the model name
fig.suptitle('<<Model_name>>', fontsize=20)

# Throughput here is derived, not measured: number of concurrent requests
# multiplied by the median (p50) per-request output tokens/sec.
# Re-running this cell recomputes the same column, so it is safe to repeat.
results['client_throughput_median'] = results['num_concurrent_requests']*results['client_output_token_per_s_p50']

# Plot each concurrent value, grouped by provider. `palette` is the one defined in the TTFT cell.
for ax, concurrent in zip(axes, concurrent_values):
    subset = results[results['num_concurrent_requests'] == concurrent]
    sns.barplot(data=subset, x='num_input_tokens', y='client_throughput_median', hue='provider', ax=ax, palette=palette, errorbar=None)
    ax.set_title(f'Client Throughput (tok/s) for Concurrent Requests: {concurrent}')
    ax.set_xlabel('Input Tokens')
    ax.set_ylabel('Client Throughput (tok/s)')
    # Footnote placed below the x-axis (negative y in axes-fraction coordinates)
    ax.annotate(
        'Note: A higher Throughput is better',
        xy=(0.5, -0.2),
        xycoords='axes fraction',
        ha='center',
        va="center",
        fontsize=10,
    )


plt.tight_layout()
plt.show()