In [1]:


import os
import json
from typing import List

import pandas

def summarize_exps_to_csv(benchmark_result_dir:str):
    all_data = []
    # Loop through the items in the directory
    for item in os.listdir(benchmark_result_dir):
        subdir_full_path = os.path.join(benchmark_result_dir, item)
        # Check if the item is a subdirectory
        if not (item.startswith('nreqs') and os.path.isdir(subdir_full_path)):
            print(f'skipping {item}')
            continue
        benchmark_json = os.path.join(subdir_full_path, 'benchmark.json')
        if not (os.path.isfile(benchmark_json)):
            print(f'skipping {item}')
            continue
        # Parse dir name to configs 
        configs = {}
        pairs = item.split('.')
        for pair in pairs:
            key, value = pair.split('_')
            try:
                value = int(value)
            except ValueError:
                pass
            # Add the key-value pair to the dictionary
            configs[key] = value
        # Read the benchmark result
        with open(benchmark_json) as file:
            metrics = json.load(file)
        
        # Read the profile result and analyze
        with open(os.path.join(subdir_full_path, 'profile.json')) as file:
            profile = json.load(file)
        def get_event(fuzzy_event_name:str, events:List[dict]):
            for e in events:
                if fuzzy_event_name in e["name"]:
                    return e
            return None
        # TODO: there is a reduce kernel in PagedAttentionV2 
        event2fuzzyid={
            "cache_op": "vllm::reshape_and_cache_kernel",
            "attn_usr": "vllm::paged_attention",
            "attn_sys": "flash_fwd",
            "relay_fusion": "_relay_fuse_kernel"
        }
        durations = {}
        for event_name, fuzzy_id in event2fuzzyid.items():
            event = get_event(fuzzy_id, events=profile["traceEvents"])
            if event is not None:
            # print(item, event_name)
                durations[event_name] = event['dur']
            else:
                assert configs['relay'] == "false"
                durations[event_name] = 0
        durations['others'] = metrics['Lantency (us)'] - sum(durations.values())

        # print(configs, metrics, durations)
        all_data.append({**configs, **metrics, **durations})

    df = pandas.DataFrame(all_data)
    df.to_csv(os.path.join(benchmark_result_dir, "summary.csv"), index=False)
    return df

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas


In [2]:
import os
import os.path as osp
from matplotlib import pyplot as plt
import numpy as np

bench_root = '../outputs/relay_op'
gpus = os.listdir(bench_root)

for gpu in gpus:
    bench_exps_dir = osp.join(bench_root, gpu)
    df = summarize_exps_to_csv(bench_exps_dir)
    for nreqs_plot in df['nreqs'].unique():
        for ctxlen_plot in df['ctxlen'].unique():
            condition = (df['nreqs']==nreqs_plot) & (df['ctxlen']==ctxlen_plot)
            latency_relay = df[condition & (df['relay'] == 'true')].sort_values('prefixlen')
            latency_norelay = df[condition & (df['relay'] == 'false')].sort_values('prefixlen')
            df_new = pandas.DataFrame(
                data={'PagedAttention': latency_norelay['Lantency (us)'].to_numpy(), 
                      'RelayAttention (ours)': latency_relay['Lantency (us)'].to_numpy()},
                index=latency_relay['prefixlen'].to_numpy()
            )
            figsize=(8, 6)
            plt.rcParams.update({'font.size': 12, 'font.serif': 'Times New Roman', 'axes.linewidth': 1})
            save_path=os.path.join(bench_exps_dir, f'teaser_bs{nreqs_plot}ctxlen{ctxlen_plot}.svg')
            with plt.style.context(['light']):
                ax = df_new.plot.bar(
                        xlabel='System Prompt Length (#tokens)',
                        ylabel='Latency (us)',
                        title='Latency vs. System Prompt Length',
                        width=0.7,
                        rot=0)
                ax.spines['right'].set_visible(False)
                ax.spines['top'].set_visible(False)
            plt.tight_layout()
            plt.savefig(save_path, dpi=300)
            plt.show()

skipping nreqs_1.ctxlen_128.prefixlen_64.relay_false
skipping nreqs_1.ctxlen_128.prefixlen_64.relay_true
skipping nreqs_1.ctxlen_128.prefixlen_128.relay_false
skipping nreqs_1.ctxlen_128.prefixlen_128.relay_true
skipping nreqs_1.ctxlen_128.prefixlen_256.relay_false
skipping nreqs_1.ctxlen_128.prefixlen_256.relay_true
skipping nreqs_1.ctxlen_128.prefixlen_512.relay_false
skipping nreqs_1.ctxlen_128.prefixlen_512.relay_true
skipping nreqs_1.ctxlen_128.prefixlen_1024.relay_false
skipping nreqs_1.ctxlen_128.prefixlen_1024.relay_true
skipping nreqs_1.ctxlen_128.prefixlen_2048.relay_false
skipping nreqs_1.ctxlen_128.prefixlen_2048.relay_true
skipping nreqs_1.ctxlen_128.prefixlen_4096.relay_false
skipping nreqs_1.ctxlen_128.prefixlen_4096.relay_true
skipping nreqs_1.ctxlen_128.prefixlen_8192.relay_false
skipping nreqs_1.ctxlen_128.prefixlen_8192.relay_true
skipping nreqs_1.ctxlen_128.prefixlen_16384.relay_false
skipping nreqs_1.ctxlen_128.prefixlen_16384.relay_true
skipping nreqs_1.ctxlen_256

KeyError: 'nreqs'

In [None]:
plt.style.available

['Solarize_Light2',
 '_classic_test_patch',
 '_mpl-gallery',
 '_mpl-gallery-nogrid',
 'bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn-v0_8',
 'seaborn-v0_8-bright',
 'seaborn-v0_8-colorblind',
 'seaborn-v0_8-dark',
 'seaborn-v0_8-dark-palette',
 'seaborn-v0_8-darkgrid',
 'seaborn-v0_8-deep',
 'seaborn-v0_8-muted',
 'seaborn-v0_8-notebook',
 'seaborn-v0_8-paper',
 'seaborn-v0_8-pastel',
 'seaborn-v0_8-poster',
 'seaborn-v0_8-talk',
 'seaborn-v0_8-ticks',
 'seaborn-v0_8-white',
 'seaborn-v0_8-whitegrid',
 'tableau-colorblind10']