In [None]:
import os
import json
from typing import List

import pandas

def collect_stat(benchmark_result_dir:str, csv_path=None):
    """Gather per-run benchmark results into a single CSV summary.

    Every sub-directory of ``benchmark_result_dir`` whose name starts with
    ``nreqs`` and which contains a ``benchmark.json`` file is treated as one
    run.  The directory name is parsed as ``.``-separated ``key_value``
    segments (for example ``nreqs_100.backend_vllm+pc``); integer-looking
    values are converted to ``int``, everything else stays a string.  The
    parsed configuration and the JSON metrics are merged into one row; if a
    key appears in both, the JSON value wins.

    Args:
        benchmark_result_dir: Directory holding one sub-directory per run.
        csv_path: Destination CSV file.  Defaults to ``summary.csv`` inside
            ``benchmark_result_dir``.

    Returns:
        The path of the CSV file that was written.

    Raises:
        ValueError: If a segment of a run directory name contains no ``_``
            and therefore cannot be split into a key and a value.
    """
    if csv_path is None:
        csv_path = os.path.join(benchmark_result_dir, "summary.csv")

    all_data = []
    # Sorted so that the row order of the CSV does not depend on the
    # arbitrary order in which the OS lists directory entries.
    for item in sorted(os.listdir(benchmark_result_dir)):
        subdir_full_path = os.path.join(benchmark_result_dir, item)

        # Only 'nreqs*' sub-directories are benchmark runs.
        if not (item.startswith('nreqs') and os.path.isdir(subdir_full_path)):
            print(f'skipping {item}')
            continue

        # A run without a result file has nothing to contribute.
        result_json_file = os.path.join(subdir_full_path, 'benchmark.json')
        if not os.path.isfile(result_json_file):
            print(f'skipping {item}')
            continue

        # Parse the directory name into configuration key/value pairs.
        configs = {}
        for pair in item.split('.'):
            # Split on the FIRST underscore only, so that values which
            # themselves contain underscores (e.g. 'backend_vllm_pc') are
            # kept intact instead of failing to unpack.
            key, sep, value = pair.partition('_')
            if not sep:
                raise ValueError(
                    f"cannot parse segment {pair!r} of directory name "
                    f"{item!r}: expected the form 'key_value'"
                )
            try:
                value = int(value)
            except ValueError:
                # Not an integer: keep the raw string.
                pass
            configs[key] = value

        # Read the benchmark result
        with open(result_json_file, encoding='utf-8') as file:
            metrics = json.load(file)

        all_data.append({**configs, **metrics})

    df = pandas.DataFrame(all_data)
    df.to_csv(csv_path, index=False)

    return csv_path

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from typing import Iterable, Union, List, Dict

def plot_multi_barchart(group_ids:Union[List[int], List[str]],
                        data_dict:Dict[str, Iterable], ax:"plt.Axes"=None,
                        put_text_on_bars:bool=False):
    """Draw a grouped bar chart: one cluster per group, one bar per series.

    Clusters are centred on the integer x positions ``0 .. len(group_ids)-1``
    and labelled with ``group_ids``.  Inside a cluster the bars follow the
    iteration order of ``data_dict``.  All drawing is done on ``ax`` itself,
    so the function also works when ``ax`` is not the current pyplot axes
    (for example one panel of a multi-panel figure).

    Args:
        group_ids: Tick labels, one per cluster.
        data_dict: Maps a series name (used as the bar label) to its bar
            heights; each value must have exactly ``len(group_ids)`` entries.
        ax: Axes to draw on.  A new figure and axes are created when ``None``.
        put_text_on_bars: When true, write each height (one decimal place)
            at the top of its bar.

    Returns:
        The axes that were drawn on.
    """
    ngrps = len(group_ids)
    nbars_per_grp = len(data_dict)
    # The "+ 1" leaves one bar-width of empty space between adjacent clusters.
    bar_width = 1. / (nbars_per_grp + 1)
    opacity = 0.8

    # Left edge of every cluster, chosen so the cluster is centred on its tick.
    leftmost = np.arange(ngrps) - (nbars_per_grp*bar_width / 2)

    if ax is None:
        _, ax = plt.subplots()

    for idx, (name, data) in enumerate(data_dict.items()):
        assert len(data) == ngrps, \
            f'series {name!r} has {len(data)} values, expected {ngrps}'
        center_offset_to_leftmost = bar_width / 2 + bar_width*idx
        centers = leftmost + center_offset_to_leftmost
        # Use the Axes methods rather than the pyplot functions: pyplot draws
        # on whatever axes is "current", which need not be `ax`.
        ax.bar(
            centers,    # center of the bars
            data,       # heights of the bars
            bar_width,
            alpha=opacity,
            label=name
        )
        if put_text_on_bars:
            for center, height in zip(centers, data):
                ax.text(center, height, f'{height:.1f}', ha='center', size='x-small')

    ax.set_xticks(np.arange(ngrps), group_ids)

    return ax

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def plot_figure(csv_path:str,
                metric_plot='generate_tok_per_sec',
                y_lim=None,
                save_name=None,
                ftype='line'):
    """Plot one metric against prefix length for the three backends in a CSV.

    The CSV is expected to have the columns ``backend``, ``prefixlen`` and
    ``metric_plot``.  Rows are grouped by backend and labelled as follows:
    ``'vllm+'`` -> "vLLM-RA (ours)", ``'vllm+pc'`` -> "vLLM+",
    ``'vllm'`` -> "vLLM".  Within each group the rows are ordered by
    ``prefixlen``.

    Args:
        csv_path: CSV file to read.
        metric_plot: Name of the column plotted on the y axis.
        y_lim: Optional y-axis limits, passed to ``Axes.set_ylim``.  Applied
            to both line and bar charts.
        save_name: Optional file name; when given, the figure is saved under
            that name in the directory that contains ``csv_path``.
        ftype: ``'line'`` for one line per backend, ``'bar'`` for a grouped
            bar chart with one cluster per prefix length.
    """
    # Imported here so the function does not rely on another notebook cell
    # having put `os` into the global namespace.
    import os

    assert ftype in {'line', 'bar'}
    line_styles = ['-o', '-D', '->']
    df = pd.read_csv(csv_path)
    condition_dict = {
        'vLLM-RA (ours)': (df['backend']=='vllm+'),
        'vLLM+': (df['backend']=='vllm+pc'),
        'vLLM': (df['backend']=='vllm')
    }

    # label -> (prefix lengths, metric values), both ordered by prefix length
    data_dict = {}
    for name, cond in condition_dict.items():
        view = df[cond].sort_values('prefixlen')
        x = view['prefixlen'].to_numpy(dtype=np.int32)
        y = view[metric_plot].to_numpy(dtype=np.float32)
        data_dict[name] = (x, y)

    # NOTE(review): 'grid' is not one of the style names bundled with
    # matplotlib; presumably it is registered by an external style package
    # in the author's environment -- confirm, otherwise entering the context
    # below fails.
    plt_context = plt.style.context(['grid'])

    with plt_context:
        figure, ax = plt.subplots()
        if ftype == 'line':
            for idx, (label, data) in enumerate(data_dict.items()):
                x, y = data
                ax.plot(x, y, line_styles[idx], label=label)
        else:
            bar_dict = {k:v for k, (_, v) in data_dict.items()}
            # The prefix lengths of the first backend label the clusters;
            # this assumes every backend was measured at the same prefix
            # lengths -- TODO confirm against the CSV contents.
            group_ids = list(data_dict.values())[0][0]
            plot_multi_barchart(group_ids=group_ids, data_dict=bar_dict, ax=ax)

        # Honour y_lim for both chart types (it used to be ignored for bars).
        if y_lim is not None:
            ax.set_ylim(y_lim)

        # ax.set_ylabel('Throughput (tok/s)')
        # ax.set_xlabel('System Prompt Length (#tokens)')
        # ax.legend()

    # Act on the figure created above rather than on pyplot's "current" one.
    figure.tight_layout()
    if save_name:
        save_path = os.path.join(os.path.dirname(csv_path), save_name)
        figure.savefig(save_path)
    plt.show()

In [None]:
bench='noninteractive_bench_sharegpt'
exp_root = f'../outputs/{bench}'

csv_root = f'../stat/{bench}'
os.makedirs(csv_root, exist_ok=True)

# Set to True to rebuild the per-(gpu, model) CSV summaries under csv_root
# from the raw results under exp_root before plotting.
collect = False

figsize=(8, 6)

plt.rcParams.update({'font.size': 16,
                     'font.weight': 'bold',
                     'lines.markersize': 10,
                     'lines.linewidth': 2,
                    #  'font.family': 'serif',
                    #  'font.serif': 'Times New Roman',
                     'axes.linewidth': 2
                     })

if collect:
    # Expected layout: exp_root/<gpu>/<model>/<one directory per run>.
    for gpu in sorted(os.listdir(exp_root)):
        gpu_dir = os.path.join(exp_root, gpu)
        if not os.path.isdir(gpu_dir):
            # Stray files would make the nested os.listdir below fail.
            continue
        for model in sorted(os.listdir(gpu_dir)):
            benchmark_result_dir = os.path.join(gpu_dir, model)
            if not os.path.isdir(benchmark_result_dir):
                continue
            csv_path = os.path.join(csv_root, f'{gpu}.{model}.csv')
            collect_stat(benchmark_result_dir, csv_path=csv_path)

# Sorted so the figures are produced in a reproducible order.
for file in sorted(os.listdir(csv_root)):
    if not file.endswith('.csv'):
        continue
    stem = file[:-len('.csv')]
    if '.' not in stem:
        # Not of the form '<gpu>.<model>.csv'.
        print(f'skipping {file}')
        continue
    # Split on the first dot only: model names may themselves contain dots,
    # which made the former three-way unpack of file.split('.') fail.  This
    # assumes the GPU name contains no dot.  It also avoids rebinding `_`,
    # which IPython uses for the last cell output.
    gpu, model = stem.split('.', 1)
    csv_path = os.path.join(csv_root,file)
    print(gpu, model)
    if model == 'Llama-2-7b-hf':
        save_name = f'sharegpt.batch.{gpu}.{model}.svg'
        plot_figure(csv_path, ftype='bar', save_name=save_name)

In [15]:
import pandas as pd
import numpy as np

bench = 'noninteractive_bench_sharegpt'
csv_root = f'../stat/{bench}'
model = 'Llama-2-7b-hf'

# For every GPU, report the smallest and largest throughput ratio of the
# 'vllm+pc' and 'vllm+' backends relative to the plain 'vllm' backend,
# taken over a fixed set of prefix lengths.
for gpu in ('NVIDIA-A40', 'NVIDIA-A100-PCIE-40GB', 'A100'):
    csv_path = os.path.join(csv_root, f'{gpu}.{model}.csv')
    df = pd.read_csv(csv_path)

    pc_ratios = []
    rl_ratios = []
    for prefix_len in (64, 128, 256, 512, 1024, 2048):
        rows = df[df['prefixlen'] == prefix_len]
        # Throughput values of each backend at this prefix length.
        throughput = {
            backend: rows.loc[rows['backend'] == backend, 'generate_tok_per_sec'].to_numpy()
            for backend in ('vllm', 'vllm+pc', 'vllm+')
        }
        baseline = throughput['vllm']
        pc_ratios.append(throughput['vllm+pc'] / baseline)
        rl_ratios.append(throughput['vllm+'] / baseline)

    pc_arr = np.array(pc_ratios)
    rl_arr = np.array(rl_ratios)
    print(f"{gpu}, pc: {pc_arr.min()}, {pc_arr.max()}")
    print(f"{gpu}, rl: {rl_arr.min()}, {rl_arr.max()} ")

NVIDIA-A40, pc: 1.029582705964609, 2.086327360332196
NVIDIA-A40, rl: 1.0639575259612213, 4.358663222551045 
NVIDIA-A100-PCIE-40GB, pc: 0.992009175733808, 2.0172629630284975
NVIDIA-A100-PCIE-40GB, rl: 1.0677108396656112, 4.32136891460454 
A100, pc: 0.993381050410718, 1.6606472227314528
A100, rl: 1.06927025067276, 3.9044133995268306 
