# <font color="#000099">llm-d-benchmarking Sweep Analysis</font>

This notebook imports data from configuration sweeps with [llm-d-benchmark](https://github.com/llm-d/llm-d-benchmark) using the [vLLM benchmark](https://github.com/vllm-project/vllm/tree/main/benchmarks) harness, and creates Pareto plots to compare configurations for a particular model and workload.

The first cell contains function and class definitions to support basic functionality, while the second cell imports data from user-provided directories into [Pandas](https://pandas.pydata.org/) [DataFrames](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html). Two different DataFrames are created, one for prefill/decode disaggregated setups, and one for standalone (standard vLLM) setups.

The subsequent cells list the available scenarios (each a combination of model, GPU, and workload input/output sequence length) and, for a selected scenario, create tables and Pareto plots comparing the swept configurations.

While this basic functionality may be sufficient for many purposes, this notebook should be considered a starting point for more detailed analysis and customization by the user.

## Package imports and definitions (run once)

In [None]:
################################################################################
# Package imports
################################################################################

import os
import re
import sys
from typing import Any

import matplotlib
import matplotlib.pyplot as plt
import pandas
import yaml

################################################################################
# Function and class definitions
################################################################################

class Text:
    """ANSI SGR escape sequences used to format terminal text."""

    # Reset and text attributes
    DEFAULT = "\x1b[0m"
    BOLD = "\x1b[1m"
    UNDERLINE = "\x1b[4m"
    BOLD_OFF = "\x1b[22m"
    UNDERLINE_OFF = "\x1b[24m"

    # Foreground colors
    BLACK = "\x1b[30m"
    RED = "\x1b[31m"
    GREEN = "\x1b[32m"
    YELLOW = "\x1b[33m"
    BLUE = "\x1b[34m"
    MAGENTA = "\x1b[35m"
    CYAN = "\x1b[36m"
    WHITE = "\x1b[37m"
    DEFAULT_COLOR = "\x1b[39m"

    # Background colors
    BG_BLACK = "\x1b[40m"
    BG_RED = "\x1b[41m"
    BG_GREEN = "\x1b[42m"
    BG_YELLOW = "\x1b[43m"
    BG_BLUE = "\x1b[44m"
    BG_MAGENTA = "\x1b[45m"
    BG_CYAN = "\x1b[46m"
    BG_WHITE = "\x1b[47m"
    DEFAULT_BG_COLOR = "\x1b[49m"


def warn(mesg: str) -> None:
    """Write ``mesg`` to stderr as a yellow warning line."""
    # The color reset is emitted after the newline, as a separate trailing code.
    highlighted = '{}{}\n{}'.format(Text.YELLOW, mesg, Text.DEFAULT)
    sys.stderr.write(highlighted)


def error(mesg: str, err_code: int = 1) -> None:
    """Write ``mesg`` to stderr as a red error line, then exit.

    The process is terminated through ``sys.exit(err_code)``, so this function
    does not return normally.
    """
    highlighted = '{}{}\n{}'.format(Text.RED, mesg, Text.DEFAULT)
    sys.stderr.write(highlighted)
    sys.exit(err_code)


def check_source_dir(source_dir: str) -> None:
    """Report an error (via ``error``) unless ``source_dir`` is a directory."""
    if os.path.isdir(source_dir):
        return
    error(f'Invalid path: {source_dir}')


def _get_pd_sweep_dirs(source_dir: str) -> list[str]:
    """Return sorted paths of the PD sweep directories directly inside
    ``source_dir``.

    A child qualifies when it is a directory and its name ends with a
    ``__<n>P-TP<n>_<n>D-TP<n>`` suffix preceded by at least one character.
    """
    pattern = '.+\\_\\_[0-9]+P-TP[0-9]+\\_[0-9]+D-TP[0-9]+$'
    matches = [
        os.path.join(source_dir, entry)
        for entry in os.listdir(source_dir)
        # Directories only, and only those named like a swept PD run
        if os.path.isdir(os.path.join(source_dir, entry))
        and re.search(pattern, entry)
    ]
    return sorted(matches)


def _get_sa_sweep_dirs(source_dir: str) -> list[str]:
    """Return sorted paths of the standalone sweep directories directly inside
    ``source_dir``.

    A child qualifies when it is a directory and its name ends with a
    ``__<n>R-TP<n>`` suffix preceded by at least one character.
    """
    pattern = '.+\\_\\_[0-9]+R-TP[0-9]+$'
    matches = [
        os.path.join(source_dir, entry)
        for entry in os.listdir(source_dir)
        # Directories only, and only those named like a swept standalone run
        if os.path.isdir(os.path.join(source_dir, entry))
        and re.search(pattern, entry)
    ]
    return sorted(matches)


def get_sweep_dirs(source_dirs: list[str]) -> tuple[list[str], list[str]]:
    """Get all immediate child directories within each source directory that
    correspond to sweeps, split into P/D and standalone.

    Args:
        source_dirs: Directories whose direct children are searched for sweep
            directories.

    Returns:
        A ``(pd_sweep_dirs, sa_sweep_dirs)`` tuple of path lists, accumulated
        over all source directories in the order given.

    An error is reported (via ``error``) for any source directory that is not
    a directory or that contains neither P/D nor standalone sweeps.
    """
    pd_sweep_dirs = []
    sa_sweep_dirs = []
    for s_dir in source_dirs:
        check_source_dir(s_dir)
        pd_found = _get_pd_sweep_dirs(s_dir)
        sa_found = _get_sa_sweep_dirs(s_dir)
        # Test what THIS directory contributed rather than the running totals;
        # otherwise an empty source directory goes unnoticed whenever an
        # earlier one already supplied sweeps.
        if not pd_found and not sa_found:
            error(f'No run directories found in source directory: {s_dir}')
        pd_sweep_dirs.extend(pd_found)
        sa_sweep_dirs.extend(sa_found)
    return (pd_sweep_dirs, sa_sweep_dirs)


def make_pd_df() -> pandas.core.frame.DataFrame:
    """Build an empty DataFrame with the columns used for PD run results."""
    # Identification, configuration, and aggregate benchmark columns
    columns = [
        'Name', 'Directory', 'Model', 'GPU',
        'P_TP', 'P_Replicas', 'D_TP', 'D_Replicas',
        'Concurrency', 'ISL', 'OSL',
        'Date', 'Backend', 'Num_Prompts', 'Request_Rate', 'Burstiness',
        'Duration', 'Completed',
        'Total_Input_Tokens', 'Total_Output_Tokens',
        'Request_Throughput', 'Request_Goodput',
        'Output_Throughput', 'Total_Token_Throughput',
    ]
    # Latency columns: one group of six statistics per metric
    for metric in ('TTFT', 'TPOT', 'ITL', 'E2EL'):
        for stat in ('Mean', 'Median', 'Std', 'P90', 'P95', 'P99'):
            columns.append(f'{stat}_{metric}_ms')
    return pandas.DataFrame(columns=columns)


def make_sa_df() -> pandas.core.frame.DataFrame:
    """Build an empty DataFrame with the columns used for standalone run
    results."""
    # Identification, configuration, and aggregate benchmark columns
    columns = [
        'Name', 'Directory', 'Model', 'GPU',
        'TP', 'Replicas',
        'Concurrency', 'ISL', 'OSL',
        'Date', 'Backend', 'Num_Prompts', 'Request_Rate', 'Burstiness',
        'Duration', 'Completed',
        'Total_Input_Tokens', 'Total_Output_Tokens',
        'Request_Throughput', 'Request_Goodput',
        'Output_Throughput', 'Total_Token_Throughput',
    ]
    # Latency columns: one group of six statistics per metric
    for metric in ('TTFT', 'TPOT', 'ITL', 'E2EL'):
        for stat in ('Mean', 'Median', 'Std', 'P90', 'P95', 'P99'):
            columns.append(f'{stat}_{metric}_ms')
    return pandas.DataFrame(columns=columns)


def import_yaml(file_path: str) -> dict[Any, Any]:
    """Import a JSON/YAML file as a dict.

    Args:
        file_path: Path of the file to load with ``yaml.safe_load``.

    Returns:
        The parsed top-level mapping.

    An error is reported (via ``error``) when the file does not exist or when
    its top-level content is not a mapping (for example, an empty file, which
    parses to ``None``).
    """
    if not os.path.isfile(file_path):
        error(f'File does not exist: {file_path}')
    with open(file_path, 'r', encoding='UTF-8') as file:
        data = yaml.safe_load(file)
    # Fail here with the file name rather than later with an unrelated
    # TypeError when a caller subscripts a non-mapping result.
    if not isinstance(data, dict):
        error(f'Expected a mapping at top level, found {type(data).__name__}: {file_path}')
    return data


def get_results_files(run_dir: str) -> list[str]:
    """
    Collect one results file per benchmark directory of a run.

    Each subdirectory of ``<run_dir>/results`` is treated as a benchmark. Its
    entries are ordered by filename, descending, and the first regular file
    named ``vllm*.json`` is selected, so that a date embedded in the filename
    picks the newest file. Benchmarks without such a file contribute nothing.

    A warning is printed and an empty list returned when ``run_dir`` or its
    ``results`` subdirectory is missing.
    """
    results_files = []
    if not os.path.isdir(run_dir):
        warn(f'Invalid run directory: {run_dir}')
        return results_files
    results_root = os.path.join(run_dir, 'results')
    if not os.path.isdir(results_root):
        warn(f'"results" directory missing in run: {run_dir}')
        return results_files

    for benchmark in os.listdir(results_root):
        benchmark_dir = os.path.join(results_root, benchmark)
        if not os.path.isdir(benchmark_dir):
            continue
        # Order by filename rather than modification time: mtime is not
        # reliable when files were copied locally in arbitrary order.
        newest_first = sorted(os.listdir(benchmark_dir), reverse=True)
        candidates = (
            os.path.join(benchmark_dir, fname)
            for fname in newest_first
            if os.path.isfile(os.path.join(benchmark_dir, fname))
            and re.search('^vllm.+\\.json$', fname)
        )
        chosen = next(candidates, None)
        if chosen is not None:
            results_files.append(chosen)
    return results_files


def get_launcher_yaml(sweep_dir: str) -> dict[str, Any]:
    """Get the contents of a sweep's pod_benchmark-launcher.yaml file.

    Args:
        sweep_dir: Sweep directory containing
            ``setup/yamls/pod_benchmark-launcher.yaml``.

    Returns:
        The parsed launcher file.

    An error is reported (via ``error``) when ``spec.containers[0].env[0]``
    cannot be reached in the parsed file.
    """
    launcher_yaml_path = os.path.join(sweep_dir, 'setup', 'yamls',
                                    'pod_benchmark-launcher.yaml')
    launcher = import_yaml(launcher_yaml_path)
    # Probe the path callers rely on. TypeError covers levels that are present
    # but not subscriptable as expected (e.g. a null "env" or "containers").
    try:
        launcher['spec']['containers'][0]['env'][0]
    except (KeyError, IndexError, TypeError):
        error(f'"spec.containers[0].env[0]" field missing: {launcher_yaml_path}')
    return launcher


def _get_workload_profile_v01(sweep_dir: str) -> dict[str, Any]:
    """Get workload profile file from a sweep.

    This works for datasets obtained prior to release v0.2.
    Deprecated, to be removed in a future release.

    Args:
        sweep_dir: Sweep directory containing ``workload/profiles``.

    Returns:
        The parsed contents of the ``.yaml`` file found directly inside
        ``workload/profiles``. Only one is expected; if several exist, the
        first in filename order is used.

    An error is reported (via ``error``) when the sweep directory or its
    ``workload/profiles`` directory is missing, or when it holds no ``.yaml``
    file.
    """
    if not os.path.isdir(sweep_dir):
        error(f'Invalid run directory: {sweep_dir}')
    profiles_dir = os.path.join(sweep_dir, 'workload', 'profiles')
    if not os.path.isdir(profiles_dir):
        error(f'"workload/profiles" directory missing in sweep: {sweep_dir}')
    # Sorted so the selection does not depend on directory listing order
    for file in sorted(os.listdir(profiles_dir)):
        if os.path.isdir(os.path.join(profiles_dir, file)):
            continue
        if not re.search('.+\\.yaml$', file):
            # Skip files that are not YAML workload profiles
            continue
        return import_yaml(os.path.join(profiles_dir, file))
    # Previously fell through and returned None, which only surfaced later as
    # a TypeError when the profile was subscripted.
    error(f'No workload profile (.yaml) found in "workload/profiles": {sweep_dir}')


def _get_workload_profile_v02(sweep_dir: str) -> dict[str, Any]:
    """Get workload profile file from a sweep, using v0.2 structure.

    The profile name and harness name are read from the launcher's
    ``spec.containers[0].env`` entries
    (``LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME`` and
    ``LLMDBENCH_HARNESS_NAME``); the profile is then loaded from
    ``workload/profiles/<harness>/<profile>.yaml`` inside the sweep.

    Args:
        sweep_dir: Sweep directory to read.

    Returns:
        The parsed workload profile.

    Raises:
        FileNotFoundError: The ``workload/profiles/<harness>`` directory does
            not exist. This is raised (instead of calling ``error``) so that
            ``get_workload_profile`` can fall back to the v0.1 layout.

    Other problems (malformed env entry, missing names, missing profile file)
    are reported via ``error``.
    """
    launcher = get_launcher_yaml(sweep_dir)
    profile_name = ''
    harness_name = ''
    for kv in launcher['spec']['containers'][0]['env']:
        if 'name' not in kv or 'value' not in kv:
            error(f'Invalid "spec.containers[0].env" entry in launcher: {sweep_dir}')
        if kv['name'] == 'LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME':
            profile_name = kv['value']
        if kv['name'] == 'LLMDBENCH_HARNESS_NAME':
            harness_name = kv['value']
        # Stop scanning once both values are known
        if profile_name and harness_name:
            break
    if not profile_name:
        error(f'Workload profile could not be found in in launcher: {sweep_dir}')
    if not harness_name:
        error(f'Harness name could not be found in in launcher: {sweep_dir}')
    profile_dir_relative = os.path.join('workload', 'profiles', harness_name)
    profile_dir = os.path.join(sweep_dir, profile_dir_relative)
    if not os.path.isdir(profile_dir):
        # Raising here is temporary, in order to support v0.1 imports; to be
        # replaced by an error() call in a future release.
        raise FileNotFoundError(
            f'"{profile_dir_relative}" directory missing in sweep: {sweep_dir}')
    profile_path = os.path.join(profile_dir, profile_name + '.yaml')
    if not os.path.isfile(profile_path):
        error(f'Cannot find workload profile: {profile_path}')
    return import_yaml(profile_path)


def get_workload_profile(sweep_dir: str) -> dict[str, Any]:
    """Get workload profile file from a sweep.

    The v0.2 layout is tried first; if that fails for any reason, a warning is
    printed and the v0.1 layout is tried instead.

    Args:
        sweep_dir: Sweep directory to read.

    Returns:
        The parsed workload profile.
    """
    try:
        return _get_workload_profile_v02(sweep_dir)
    except (Exception, SystemExit):
        # SystemExit is listed because error() terminates through sys.exit();
        # catching it keeps the fallback working for failures reported that
        # way. Unlike a bare "except:", KeyboardInterrupt is not swallowed.
        warn(f'Sweep may not match v0.2 structure, trying v0.1: {sweep_dir}')
        return _get_workload_profile_v01(sweep_dir)


def get_envar(sweep_dir: str, envar: str) -> str:
    """Look up a variable in the ``environment/variables`` file of a sweep.

    The first line containing ``envar`` is used, and the text after its last
    ``=`` (with surrounding whitespace removed) is returned. Errors are
    reported via ``error`` when the directory or file is missing, when no line
    mentions ``envar``, or when the extracted value is empty.
    """
    if not os.path.isdir(sweep_dir):
        error(f'Invalid run directory: {sweep_dir}')
    env_dir = os.path.join(sweep_dir, "environment")
    if not os.path.isdir(env_dir):
        error(f'"environment" directory missing in run: {sweep_dir}')
    variables_path = os.path.join(env_dir, "variables")
    if not os.path.isfile(variables_path):
        error(f'"variables" file missing in run: {env_dir}')
    with open(variables_path, "r", encoding="UTF-8") as file:
        # NOTE(review): this is a substring match, so a line for a longer
        # variable name that contains `envar` would also be selected, and a
        # value containing "=" is cut at its last "=" -- confirm against the
        # actual format of the variables file.
        first_hit = next((line for line in file if envar in line), None)
        if first_hit is None:
            error(f'{envar} missing from environment/variables: {sweep_dir}')
        else:
            value = first_hit.rsplit('=', 1)[-1].strip()
            if not value:
                error(f'{envar} not defined: {sweep_dir}')
            return value


def populate_pd_df(runs_df: pandas.core.frame.DataFrame, sweep_dirs: list[str]) -> None:
    """Append one row per results file of each PD sweep to ``runs_df``, then
    add the calculated columns.

    The sweep name and the prefill/decode replica and TP counts are parsed
    from the directory name (``<name>__<n>P-TP<n>_<n>D-TP<n>``). ``runs_df``
    is modified in place.
    """
    # Columns copied from the results file; the key in the results file is the
    # column name in lower case.
    result_columns = [
        'Date', 'Backend', 'Num_Prompts', 'Request_Rate', 'Burstiness',
        'Duration', 'Completed',
        'Total_Input_Tokens', 'Total_Output_Tokens',
        'Request_Throughput', 'Request_Goodput',
        'Output_Throughput', 'Total_Token_Throughput',
    ]
    for metric in ('TTFT', 'TPOT', 'ITL', 'E2EL'):
        for stat in ('Mean', 'Median', 'Std', 'P90', 'P95', 'P99'):
            result_columns.append(f'{stat}_{metric}_ms')

    for s_dir in sweep_dirs:
        results_files = get_results_files(s_dir)
        model = get_envar(s_dir, 'LLMDBENCH_DEPLOY_MODEL_LIST')
        affinity = get_envar(s_dir, 'LLMDBENCH_VLLM_COMMON_AFFINITY')
        # Keep only the text after the last ":" of the affinity value
        gpu = affinity.rsplit(':', 1)[-1]

        # Split "<path>/<name>__<config>" into its parts
        prefix, config_str = s_dir.rsplit('__', 1)
        name = prefix.rsplit('/', 1)[-1]
        prefill_fields = config_str.split('P-TP', 1)
        p_rep = int(prefill_fields[0])
        p_tp = int(prefill_fields[1].split('_', 1)[0])
        d_rep = int(config_str.rsplit('_', 1)[1].split('D-TP', 1)[0])
        d_tp = int(config_str.split('D-TP', 1)[1])

        workload_profile = get_workload_profile(s_dir)
        for rf in results_files:
            result_data = import_yaml(rf)
            row = {
                'Name': name,
                'Directory': s_dir,
                'Model': model,
                'GPU': gpu,
                'P_TP': p_tp,
                'P_Replicas': p_rep,
                'D_TP': d_tp,
                'D_Replicas': d_rep,
                'Concurrency': result_data['max_concurrency'],
                'ISL': workload_profile['random-input-len'],
                'OSL': workload_profile['random-output-len'],
            }
            row.update((col, result_data[col.lower()]) for col in result_columns)
            runs_df.loc[len(runs_df)] = row

    # Add calculated columns
    prefill_gpus = runs_df['P_TP'] * runs_df['P_Replicas']
    decode_gpus = runs_df['D_TP'] * runs_df['D_Replicas']
    runs_df['Num_GPUs'] = prefill_gpus + decode_gpus
    output_thpt = runs_df['Output_Throughput']
    runs_df['Thpt_per_GPU'] = output_thpt / runs_df['Num_GPUs']
    runs_df['Thpt_per_User'] = output_thpt / runs_df['Concurrency']


def populate_sa_df(runs_df: pandas.core.frame.DataFrame, sweep_dirs: list[str]) -> None:
    """Append one row per results file of each standalone sweep to
    ``runs_df``, then add the calculated columns.

    The sweep name and the replica and TP counts are parsed from the directory
    name (``<name>__<n>R-TP<n>``). ``runs_df`` is modified in place.
    """
    # Columns copied from the results file; the key in the results file is the
    # column name in lower case.
    result_columns = [
        'Date', 'Backend', 'Num_Prompts', 'Request_Rate', 'Burstiness',
        'Duration', 'Completed',
        'Total_Input_Tokens', 'Total_Output_Tokens',
        'Request_Throughput', 'Request_Goodput',
        'Output_Throughput', 'Total_Token_Throughput',
    ]
    for metric in ('TTFT', 'TPOT', 'ITL', 'E2EL'):
        for stat in ('Mean', 'Median', 'Std', 'P90', 'P95', 'P99'):
            result_columns.append(f'{stat}_{metric}_ms')

    for s_dir in sweep_dirs:
        results_files = get_results_files(s_dir)
        model = get_envar(s_dir, 'LLMDBENCH_DEPLOY_MODEL_LIST')
        affinity = get_envar(s_dir, 'LLMDBENCH_VLLM_COMMON_AFFINITY')
        # Keep only the text after the last ":" of the affinity value
        gpu = affinity.rsplit(':', 1)[-1]

        # Split "<path>/<name>__<config>" into its parts
        prefix, config_str = s_dir.rsplit('__', 1)
        name = prefix.rsplit('/', 1)[-1]
        config_fields = config_str.split('R-TP', 1)
        rep = int(config_fields[0])
        tp = int(config_fields[-1])

        workload_profile = get_workload_profile(s_dir)
        for rf in results_files:
            result_data = import_yaml(rf)
            row = {
                'Name': name,
                'Directory': s_dir,
                'Model': model,
                'GPU': gpu,
                'TP': tp,
                'Replicas': rep,
                'Concurrency': result_data['max_concurrency'],
                'ISL': workload_profile['random-input-len'],
                'OSL': workload_profile['random-output-len'],
            }
            row.update((col, result_data[col.lower()]) for col in result_columns)
            runs_df.loc[len(runs_df)] = row

    # Add calculated columns
    runs_df['Num_GPUs'] = runs_df['TP'] * runs_df['Replicas']
    output_thpt = runs_df['Output_Throughput']
    runs_df['Thpt_per_GPU'] = output_thpt / runs_df['Num_GPUs']
    runs_df['Thpt_per_User'] = output_thpt / runs_df['Concurrency']


def get_scenarios(runs_df: pandas.core.frame.DataFrame) -> list[tuple[str]]:
    """Get a list of available scenarios from runs DataFrame, where
    configurations and concurrency will be swept.

    Args:
        runs_df: DataFrame with ``Model``, ``GPU``, ``ISL`` and ``OSL``
            columns.

    Returns:
        The distinct ``(Model, GPU, ISL, OSL)`` tuples in sorted order, so
        that a scenario keeps the same list position from run to run.
    """
    columns = ['Model', 'GPU', 'ISL', 'OSL']
    unique = set(runs_df.set_index(columns).index)
    # A plain list(set(...)) has arbitrary order, which makes selecting a
    # scenario by position unreproducible between sessions.
    try:
        return sorted(unique)
    except TypeError:
        # Values of mixed, mutually unorderable types: order by their text
        return sorted(unique, key=lambda scenario: tuple(map(str, scenario)))


def print_scenarios(scenarios: list[str]) -> None:
    """Print the scenarios as an aligned table with a leading index column."""
    columns = ['Model', 'GPU', 'ISL', 'OSL']

    # Column width is the longest text in that column, header included
    widths = [len(col) for col in columns]
    for scenario in scenarios:
        for pos, field in enumerate(scenario):
            widths[pos] = max(widths[pos], len(str(field)))

    # Every cell is padded to its column width plus a two-space gutter
    heading = ''.join(col.ljust(widths[pos] + 2) for pos, col in enumerate(columns))
    print(f'{Text.BOLD}{Text.BLUE}IDX  {Text.DEFAULT}{Text.BOLD}' + heading + f'{Text.DEFAULT}')

    for number, scenario in enumerate(scenarios):
        label = str(number)
        cells = ''.join(
            str(field).ljust(widths[pos] + 2) for pos, field in enumerate(scenario))
        # Padding is computed from the bare digits so that the color codes
        # around the index do not affect alignment
        print(f'{Text.BLUE}{label}{Text.DEFAULT}' + " " * (5 - len(label)) + cells)

## Import datasets

In [None]:
################################################################################
# User inputs
################################################################################

# List of directories containing sweep directories to import.
# These can be a mix of PD and standalone sweeps.
# Only sweep directories that are direct children of these directories will be
# imported. A child is recognized as a sweep by its name: a PD sweep ends in
# "__<n>P-TP<n>_<n>D-TP<n>" and a standalone sweep ends in "__<n>R-TP<n>".
source_dirs = [
    "/files/",
]

################################################################################
# Standard code
################################################################################

# Create blank DataFrames for prefill/decode and standalone configurations
pd_runs = make_pd_df()
sa_runs = make_sa_df()

# Look through provided source directories for immediate child directories
# containing sweep data
pd_sweep_dirs, sa_sweep_dirs = get_sweep_dirs(source_dirs)
# Populate DataFrames in place: one row per results file, plus the calculated
# columns Num_GPUs, Thpt_per_GPU and Thpt_per_User
populate_pd_df(pd_runs, pd_sweep_dirs)
populate_sa_df(sa_runs, sa_sweep_dirs)

# PD Disaggregated

In [None]:
# Scenarios available, sweeping P and D replicas/TP configurations and concurrency.
# Each scenario is a (Model, GPU, ISL, OSL) tuple; the IDX printed for a
# scenario is its position in pd_scenarios, used as `idx` in the next cell.
pd_scenarios = get_scenarios(pd_runs)
print_scenarios(pd_scenarios)

In [None]:
################################################################################
# User inputs
################################################################################

# Select scenario (position in pd_scenarios, shown as IDX in the table above)
idx = 0

# Segregate traces by directory (directories with identical scenarios, such as
# repeated runs, will not be joined together in a single trace)
seg_by_dir = True

################################################################################
# Standard code
################################################################################

# Get parameters of selected scenario
model, gpu, isl, osl = pd_scenarios[idx]

# Filter on column values: keep only rows of the selected scenario, then keep
# the configuration and throughput columns (the scenario columns themselves are
# constant after filtering and are dropped)
pd_runs_selected = pd_runs[
    (pd_runs['Model'] == model) &
    (pd_runs['GPU'] == gpu) &
    (pd_runs['ISL'] == isl) &
    (pd_runs['OSL'] == osl)][[
    'Model',
    'GPU',
    'P_TP',
    'P_Replicas',
    'D_TP',
    'D_Replicas',
    'Concurrency',
    'ISL',
    'OSL',
    'Output_Throughput',
    'Thpt_per_GPU',
    'Thpt_per_User',
    'Directory']].drop('Model', axis=1).drop('GPU', axis=1).drop('ISL', axis=1).drop('OSL', axis=1)#.sort_values(by='Output_Throughput')

# Plot performance results
# Trace colors, reused cyclically when there are more configurations than colors
colors = ['#FF0000', '#FFAA00', '#DDDD00', '#00DD00', '#00FFFF', '#0000FF',
          '#FF00FF', '#666666', '#000000']

# Unique configurations of replicas and TP
if seg_by_dir:
    # NOTE(review): `columns` and `scenarios` are not used again in this cell
    columns = ['Model', 'GPU', 'ISL', 'OSL', 'Directory']
    scenarios = list(set(pd_runs.set_index(columns).index))
    # One configuration per (replicas, TP, directory) combination
    configs = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP', 'Directory']).index))
else:
    pd_runs_selected = pd_runs_selected.drop('Directory', axis=1)
    # One configuration per (replicas, TP) combination, across directories
    configs = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP']).index))
configs.sort()
# Sweep through configurations
for ii, conf in enumerate(configs):
    # Make a DataFrame for specific configuration, ordered by concurrency so
    # the plotted trace connects points in sweep order
    if seg_by_dir:
        conf_df = pd_runs_selected[
            (pd_runs_selected['P_Replicas'] == conf[0]) &
            (pd_runs_selected['P_TP'] == conf[1]) &
            (pd_runs_selected['D_Replicas'] == conf[2]) &
            (pd_runs_selected['D_TP'] == conf[3]) &
            (pd_runs_selected['Directory'] == conf[4])
        ].sort_values(by='Concurrency')
    else:
        conf_df = pd_runs_selected[
            (pd_runs_selected['P_Replicas'] == conf[0]) &
            (pd_runs_selected['P_TP'] == conf[1]) &
            (pd_runs_selected['D_Replicas'] == conf[2]) &
            (pd_runs_selected['D_TP'] == conf[3])
        ].sort_values(by='Concurrency')
    # `display` is not defined in this notebook; presumably the IPython/Jupyter
    # built-in -- this cell is expected to run inside a notebook
    display(conf_df)

    # Plot throughputs for configuration
    # NOTE(review): the label does not include the directory, so with
    # seg_by_dir=True traces of the same configuration from different
    # directories get identical legend entries
    plt.plot(conf_df.Thpt_per_User, conf_df.Thpt_per_GPU,
             label=f'{conf[0]}P-TP={conf[1]} {conf[2]}D-TP={conf[3]}',
             marker='o', markersize=4,
             color=colors[ii%len(colors)]
            )
    # Annotate each point with its concurrency, offset upward by 2% of the
    # largest per-GPU throughput in the selected scenario
    for jj, val in enumerate(conf_df.Concurrency):
        plt.text(list(conf_df.Thpt_per_User)[jj],
                 list(conf_df.Thpt_per_GPU)[jj]+pd_runs_selected['Thpt_per_GPU'].max()*0.02,
                 str(val), ha='center', color=colors[ii%len(colors)])

plt.title(f'GPU: {gpu}\nModel: {model}\nISL: {isl}  OSL: {osl}')
plt.xlabel('Tok/s/User', fontsize='16')
plt.ylabel('Tok/s/GPU', fontsize='16')
# Place the legend outside the axes, to the right
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.grid(True, linewidth=1, ls='--', color='gray')
# Start both axes at zero; upper limits are left to matplotlib
plt.axis([0, None, 0, None])
plt.show()


# Standalone

In [None]:
# Scenarios available, sweeping replicas/TP configurations and concurrency.
# Each scenario is a (Model, GPU, ISL, OSL) tuple; the IDX printed for a
# scenario is its position in sa_scenarios, used as `idx` in the next cell.
sa_scenarios = get_scenarios(sa_runs)
print_scenarios(sa_scenarios)

In [None]:
################################################################################
# User inputs
################################################################################

# Select scenario (position in sa_scenarios, shown as IDX in the table above)
idx = 0

# Segregate traces by directory (directories with identical scenarios, such as
# repeated runs, will not be joined together in a single trace)
seg_by_dir = True

################################################################################
# Standard code
################################################################################

# Get parameters of selected scenario
model, gpu, isl, osl = sa_scenarios[idx]

# Filter on column values: keep only rows of the selected scenario, then keep
# the configuration and throughput columns (the scenario columns themselves are
# constant after filtering and are dropped)
sa_runs_selected = sa_runs[
    (sa_runs['Model'] == model) &
    (sa_runs['GPU'] == gpu) &
    (sa_runs['ISL'] == isl) &
    (sa_runs['OSL'] == osl)][[
    'Model',
    'GPU',
    'TP',
    'Replicas',
    'Concurrency',
    'ISL',
    'OSL',
    'Output_Throughput',
    'Thpt_per_GPU',
    'Thpt_per_User',
    'Directory']].drop('Model', axis=1).drop('GPU', axis=1).drop('ISL', axis=1).drop('OSL', axis=1)#.sort_values(by='Output_Throughput')

# Plot performance results
# Trace colors, reused cyclically when there are more configurations than colors
colors = ['#FF0000', '#FFAA00', '#DDDD00', '#00DD00', '#00FFFF', '#0000FF',
          '#FF00FF', '#666666', '#000000']

# Unique configurations of replicas and TP

if seg_by_dir:
    # NOTE(review): `columns` and `scenarios` are not used again in this cell
    columns = ['Model', 'GPU', 'ISL', 'OSL', 'Directory']
    scenarios = list(set(sa_runs.set_index(columns).index))
    # One configuration per (replicas, TP, directory) combination
    configs = list(set(sa_runs_selected.set_index(['Replicas', 'TP', 'Directory']).index))
else:
    sa_runs_selected = sa_runs_selected.drop('Directory', axis=1)
    # One configuration per (replicas, TP) combination, across directories
    configs = list(set(sa_runs_selected.set_index(['Replicas', 'TP']).index))
configs.sort()
# Sweep through configurations
for ii, conf in enumerate(configs):
    # Make a DataFrame for specific configuration, ordered by concurrency so
    # the plotted trace connects points in sweep order
    if seg_by_dir:
        conf_df = sa_runs_selected[
            (sa_runs_selected['Replicas'] == conf[0]) &
            (sa_runs_selected['TP'] == conf[1]) &
            (sa_runs_selected['Directory'] == conf[2])
        ].sort_values(by='Concurrency')
    else:
        conf_df = sa_runs_selected[
            (sa_runs_selected['Replicas'] == conf[0]) &
            (sa_runs_selected['TP'] == conf[1])
        ].sort_values(by='Concurrency')
    # `display` is not defined in this notebook; presumably the IPython/Jupyter
    # built-in -- this cell is expected to run inside a notebook
    display(conf_df)

    # Plot throughputs for configuration
    # NOTE(review): the label does not include the directory, so with
    # seg_by_dir=True traces of the same configuration from different
    # directories get identical legend entries
    plt.plot(conf_df.Thpt_per_User, conf_df.Thpt_per_GPU,
             label=f'Replicas: {conf[0]}  TP={conf[1]}',
             marker='o', markersize=4,
             color=colors[ii%len(colors)]
            )
    # Annotate each point with its concurrency, offset upward by 2% of the
    # largest per-GPU throughput in the selected scenario
    for jj, val in enumerate(conf_df.Concurrency):
        plt.text(list(conf_df.Thpt_per_User)[jj],
                 list(conf_df.Thpt_per_GPU)[jj]+sa_runs_selected['Thpt_per_GPU'].max()*0.02,
                 str(val), ha='center', color=colors[ii%len(colors)])

plt.title(f'GPU: {gpu}\nModel: {model}\nISL: {isl}  OSL: {osl}')
plt.xlabel('Tok/s/User', fontsize='16')
plt.ylabel('Tok/s/GPU', fontsize='16')
# Place the legend outside the axes, to the right
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.grid(True, linewidth=1, ls='--', color='gray')
# Start both axes at zero; upper limits are left to matplotlib
plt.axis([0, None, 0, None])
plt.show()
