In [None]:
import pandas as pd
import io
import os
import plotly.express as px
import plotly.graph_objects as go

def get_root_dir():
    """Return the directory that holds the genai-perf result artifacts.

    The ``GENAI_PERF_ROOT_DIR`` environment variable takes precedence when it
    is set to a non-empty value; otherwise the built-in default path is used.
    """
    fallback = "/root/genai-perf-results/artifacts"
    # An unset variable (None) and an empty string are both falsy, so `or`
    # falls through to the default in either case.
    return os.getenv('GENAI_PERF_ROOT_DIR') or fallback

def model_name_prefix():
    """Return the model-name prefix configured through the environment.

    Returns:
        The non-empty value of ``GENAI_PERF_MODEL_NAME_PREFIX``.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    prefix = os.getenv('GENAI_PERF_MODEL_NAME_PREFIX')
    # Guard clause: there is no default, so a missing value is an error.
    if not prefix:
        raise ValueError("Environment variable GENAI_PERF_MODEL_NAME_PREFIX is not set. Please set it to the model name prefix.")
    return prefix

def parse_data(file_path, skip_lines=(7, 8)):
    """Load a CSV file into a DataFrame after dropping selected physical lines.

    Args:
        file_path: Path of the CSV file to read.
        skip_lines: Iterable of 0-based line numbers to drop before parsing.
            Defaults to ``(7, 8)``.
            NOTE(review): presumably the blank separator and the second
            header row between the two tables of a genai-perf CSV export --
            confirm against the genai-perf version that produced the files.

    Returns:
        pandas.DataFrame parsed from the remaining lines; the first kept
        line supplies the column names.
    """
    dropped = frozenset(skip_lines)
    with open(file_path, 'rt') as file:
        kept = [line for i, line in enumerate(file) if i not in dropped]
    # read_csv needs a path or file-like object, so wrap the filtered text in
    # an in-memory buffer (a fresh StringIO is already positioned at 0).
    return pd.read_csv(io.StringIO("".join(kept)))

def safe_float(val):
    """Convert ``val`` to a float after removing every comma from its text.

    The value is stringified first, so both numbers and strings such as
    ``"1,234.5"`` are accepted. Raises ValueError if the remaining text is
    not a valid float literal.
    """
    text = str(val)
    without_commas = "".join(text.split(','))
    return float(without_commas)

def get_metadata(file_path):
    """Return the raw text of the JSON file that sits next to a CSV file.

    Args:
        file_path: Path of the CSV file; the companion file is the same path
            with the ``.csv`` extension replaced by ``.json``.

    Returns:
        The unparsed contents of the JSON file as a string, or None when
        that file does not exist.
    """
    stem, extension = os.path.splitext(file_path)
    if extension == '.csv':
        # Swap only the trailing extension so that a '.csv' appearing
        # elsewhere in the path (e.g. in a directory name) is left untouched.
        json_file_path = stem + '.json'
    else:
        # Paths that do not end in '.csv' keep the previous substitution
        # behaviour for backward compatibility.
        json_file_path = file_path.replace('.csv', '.json')
    if not os.path.exists(json_file_path):
        return None
    with open(json_file_path, 'r') as json_file:
        return json_file.read()

# Scenario labels; each one is the prefix of a "<label>_genai_perf.csv"
# result file and the legend name of its trace on the charts.
# NOTE(review): presumably "<input tokens>_<output tokens>" -- confirm.
input_output = ["256_256", "256_8", "1024_256"]

# Concurrency levels; each value is appended to the run-directory prefix to
# locate that run's results and is used as the point label on the charts.
concurrencies = [
    1,
    2,
    5,
    10,
    50,
    100,
    250,
]

In [None]:
# Root directory of the benchmark results; each run lives in a sub-directory
# named "<directory_prefix><concurrency>".
ROOT_DIR = get_root_dir()
# Prefix of the run-directory names. It is pinned to a literal here; the
# commented-out alternative reads it from GENAI_PERF_MODEL_NAME_PREFIX and
# raises ValueError when that variable is not set.
# directory_prefix = model_name_prefix()
directory_prefix = "meta-llama_Llama-3.3-70B-Instruct-openai-chat-concurrency"

In [None]:
# Row positions (0-based) in the DataFrame returned by parse_data() from which
# each metric's 'avg' value is read.
# NOTE(review): these positions depend on the row order of the genai-perf CSV
# export -- confirm them whenever the genai-perf version changes.
TTFT_ROW = 0
LATENCY_ROW = 2
ITL_ROW = 3
TPS_ROW = 6
REQUEST_ROW = 8
# TTFT, latency and ITL are divided by this factor; the charts label those
# axes in seconds, so the CSV values are presumably milliseconds -- confirm.
MS_PER_SECOND = 1000

# all_data maps each input/output label to its per-concurrency series, in the
# same order as `concurrencies`.
all_data = {}
for input_output_pair in input_output:
    TPS = []
    TTFT = []
    Latency = []
    ITL = []
    Request = []
    metadata = []

    for con in concurrencies:
        # Results live at <ROOT_DIR>/<prefix><concurrency>/<pair>_genai_perf.csv
        file = os.path.join(ROOT_DIR, directory_prefix + str(con), input_output_pair + "_genai_perf.csv")
        df = parse_data(file)

        TPS.append(safe_float(df.iloc[TPS_ROW]['avg']))
        TTFT.append(safe_float(df.iloc[TTFT_ROW]['avg']) / MS_PER_SECOND)
        Latency.append(safe_float(df.iloc[LATENCY_ROW]['avg']) / MS_PER_SECOND)
        ITL.append(safe_float(df.iloc[ITL_ROW]['avg']) / MS_PER_SECOND)
        Request.append(safe_float(df.iloc[REQUEST_ROW]['avg']))
        # Raw JSON text of the companion file, or None when it is missing.
        metadata.append(get_metadata(file))

    all_data[input_output_pair] = {
        'TPS': TPS,
        'TTFT': TTFT,
        'Latency': Latency,
        'ITL': ITL,
        'Request': Request,
        # Previously collected and then discarded; kept so the per-run
        # metadata stays available alongside the metrics.
        'metadata': metadata,
        # Copy so the entries do not all alias the module-level list.
        'concurrencies': list(concurrencies)
    }

def plot_metric_vs_tps(results, pairs, metric_key, xaxis_title):
    """Build one chart of TPS against a metric, with one trace per pair.

    Args:
        results: Mapping of pair label -> dict holding the 'TPS',
            'concurrencies' and `metric_key` series for that pair.
        pairs: Pair labels to plot, in legend order.
        metric_key: Key of the series placed on the x axis (e.g. 'TTFT').
        xaxis_title: Text shown as the x-axis title.

    Returns:
        The plotly Figure; the caller decides when to show it.
    """
    figure = go.Figure()
    for pair in pairs:
        series = results[pair]
        figure.add_trace(go.Scatter(
            x=series[metric_key],
            y=series['TPS'],
            mode='lines+markers+text',
            # Label every point with the concurrency it was measured at.
            text=series['concurrencies'],
            textposition="top center",
            name=pair,
            line=dict(width=2)
        ))
    figure.update_layout(
        xaxis_title=xaxis_title,
        yaxis_title="Total System: tokens/s",
        title=f"{metric_key} vs TPS for all input/output pairs"
    )
    return figure


# Create 4 charts with all input/output pairs on each chart:
# (series key in all_data, x-axis title), in display order.
CHART_SPECS = [
    ('TTFT', "Time to first token (s) [lower is better]"),
    ('Latency', "Latency (s) [lower is better]"),
    ('ITL', "ITL (s) [lower is better]"),
    ('Request', "Request"),
]

for metric_key, xaxis_title in CHART_SPECS:
    fig = plot_metric_vs_tps(all_data, input_output, metric_key, xaxis_title)
    fig.show()