# Analyze responses
The following is an example of the analysis that can be done on the individual responses saved when running `token_benchmark_ray.py` with the `--results-dir` flag, which enables saving of all responses.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## Read the input json file

In [None]:
# Location of the individual responses JSON file written by the benchmark run.
results_path = '../data/results/llmperf/COE-Meta-Llama-3-8B-Instruct_1000_1000_20_stream_individual_responses.json'
df_user = pd.read_json(results_path)

# Keep only the rows whose `error_code` is different from the empty string.
# NOTE(review): if the intent is to discard failed requests, confirm how a
# successful request encodes `error_code` in the results file.
has_non_empty_error_code = df_user["error_code"] != ""
df_user = df_user.loc[has_non_empty_error_code]

## Create a summary dataframe


In [None]:
# Every aggregate below is computed per value of `batch_size_used`.
by_batch = df_user.groupby('batch_size_used')

# Mean per-request output speed, total output tokens, mean TTFT and request count.
df_summary1 = by_batch['server_output_token_per_s_per_request'].mean().reset_index()
df_summary2 = by_batch['server_number_output_tokens'].sum().reset_index()
df_summary3 = by_batch['server_ttft_s'].mean().reset_index()
df_summary4 = by_batch.size().reset_index(name='Counts')

# Join the four aggregates into a single table keyed by batch size.
df_summary = (
    df_summary1
    .merge(df_summary2, on='batch_size_used', how='inner')
    .merge(df_summary3, on='batch_size_used', how='inner')
    .merge(df_summary4, on='batch_size_used', how='inner')
)

# Combined speed: mean per-request speed multiplied by the batch size.
df_summary['server_combined_output_tokens_per_s'] = (
    df_summary['server_output_token_per_s_per_request'] * df_summary['batch_size_used']
)

In [None]:
# Five stacked panels, one per summary metric, all plotted against batch size.
fig, ax = plt.subplots(nrows=5, ncols=1, figsize=(10, 12))

summary_metrics = (
    "Counts",
    "server_output_token_per_s_per_request",
    "server_combined_output_tokens_per_s",
    "server_number_output_tokens",
    "server_ttft_s",
)
for panel, metric in zip(ax, summary_metrics):
    sns.barplot(data=df_summary, x="batch_size_used", y=metric, ax=panel)

## Time taken
- Compute the cumulative time calls spend waiting for the first token (TTFT) versus the time spent generating tokens

In [None]:
# Cumulative TTFT wait: for each batch size, (request count / batch size) times
# the mean server TTFT, summed over all batch sizes.
# NOTE(review): Counts / batch_size_used presumably approximates the number of
# batches that were run at that size — confirm.
total_wait_time_ttft = (df_summary['Counts']/df_summary['batch_size_used']*df_summary['server_ttft_s']).sum()

# Cumulative generation time: total output tokens divided by the mean
# per-request output speed, summed over all batch sizes.
total_generation_time = (df_summary['server_number_output_tokens']/df_summary['server_output_token_per_s_per_request']).sum()

# Both totals are in seconds; divide by 60 and label every line in minutes.
print(f'Total wait time due to ttft (mins) = {total_wait_time_ttft/60}')
print(f'Total generation time (mins) = {total_generation_time/60}')
print(f'Total time (mins) = {(total_wait_time_ttft + total_generation_time)/60}')

## Requests Gantt Chart

In [None]:
from datetime import timedelta

fig, ax = plt.subplots(figsize=(12, 6))

# One pair of horizontal bars per request, placed at the row's index label.
for i, call in df_user.iterrows():
    began = call["start_time"]
    # Blue: the whole request, from its start time to its end time.
    ax.hlines(y=i, xmin=began, xmax=call["end_time"], color='blue', linewidth=2)
    # Red (drawn slightly above): from the start until the client TTFT has elapsed.
    first_token_at = began + timedelta(seconds=call["client_ttft_s"])
    ax.hlines(y=i+0.25, xmin=began, xmax=first_token_at, color='red', linewidth=2)

    # Stop after the first row whose index label is above 150
    # (that row has already been drawn at this point).
    if i> 150:
        break

# Tilt the time labels so they do not overlap, then label the chart.
plt.xticks(rotation=45)
plt.xlabel("Time")
plt.ylabel("LLM Call Index")
plt.title("LLM Calls Over Time")

In [None]:
# Display the per-batch-size summary dataframe.
df_summary

In [None]:
# Server-side output speed per request, grouped by batch size
# (drawn on the axes created here).
fig, ax = plt.subplots()
sns.barplot(
    x='batch_size_used',
    y='server_output_token_per_s_per_request',
    data=df_user,
    ax=ax,
)

In [None]:
# Server-side time to first token, grouped by batch size
# (drawn on the axes created here).
fig, ax = plt.subplots()
sns.barplot(
    x='batch_size_used',
    y='server_ttft_s',
    data=df_user,
    ax=ax,
)

In [None]:
# Reshape to long form so both TTFT columns can be drawn side by side via `hue`.
df_melted = pd.melt(
    df_user,
    id_vars=['batch_size_used'],
    value_vars=['server_ttft_s', 'client_ttft_s'],
    var_name='Metric',
    value_name='Value',
)

# Grouped bars: one bar per metric for every batch size.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='batch_size_used', y='Value', hue='Metric', data=df_melted, ax=ax)

# Title and axis labels.
# NOTE(review): the title says "Boxplots" while `sns.barplot` is what gets drawn — confirm.
ax.set_title('Boxplots for Server TTFT and Client TTFT')
ax.set_xlabel('Batch Size Used')
ax.set_ylabel('Values')

# Legend, then render.
ax.legend(title='Metric')
plt.show()

In [None]:
# Reshape to long form so both output-speed columns can be drawn side by side via `hue`.
df_melted = pd.melt(
    df_user,
    id_vars=['batch_size_used'],
    value_vars=['server_output_token_per_s_per_request', 'client_output_token_per_s_per_request'],
    var_name='Metric',
    value_name='Value',
)

# Grouped bars: one bar per metric for every batch size.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='batch_size_used', y='Value', hue='Metric', data=df_melted, ax=ax)

# Title and axis labels.
# NOTE(review): the title says "Boxplots" while `sns.barplot` is what gets drawn — confirm.
ax.set_title('Boxplots for Server token/s and Client token/s per request')
ax.set_xlabel('Batch Size Used')
ax.set_ylabel('Values')

# Legend, then render.
ax.legend(title='Metric')
plt.show()

In [None]:
def plot_client_vs_server_barplots(df_user, x_col, y_cols, title, ylabel):
    """
    Plot grouped bar plots comparing several metric columns of a DataFrame.

    Args:
        df_user (pd.DataFrame): The DataFrame containing the data to plot.
        x_col (str): The column name to be used as the x-axis.
        y_cols (list of str): The column names to draw as separate bars (hue).
        title (str): The title of the plot.
        ylabel (str): The label for the y-axis.

    Returns:
        None
    """
    # Melt to long form: one row per (x value, metric) pair, as seaborn expects.
    df_melted = df_user.melt(id_vars=[x_col], value_vars=y_cols,
                             var_name='Metric', value_name='Value')

    # Create the plot. The x-axis must use `x_col`: after the melt it is the only
    # id column left, so a hard-coded column name breaks for any other `x_col`.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(data=df_melted, x=x_col, y='Value', hue='Metric', ax=ax)

    # Customize the plot
    plt.title(title)
    plt.xlabel('Batch Size Used')
    plt.ylabel(ylabel)

    # Show the plot
    plt.legend(title='Metric')
    plt.show()

In [None]:
# Server vs client output speed per request, for each batch size.
plot_client_vs_server_barplots(
    df_user=df_user,
    x_col='batch_size_used',
    y_cols=[
        'server_output_token_per_s_per_request',
        'client_output_token_per_s_per_request',
    ],
    title='Boxplots for Server token/s and Client token/s per request',
    ylabel='tokens/s',
)

In [None]:
# Server vs client time to first token, for each batch size.
# The title names the TTFT metrics that are actually plotted here (in seconds).
plot_client_vs_server_barplots(df_user, 
                               'batch_size_used', 
                               ['server_ttft_s', 'client_ttft_s'], 
                               'Boxplots for Server TTFT and Client TTFT',
                               'seconds')

In [None]:
# Server vs client end-to-end latency, for each batch size.
plot_client_vs_server_barplots(
    df_user=df_user,
    x_col='batch_size_used',
    y_cols=['server_end_to_end_latency_s', 'client_end_to_end_latency_s'],
    title='Boxplots for Server latency and Client latency',
    ylabel='seconds',
)

In [None]:
# List the column names available in the individual responses dataframe.
df_user.columns

In [None]:
from typing import List

def plot_client_vs_server_barplots(
    df_user: pd.DataFrame,
    x_col: str,
    y_cols: List[str],
    title: str,
    ylabel: str,
    xlabel: str = 'Batch Size Used',
) -> None:
    """
    Plots bar plots for client vs server metrics from a DataFrame.

    Args:
        df_user (pd.DataFrame): The DataFrame containing the data to plot.
        x_col (str): The column name to be used as the x-axis.
        y_cols (List[str]): A list of column names to be used as the y-axis;
            each one becomes a separate hue in the plot.
        title (str): The title of the plot.
        ylabel (str): The label for the y-axis.
        xlabel (str): The label for the x-axis. Defaults to 'Batch Size Used',
            the label that was previously fixed; pass another value when
            `x_col` is not the batch size.

    Returns:
        None
    """
    # Melt the DataFrame to have a long-form DataFrame suitable for Seaborn
    df_melted = df_user.melt(id_vars=[x_col], value_vars=y_cols,
                             var_name='Metric', value_name='Value')

    # Create the plot
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(data=df_melted, x=x_col, y='Value', hue='Metric', ax=ax)

    # Customize the plot
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Show the plot
    plt.legend(title='Metric')
    plt.show()

In [None]:
# Server vs client end-to-end latency, for each batch size.
plot_client_vs_server_barplots(
    df_user=df_user,
    x_col='batch_size_used',
    y_cols=['server_end_to_end_latency_s', 'client_end_to_end_latency_s'],
    title='Boxplots for Server latency and Client latency',
    ylabel='seconds',
)

In [None]:
# Mean server-side output speed per request for every batch size.
df_summary1 = (
    df_user.groupby('batch_size_used')['server_output_token_per_s_per_request']
    .agg('mean')
    .reset_index()
)


In [None]:

# Display the mean per-request server output speed for each batch size.
df_summary1

In [None]:
# Mean per-request output speed (server- and client-side) for every batch size.
df_req_info = df_user
df_req_summary = (
    df_req_info.groupby("batch_size_used")[
        [
            "server_output_token_per_s_per_request",
            "client_output_token_per_s_per_request",
        ]
    ]
    .mean()
    .reset_index()
)

# Throughput columns: mean per-request speed multiplied by the batch size.
df_req_summary["server_throughput_token_per_s"] = (
    df_req_summary["server_output_token_per_s_per_request"]
    * df_req_summary["batch_size_used"]
)
df_req_summary["client_throughput_token_per_s"] = (
    df_req_summary["client_output_token_per_s_per_request"]
    * df_req_summary["batch_size_used"]
)

# Long form for plotting. Compare like with like: both value columns are the
# throughput columns computed above, not a per-request speed next to a throughput.
df_melted = pd.melt(
    df_req_summary,
    id_vars='batch_size_used',
    value_vars=['server_throughput_token_per_s', 'client_throughput_token_per_s'],
    var_name='Value Type',
    value_name='Value',
)

In [None]:
# Display the long-form dataframe produced by the melt.
df_melted

## Calls Gantt Chart
- Blue line is the total time taken to get the response back
- Red line is the time the call is waiting to be executed, i.e. until its first token arrives (client TTFT)

In [None]:
from datetime import timedelta
fig,ax = plt.subplots(figsize=(12,6))

# Limit to the first 100 calls. `head` caps the number of rows drawn, whereas
# comparing the index label after drawing let extra rows through and stopped
# being a row count once rows had been filtered out of `df_user`.
for i, row in df_user.head(100).iterrows():
    # Blue: the whole call, from its start time to its end time.
    ax.hlines(y=i, xmin=row["start_time"], xmax=row["end_time"], color='blue', linewidth=2)
    # Red (drawn slightly above): from the start until the client TTFT has elapsed.
    ax.hlines(y=i+0.25, xmin=row["start_time"], xmax=row["start_time"]+timedelta(seconds=row["client_ttft_s"]), color='red', linewidth=2)

# Tilt the time labels so they do not overlap, then label the chart.
plt.xticks(rotation=45)
plt.xlabel("Time")
plt.ylabel("LLM Call Index")
plt.title("LLM Calls Over Time")