In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import sys

sys.path.append("../..")
sys.path.append(".")

from plot_results_for_paper import (
    plot_benchmark_result_table,
    plot_combined_benchmark_result_table,
    rc_context_wrapper,
    rc_context_wrapper_quarter_col_plot,
    select_columns,
)
from pathlib import Path
from plot_config_for_paper import linestyle_mapping, style_dict
import pickle

import matplotlib.pyplot as plt

from plot_config_for_paper import FIGSIZE, FIGSIZE_2COL

In [2]:
# Read the pickled benchmark results and split out the three tables that the
# plots below are built from.
# NOTE(review): unpickling can execute arbitrary code; only load a trusted file.
with Path("ttft_raw_data.p").open("rb") as f:
    raw_data = pickle.load(f)

ttft_1_df, ttft_100_df, token_per_sec_df = (
    raw_data["ttft_1"],
    raw_data["ttft_100"],
    raw_data["token_per_sec"],
)

## Final Plots - All results

In [3]:
# Short model key -> long run identifier string; this mapping is handed to
# `select_columns` in the cells below.
selected_columns = dict(
    llama3="R--llama3__tcm__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False",
    llama2="R--llama2__tcm__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False",
    falcon_mamba="R--falcon_mamba__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False",
    codestral_mamba="R--codestral_mamba__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False",
    xlstm="R--xlstm__tcm__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False_isd-bfloat16_ed-4096_nh-8_nb-32_vs-50304_wm-fused_ck-chunkwise--triton_xl_chunk_sk-native_sequence__triton_step_fused_sk-triton_fused_cs-128_akd-bfloat16",
)
# `filename_suffix` is interpolated into the `filename` arguments of the plots;
# `add_legend` is forwarded to the combined plots.
filename_suffix, add_legend = "", True

In [4]:
# Pick the configured runs out of the "ttft_1" table.
# NOTE(review): keep_col_regex presumably retains the prefill column(s) in
# addition to the selected runs -- confirm against select_columns.
ttft_1_plot_df = select_columns(
    ttft_1_df,
    selected_columns,
    keep_col_regex=".*prefill.*",
)

In [5]:
# Pick the configured runs out of the "ttft_100" table.
# NOTE(review): keep_col_regex presumably retains the prefill column(s) in
# addition to the selected runs -- confirm against select_columns.
ttft_100_plot_df = select_columns(
    ttft_100_df,
    selected_columns,
    keep_col_regex=".*prefill.*",
)

In [None]:
# Echo the figure size that the two combined plots below pass as `figsize`.
FIGSIZE_2COL

First, create the legend

In [None]:
# Common legend for the plots: same inputs as the combined figure in the next
# cell, but requested with legend_only=True and given its own filename.
fig = rc_context_wrapper(
    func=plot_combined_benchmark_result_table,
    # --- data ---
    result_dfs=[ttft_1_plot_df, ttft_100_plot_df],
    x_axis_param="prefill_length",
    # --- labels ---
    x_label="Prefill Length",
    y_labels=["Time to First Token [ms]", "Time to First 100 Tokens [ms]"],
    title="",
    # --- styling ---
    # linestyle_mapping=linestyle_mapping,
    style_dict=style_dict,
    style_dict_colname_mapping_exact=False,
    figsize=FIGSIZE_2COL,
    # --- legend ---
    add_legend=add_legend,
    legend_only=True,
    legend_args=dict(
        loc="lower center",
        ncol=5,
        bbox_to_anchor=(0.0, 0.90, 1.0, 0.502),
        frameon=False,
        facecolor="white",
    ),
    # --- output ---
    filename=f"legend{filename_suffix}",
)
plt.show()

In [None]:
# Combined figure built from the two TTFT tables (1 token and 100 tokens).
fig = rc_context_wrapper(
    func=plot_combined_benchmark_result_table,
    # --- data ---
    result_dfs=[ttft_1_plot_df, ttft_100_plot_df],
    x_axis_param="prefill_length",
    # --- labels ---
    x_label="Prefill Length",
    y_labels=["Time to First Token [ms]", "Time to First 100 Tokens [ms]"],
    title="",
    # --- styling ---
    # linestyle_mapping=linestyle_mapping,
    style_dict=style_dict,
    style_dict_colname_mapping_exact=False,
    figsize=FIGSIZE_2COL,
    # --- legend ---
    add_legend=add_legend,
    legend_args=dict(
        loc="lower center",
        ncol=5,
        bbox_to_anchor=(0.0, 0.90, 1.0, 0.502),
        frameon=False,
        facecolor="white",
    ),
    # --- output ---
    # No filename is passed here; the previously used name was
    # f"paper-combined_timetofirsttoken_1_and_100_tokens{filename_suffix}".
    filename=None,
)
plt.show()

In [9]:
# Pick the configured runs out of the "token_per_sec" table.
# NOTE(review): keep_col_regex presumably retains the prefill column(s) in
# addition to the selected runs -- confirm against select_columns.
token_per_sec_plot_df = select_columns(
    token_per_sec_df,
    selected_columns,
    keep_col_regex=".*prefill.*",
)

In [None]:
# Echo the figure size that the tokens-per-second plot below passes as `figsize`.
FIGSIZE

In [None]:
# Tokens-per-second figure over prefill length, drawn through the
# quarter-column wrapper.
fig = rc_context_wrapper_quarter_col_plot(
    func=plot_benchmark_result_table,
    # --- data ---
    result_df=token_per_sec_plot_df,
    x_axis_param="prefill_length",
    # --- labels ---
    x_label="Prefill Length",
    y_label="Tokens per Second",
    title="",
    # --- styling ---
    # linestyle_mapping=linestyle_mapping,
    style_dict=style_dict,
    style_dict_colname_mapping_exact=False,
    figsize=FIGSIZE,
    # --- legend ---
    # add_legend is passed as None here rather than the notebook-level
    # `add_legend` flag; legend_args are still supplied.
    add_legend=None,
    legend_args=dict(
        loc="lower center",
        ncol=3,
        bbox_to_anchor=(-0.04, 1.02, 1.0, 0.502),
        frameon=False,
        facecolor="white",
    ),
    # --- output ---
    filename=f"paper-timetofirsttoken_tokens_per_sec{filename_suffix}",
)
plt.show()

In [12]:
# selected_columns = {
#     "llama3": "R--llama3__tcm__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False",
#     # "llama2": "R--llama2__tcm__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False",
#     # "falcon_mamba": "R--falcon_mamba__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False",
#     "codestral_mamba": "R--codestral_mamba__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False",
#     "xlstm": "R--xlstm__tcm__ampdt-bfloat16__wdt-bfloat16__ucgg-True_ucgm-False_isd-bfloat16_ed-4096_nh-8_nb-32_vs-50304_wm-fused_ck-chunkwise--triton_xl_chunk_sk-native_sequence__triton_step_fused_sk-triton_fused_cs-128_akd-bfloat16",
# }
# filename_suffix = "only_3"

In [13]:
# ttft_1_plot_df = select_columns(
#     ttft_1_df, selected_columns, keep_col_regex=".*prefill.*"
# )

In [14]:
# fig = rc_context_wrapper(
#     func=plot_benchmark_result_table,
#     result_df=ttft_1_plot_df,
#     x_axis_param="prefill_length",
#     # linestyle_mapping=linestyle_mapping,
#     style_dict=style_dict,
#     style_dict_colname_mapping_exact=False,
#     y_label="Time to First Token [ms]",
#     x_label="Prefill Length",
#     title="",  # "Time to generate 1 tokens, for varying prefill lengths",
#     figsize=(1.5 * 12 * 1 / 2.54, 1.5 * 8 * 1 / 2.54),
#     filename=f"timetofirsttoken_1_tokens{filename_suffix}",
#     add_legend=add_legend,
# )

In [15]:
# ttft_100_plot_df = select_columns(
#     ttft_100_df, selected_columns, keep_col_regex=".*prefill.*"
# )

In [16]:
# fig = rc_context_wrapper(
#     func=plot_benchmark_result_table,
#     result_df=ttft_100_plot_df,
#     x_axis_param="prefill_length",
#     # linestyle_mapping=linestyle_mapping,
#     style_dict=style_dict,
#     style_dict_colname_mapping_exact=False,
#     y_label="Time to First 100 Tokens [ms]",
#     x_label="Prefill Length",
#     title="",  # "Time to generate 100 tokens, for varying prefill lengths",
#     figsize=(1.5 * 12 * 1 / 2.54, 1.5 * 8 * 1 / 2.54),
#     filename=f"timetofirsttoken_100_tokens_prefill_length{filename_suffix}",
#     add_legend=add_legend,
# )

In [17]:
# token_per_sec_plot_df = select_columns(
#     token_per_sec_df, selected_columns, keep_col_regex=".*prefill.*"
# )

In [18]:
# fig = rc_context_wrapper(
#     func=plot_benchmark_result_table,
#     result_df=token_per_sec_plot_df,
#     x_axis_param="prefill_length",
#     # linestyle_mapping=linestyle_mapping,
#     style_dict=style_dict,
#     style_dict_colname_mapping_exact=False,
#     y_label="Tokens per Second",
#     title="",  # "Tokens per second during generation of 100 tokens after consuming varying prefill lengths (bs1 gl100)",
#     x_label="Prefill Length",
#     figsize=(1.5 * 12 * 1 / 2.54, 1.5 * 8 * 1 / 2.54),
#     filename=f"timetofirsttoken_tokens_per_sec{filename_suffix}",
#     add_legend=add_legend,
# )