### Imports

In [None]:
import os
import pickle

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

from utils import find_mean_coord, get_cdf, get_tick_positions, DATASETS_META, ROOT_DIR

In [None]:
# Start from Matplotlib's built-in defaults, then select font type 42
# (TrueType, per the Matplotlib rcParams documentation) for both the PDF and
# the PostScript backend.
mpl.rcParams.update(mpl.rcParamsDefault)
mpl.rcParams.update({"pdf.fonttype": 42, "ps.fonttype": 42})

### Figure 1 (Dataset Characteristics)

#### Preprocess

In [None]:
# A request is kept by the preprocessing loop only when its input length plus
# output length is strictly below this many tokens.
MAX_SEQ_LEN = 4 * 1024

In [None]:
def _token_count(tokens):
    """Return how many tokens ``tokens`` stands for.

    ``tokens`` is either a sized collection of token ids or a plain ``int``
    that already is the length. A negative ``int`` counts as 0, which is what
    building a list of that many placeholder ids and measuring it would give.
    """
    if isinstance(tokens, int):
        return max(tokens, 0)
    return len(tokens)


# Attach to every dataset a "requests" list of (input_len, output_len) pairs,
# dropping requests whose combined length reaches MAX_SEQ_LEN.
for name, meta in DATASETS_META.items():
    # NOTE: pickle.load can execute arbitrary code from the file; only point
    # meta["path"] at dataset dumps you trust.
    with open(meta["path"], "rb") as f:
        data = pickle.load(f)

    # Each entry of ``data`` must unpack into exactly (input, output).
    lengths = (
        (_token_count(input_tokens), _token_count(output_tokens))
        for input_tokens, output_tokens in data
    )

    # Filter out too long sequences (the bound itself is excluded).
    meta["requests"] = [
        (input_len, output_len)
        for input_len, output_len in lengths
        if input_len + output_len < MAX_SEQ_LEN
    ]

#### Plot

In [None]:
# Legend text for each dataset, looked up by the dataset's key in
# DATASETS_META when the Figure 1 curves are drawn.
dataset_label = dict(
    alpaca="Alpaca",
    cnn_dailymail="CNN DailyMail",
    dolly="Dolly",
    sharegpt="ShareGPT",
)

In [None]:
# Figure 1 uses one font size for axis titles, tick labels and the legend.
axis_title_size = label_size = legend_size = 32

In [None]:
def _plot_length_cdf(ax, length_index, with_legend_labels):
    """Draw one CDF curve and one marker per dataset, plus the 50% guide line.

    ``length_index`` selects which element of each (input_len, output_len)
    request pair is plotted. Curves carry a legend label only when
    ``with_legend_labels`` is true, so the shared figure legend lists every
    dataset exactly once. Datasets are always visited in sorted key order so
    both panels stack their curves identically.
    """
    for name, meta in sorted(DATASETS_META.items()):
        color = meta["color"]

        # Plot line
        x, y = get_cdf([req[length_index] for req in meta["requests"]])
        label = dataset_label[name] if with_legend_labels else None
        ax.plot(x, y, label=label, color=color, linewidth=4)

        # Plot mean values (marker at the coordinate find_mean_coord returns)
        x_mean, y_mean = find_mean_coord(x, y)
        ax.plot(x_mean, y_mean, marker="o", markersize=8, color=color)

    # Plot median line. The limits are read here, before the caller switches
    # the x axis to a log scale.
    xmin, xmax = ax.get_xlim()
    ax.hlines(y=0.5, xmin=xmin, xmax=xmax, colors="#999999", ls="--",
              linewidth=2, alpha=0.8)


def _format_length_axis(ax, xlabel):
    """Apply the shared log-scaled x axis (title, ticks, tick labels) to ``ax``."""
    ax.set_xlabel(xlabel, size=axis_title_size)
    ax.set_xscale("log", base=10, subs=[])
    ax.set_xticks([1, 10, 100, 1000])
    ax.set_xticklabels(["", "10", "100", "1000"])
    ax.tick_params(axis="x", labelsize=label_size)


scale = 2
fig, axs = plt.subplots(1, 2, figsize=[scale*6.4, 4.8], constrained_layout=True)

# Left panel: input lengths. Only this panel labels its curves and shows the
# y axis.
ax = axs.flat[0]
_plot_length_cdf(ax, 0, with_legend_labels=True)

# The CDF values are fractions; the tick labels present them as percentages.
ax.set_ylabel("Probability (%)", size=axis_title_size)
ax.set_yticks([0.0, 0.25, 0.5, 0.75, 1.0])
ax.set_yticklabels(["0 ", "25", "50", "75", "100"])
ax.tick_params(axis="y", labelsize=label_size)

_format_length_axis(ax, "Input Length (token)")

# Right panel: output lengths, without y ticks.
ax = axs.flat[1]
_plot_length_cdf(ax, 1, with_legend_labels=False)

ax.set_yticks([])

_format_length_axis(ax, "Output Length (token)")

# Single figure-level legend anchored above the panels.
leg = fig.legend(
    loc="upper center", fontsize=legend_size, ncols=4,
    bbox_to_anchor=(0.5, 1.25), columnspacing=1,
    handlelength=1.2, handletextpad=0.5
)

# Thicken the legend swatches relative to the plotted curves.
for handle in leg.legend_handles:
    handle.set_linewidth(7.5)

file_path = os.path.join(ROOT_DIR, "figures", "figure_1.pdf")

fig.savefig(file_path, dpi=300, bbox_inches="tight", format="pdf")

plt.show()

### Figure 2 (Maximum Request Rate under SLO per Use Case, Maximum Throughput per Use Case)

In [None]:
# Directory for the post-processed experiment pickles. The trailing slash is
# required: the dump/load cells build paths by plain string concatenation
# (RES_DIR + "<file>.pkl").
RES_DIR = "./proc-outputs/"

#### Preprocess

In [None]:
from metadata import (
    ONLINE_EXPERIMENTS_META_OPT_13B,
    ONLINE_EXPERIMENTS_META_LLAMA_2_13B,
    ONLINE_EXPERIMENTS_META_OPT_7B,
    ONLINE_EXPERIMENTS_META_LLAMA_2_7B
)

In [None]:
# Selectors for the online runs to load. Each value becomes one component of
# the results path built in the preprocessing loop:
#   .../n{N}/block{BLOCK_SIZE}/req-rate-*/seed{SEED}/duration-{DURATION}
SEED = 0
# NOTE(review): presumably seconds — confirm against the experiment runner.
DURATION = 600
BLOCK_SIZE = 16
# Only the completion at index 0 of each request is read by the online loop.
N = 1

In [None]:
# For every (model, dataset, scenario, request rate) run, load the raw
# per-request log and overwrite the scenario's request-rate entry with the
# average and 95th-percentile normalized latency (latency per output token).
online_meta_by_model = {
    "opt-13b": ONLINE_EXPERIMENTS_META_OPT_13B,
    "Llama-2-13b": ONLINE_EXPERIMENTS_META_LLAMA_2_13B,
    "opt-6.7b": ONLINE_EXPERIMENTS_META_OPT_7B,
    "Llama-2-7b": ONLINE_EXPERIMENTS_META_LLAMA_2_7B,
}

for model, online_experiments_meta in online_meta_by_model.items():
    for dataset, meta in online_experiments_meta.items():
        for scenario in meta["scenarios"]:
            total_blocks = scenario["total_blocks"]
            results_dir = os.path.abspath(
                os.path.join(ROOT_DIR, "results", f"online-{total_blocks}")
            )

            # Only the values stored under existing keys are replaced, so
            # iterating the same mapping while assigning into it is safe.
            for req_rate in scenario["req_rates"]:
                requests_path = os.path.join(
                    results_dir, dataset, model, f"n{N}", f"block{BLOCK_SIZE}",
                    f"req-rate-{req_rate}", f"seed{SEED}", f"duration-{DURATION}",
                    "requests.pkl",
                )
                with open(requests_path, "rb") as f:
                    requests = pickle.load(f)

                # End-to-end latency divided by the number of tokens in the
                # first completion.
                # NOTE(review): a request with an empty completion would
                # divide by zero here — confirm the logs never contain one.
                per_req_norm_latencies = [
                    (req["finish_time"] - req["arrival_time"])
                    / len(req["output"]["outputs"][0]["token_ids"])
                    for req in requests
                ]

                scenario["req_rates"][req_rate] = {
                    # Average normalized latency
                    "avg": np.mean(per_req_norm_latencies),
                    # P95 normalized latency
                    "p95": np.percentile(per_req_norm_latencies, 95),
                }

In [None]:
# Persist the post-processed online metadata so the plots can be regenerated
# from the "Load" cell without re-reading the raw results.

# Nothing visible in this notebook creates RES_DIR, so make sure it exists
# before the first open(..., 'wb').
os.makedirs(RES_DIR, exist_ok=True)

# File names are kept exactly as the "Load" cell opens them (note that only
# the first one contains "_meta").
online_dumps = {
    "online_experiments_meta_opt_13b.pkl": ONLINE_EXPERIMENTS_META_OPT_13B,
    "online_experiments_llama_2_13b.pkl": ONLINE_EXPERIMENTS_META_LLAMA_2_13B,
    "online_experiments_opt_6.7b.pkl": ONLINE_EXPERIMENTS_META_OPT_7B,
    "online_experiments_llama_2_7b.pkl": ONLINE_EXPERIMENTS_META_LLAMA_2_7B,
}

for filename, experiments_meta in online_dumps.items():
    with open(RES_DIR + filename, 'wb') as f:
        pickle.dump(experiments_meta, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
from metadata import (
    OFFLINE_EXPERIMENTS_META_OPT_13B,
    OFFLINE_EXPERIMENTS_META_LLAMA_2_13B,
    OFFLINE_EXPERIMENTS_META_OPT_7B,
    OFFLINE_EXPERIMENTS_META_LLAMA_2_7B
)

In [None]:
# Selectors for the offline runs to load. Each value becomes one component of
# the results path built in the preprocessing loop:
#   .../n{N}/block{BLOCK_SIZE}/num-requests-*/seed{SEED}
SEED = 0
BLOCK_SIZE = 16
N = 1

In [None]:
# For every (model, dataset, scenario, request count) run, load the raw
# per-request log and overwrite the scenario's entry with request and token
# throughput over the span from first arrival to last finish.
offline_meta_by_model = {
    "opt-13b": OFFLINE_EXPERIMENTS_META_OPT_13B,
    "Llama-2-13b": OFFLINE_EXPERIMENTS_META_LLAMA_2_13B,
    "opt-6.7b": OFFLINE_EXPERIMENTS_META_OPT_7B,
    "Llama-2-7b": OFFLINE_EXPERIMENTS_META_LLAMA_2_7B,
}

for model, offline_experiments_meta in offline_meta_by_model.items():
    for dataset, meta in offline_experiments_meta.items():
        for scenario in meta["scenarios"]:
            num_of_blocks = scenario["total_blocks"]
            results_dir = os.path.abspath(
                os.path.join(ROOT_DIR, "results", f"offline-{num_of_blocks}")
            )

            # Only the values stored under existing keys are replaced, so
            # iterating the same mapping while assigning into it is safe.
            for num_of_requests in scenario["num_of_requests"]:
                requests_path = os.path.join(
                    results_dir, dataset, model, f"n{N}", f"block{BLOCK_SIZE}",
                    f"num-requests-{num_of_requests}", f"seed{SEED}",
                    "requests.pkl",
                )
                with open(requests_path, "rb") as f:
                    requests = pickle.load(f)

                # Earliest arrival and latest finish bound the measured span;
                # min/max give the same values as sorting without the sort.
                start_time = min(request["arrival_time"] for request in requests)
                end_time = max(request["finish_time"] for request in requests)
                elapsed_time = end_time - start_time

                # Prompt tokens plus the tokens of every completion.
                total_num_tokens = 0
                for request in requests:
                    output = request["output"]
                    total_num_tokens += len(output["prompt_token_ids"])
                    total_num_tokens += sum(
                        len(completion_output["token_ids"])
                        for completion_output in output["outputs"].values()
                    )

                scenario["num_of_requests"][num_of_requests] = {
                    "request": len(requests) / elapsed_time,
                    "token": total_num_tokens / elapsed_time,
                }

In [None]:
# Persist the post-processed offline metadata so the plots can be regenerated
# from the "Load" cell without re-reading the raw results.

# Nothing visible in this notebook creates RES_DIR, so make sure it exists
# before the first open(..., 'wb').
os.makedirs(RES_DIR, exist_ok=True)

# File names are kept exactly as the "Load" cell opens them.
offline_dumps = {
    "offline_experiments_meta_opt_13b.pkl": OFFLINE_EXPERIMENTS_META_OPT_13B,
    "offline_experiments_meta_llama_2_13b.pkl": OFFLINE_EXPERIMENTS_META_LLAMA_2_13B,
    "offline_experiments_meta_opt_6.7b.pkl": OFFLINE_EXPERIMENTS_META_OPT_7B,
    "offline_experiments_meta_llama_2_7b.pkl": OFFLINE_EXPERIMENTS_META_LLAMA_2_7B,
}

for filename, experiments_meta in offline_dumps.items():
    with open(RES_DIR + filename, 'wb') as f:
        pickle.dump(experiments_meta, f, protocol=pickle.HIGHEST_PROTOCOL)

#### Load

In [None]:
def _load_online_meta(filename):
    """Return the object unpickled from ``RES_DIR + filename``."""
    # NOTE: pickle.load can execute arbitrary code from the file; these are
    # expected to be the files written by this notebook's own dump cell.
    with open(RES_DIR + filename, "rb") as f:
        return pickle.load(f)


# Restore the post-processed online metadata under the names the plotting
# cells read.
ONLINE_EXPERIMENTS_META_OPT_13B = _load_online_meta("online_experiments_meta_opt_13b.pkl")
ONLINE_EXPERIMENTS_META_LLAMA_2_13B = _load_online_meta("online_experiments_llama_2_13b.pkl")
ONLINE_EXPERIMENTS_META_OPT_7B = _load_online_meta("online_experiments_opt_6.7b.pkl")
ONLINE_EXPERIMENTS_META_LLAMA_2_7B = _load_online_meta("online_experiments_llama_2_7b.pkl")

In [None]:
def _load_offline_meta(filename):
    """Return the object unpickled from ``RES_DIR + filename``."""
    # NOTE: pickle.load can execute arbitrary code from the file; these are
    # expected to be the files written by this notebook's own dump cell.
    with open(RES_DIR + filename, "rb") as f:
        return pickle.load(f)


# Restore the post-processed offline metadata under the names the plotting
# cells read.
OFFLINE_EXPERIMENTS_META_OPT_13B = _load_offline_meta("offline_experiments_meta_opt_13b.pkl")
OFFLINE_EXPERIMENTS_META_LLAMA_2_13B = _load_offline_meta("offline_experiments_meta_llama_2_13b.pkl")
OFFLINE_EXPERIMENTS_META_OPT_7B = _load_offline_meta("offline_experiments_meta_opt_6.7b.pkl")
OFFLINE_EXPERIMENTS_META_LLAMA_2_7B = _load_offline_meta("offline_experiments_meta_llama_2_7b.pkl")

#### Plot

In [None]:
# Figure 2 font sizes: axis titles and tick labels share one size, the legend
# is slightly smaller.
axis_title_size = label_size = 22
legend_size = 20

In [None]:
# Latency SLO in seconds per token: 4 words per second at roughly 3/4 word
# per token is about 5.333 tokens/s, i.e. 0.75 / 4 = 0.1875 s/token.
SLO = 0.75 / 4
# Which stored latency summary is compared against the SLO.
METRIC = "p95"
# Which stored throughput figure is plotted, and for which request count.
THROUGHPUT_METRIC = "request"
NUM_OF_REQUESTS = 1000

In [None]:
# Hatch appearance for the second (hatched) bar series of Figure 2.
mpl.rcParams["hatch.color"] = "#141414"
mpl.rcParams["hatch.linewidth"] = 3.5
# One hatch pattern per series: the first is plain, the second is hatched.
hatches = ["", "//"]

# Bar groups (datasets) and the series drawn inside each group (models).
x = ["Alpaca", "CNN DailyMail", "Dolly", "ShareGPT"]
labels = ["OPT-13B", "Llama-2-13B"]
# w is also passed to ax.bar as the bar width by the plotting cells.
# NOTE(review): l and L are only forwarded to get_tick_positions; their
# meaning (presumably bar gap and group spacing) should be confirmed in utils.
w = 0.4
l = 0.05
L = 1

# The plotting cells index the result once per series (ticks[idx]).
ticks = get_tick_positions(x, labels, w, l, L)

In [None]:
def _max_rate_within_slo(scenario):
    """Return the request rate plotted as the SLO limit of ``scenario``.

    Request rates are scanned in ascending order for the first two
    consecutive rates whose METRIC latency both exceed SLO. The rate just
    before that pair is returned when its own latency does not exceed SLO.
    In every other case, including when no such pair exists, the result
    is 0.
    """
    latency_by_rate = scenario["req_rates"]
    rates = sorted(latency_by_rate)
    for i in range(2, len(rates)):
        if (latency_by_rate[rates[i]][METRIC] > SLO
                and latency_by_rate[rates[i - 1]][METRIC] > SLO):
            candidate = rates[i - 2]
            return 0 if latency_by_rate[candidate][METRIC] > SLO else candidate
    return 0


fig, ax = plt.subplots(figsize=[6.4, 4.8])

# One bar series per model; only scenarios with 643 total blocks are shown.
for idx, online_experiments_meta in enumerate(
    [ONLINE_EXPERIMENTS_META_OPT_13B, ONLINE_EXPERIMENTS_META_LLAMA_2_13B]
):
    max_request_rates = [
        _max_rate_within_slo(scenario)
        for meta in online_experiments_meta.values()
        for scenario in meta["scenarios"]
        if scenario["total_blocks"] == 643
    ]
    bar_colors = [meta["color"] for meta in online_experiments_meta.values()]

    ax.bar(ticks[idx], max_request_rates, w, color=bar_colors,
           label=labels[idx], hatch=hatches[idx])

ax.set_ylabel("Max Request Rate (req/s)", size=axis_title_size)
ax.set_yticks([0, 5, 10, 15, 20, 25, 30])
ax.tick_params(axis="y", labelsize=label_size)

ax.set_xticks([])

# The legend only distinguishes the models (plain vs hatched), so both
# swatches are drawn white with a dark outline instead of a dataset color.
leg = ax.legend(fontsize=legend_size)
for series_idx in (0, 1):
    swatch = leg.legend_handles[series_idx]
    swatch.set_facecolor('white')
    swatch.set_edgecolor('#141414')

# Hand-placed value annotations.
# NOTE(review): presumably these label bars too short to read; positions and
# values are hard-coded and must be re-checked whenever the data changes.
ax.text(0.625, 0.75, "0.5", fontsize=16)
ax.text(1.0125, 0.25, "0.02", fontsize=16)

file_path = os.path.join(ROOT_DIR, "figures", "figure_2a.pdf")

fig.savefig(file_path, dpi=300, bbox_inches="tight", format="pdf")
        

In [None]:
fig, ax = plt.subplots(figsize=[6.4, 4.8])

# One bar series per model; only scenarios with 643 total blocks are shown.
for idx, offline_experiments_meta in enumerate(
    [OFFLINE_EXPERIMENTS_META_OPT_13B, OFFLINE_EXPERIMENTS_META_LLAMA_2_13B]
):
    throughputs = [
        scenario["num_of_requests"][NUM_OF_REQUESTS][THROUGHPUT_METRIC]
        for meta in offline_experiments_meta.values()
        for scenario in meta["scenarios"]
        if scenario["total_blocks"] == 643
    ]

    bar_labels = [meta["name"] for meta in offline_experiments_meta.values()]
    bar_colors = [meta["color"] for meta in offline_experiments_meta.values()]

    # ``bars`` from the last series is reused by the Figure 2 legend cell.
    bars = ax.bar(ticks[idx], throughputs, w, color=bar_colors,
                  label=labels[idx], hatch=hatches[idx])

ax.set_ylabel("Throughput (req/s)", size=axis_title_size)
ax.tick_params(axis="y", labelsize=label_size)

ax.set_xticks([])

# The legend only distinguishes the models (plain vs hatched), so both
# swatches are drawn white with a dark outline instead of a dataset color.
leg = ax.legend(fontsize=legend_size)
for series_idx in (0, 1):
    swatch = leg.legend_handles[series_idx]
    swatch.set_facecolor('white')
    swatch.set_edgecolor('#141414')

file_path = os.path.join(ROOT_DIR, "figures", "figure_2b.pdf")

fig.savefig(file_path, dpi=300, bbox_inches="tight", format="pdf")

In [None]:
# Stand-alone legend mapping bar colors to dataset names, saved as its own
# PDF. It reuses ``bars`` left behind by the Figure 2b cell.
legendFig = plt.figure()

dataset_names = [
    meta["name"] for meta in OFFLINE_EXPERIMENTS_META_OPT_13B.values()
]
leg = legendFig.legend(
    bars,
    dataset_names,
    loc="lower left",
    fontsize=legend_size,
    ncols=4,
    handlelength=2,
)

# The reused bars belong to the hatched series; show plain color swatches.
for swatch in leg.legend_handles:
    swatch.set_hatch("")

file_path = os.path.join(ROOT_DIR, "figures", "legend_figure_2.pdf")

legendFig.savefig(file_path, dpi=300, bbox_inches="tight", format="pdf")
plt.close()

### Figure 3a (Request Rate under SLO per Number of Blocks)

In [None]:
# Hatch appearance for the hatched bars of Figure 3.
mpl.rcParams.update({"hatch.color": "#000000", "hatch.linewidth": 3.5})

In [None]:
# Figure 3 font sizes: axis titles and tick labels share one size, the legend
# is slightly smaller.
axis_title_size = label_size = 28
legend_size = 24

In [None]:
# 4 words per second, 1 token is approx 3/4 word, 5.333 token/s, SLO is 0.1875s/token
SLO = 0.1875
METRIC = "p95"

In [None]:
def _max_rate_under_slo(scenario):
    """Return the request rate plotted as the SLO limit of ``scenario``.

    Request rates are scanned in ascending order for the first two
    consecutive rates whose METRIC latency both exceed SLO. The rate just
    before that pair is returned when its own latency does not exceed SLO.
    In every other case, including when no such pair exists, the result
    is 0.
    """
    latency_by_rate = scenario["req_rates"]
    rates = sorted(latency_by_rate)
    for i in range(2, len(rates)):
        if (latency_by_rate[rates[i]][METRIC] > SLO
                and latency_by_rate[rates[i - 1]][METRIC] > SLO):
            candidate = rates[i - 2]
            return 0 if latency_by_rate[candidate][METRIC] > SLO else candidate
    return 0


def _max_rates_for(model_metas, dataset, total_blocks):
    """Return one SLO-limited rate per matching scenario of ``dataset``.

    ``model_metas`` is visited in order; a scenario matches when its
    "total_blocks" equals ``total_blocks``.
    """
    return [
        _max_rate_under_slo(scenario)
        for model_meta in model_metas
        for scenario in model_meta[dataset]["scenarios"]
        if scenario["total_blocks"] == total_blocks
    ]


scale = 2
fig, axs = plt.subplots(2, 2, figsize=[scale*6.4, scale*4.8], sharex=True)

# Bar groups (model families) and the series inside each group (block counts).
x = ["OPT", "Llama-2"]
num_of_total_blocks = [150, 300, 450, 600]
# w is also the bar width. NOTE(review): l and L are only forwarded to
# get_tick_positions; confirm their meaning in utils.
w = 0.175
l = 0.05
L = 1

# Legend text and bar color for each entry of num_of_total_blocks.
label_names = [r"1$\times$", r"2$\times$", r"3$\times$", r"4$\times$"]
colors = ["#cae6f4", "#9ad5f5", "#64c1f4", "#3fa7e7"]

# The two bar layers drawn at the same positions, in drawing order. Each entry
# is (metadata of the OPT and Llama-2 model, legend label by block count,
# extra ax.bar styling, whether its bars are collected in block_bars).
# Only the 150- and 300-block bars get a label; they exist to provide one
# legend handle per model, every other bar is hidden from the legend.
bar_layers = [
    (
        [ONLINE_EXPERIMENTS_META_OPT_7B, ONLINE_EXPERIMENTS_META_LLAMA_2_7B],
        {150: "OPT-6.7B", 300: "Llama-2-7B"},
        {},
        True,
    ),
    (
        [ONLINE_EXPERIMENTS_META_OPT_13B, ONLINE_EXPERIMENTS_META_LLAMA_2_13B],
        {150: "OPT-13B", 300: "Llama-2-13B"},
        {"hatch": "//", "alpha": .99},
        False,
    ),
]

# (y ticks, upper y limit) per dataset; anything not listed uses the default.
y_axis_by_dataset = {
    "alpaca": ([0, 10, 20, 30, 40, 50], 51),
    "cnn_dailymail": ([0, 2, 4, 6], 6.12),
    "sharegpt": ([0, 2, 4, 6], 6.12),
}
default_y_axis = ([0, 10, 20, 30], 30.6)

for dataset, dataset_name, ax in zip(
    ["alpaca", "cnn_dailymail", "dolly", "sharegpt"],
    ["Alpaca", "CNN DailyMail", "Dolly", "ShareGPT"],
    axs.flat,
):
    positions = get_tick_positions(x, num_of_total_blocks, w, l, L)

    # Legend handles, rebuilt for every panel; the values left after the last
    # panel are what a legend cell run afterwards picks up.
    opt_bars = []
    llama_bars = []
    block_bars = []
    for idx, total_blocks in enumerate(num_of_total_blocks):
        for model_metas, legend_labels, bar_style, collect_block_bar in bar_layers:
            max_request_rates = _max_rates_for(model_metas, dataset, total_blocks)
            label = legend_labels.get(total_blocks, "_nolegend_")

            bar = ax.bar(positions[idx], max_request_rates, w,
                         color=colors[idx], label=label, **bar_style)

            if total_blocks == 150:
                opt_bars.append(bar)

            if total_blocks == 300:
                llama_bars.append(bar)

            if collect_block_bar:
                block_bars.append(bar)

    ax.tick_params(axis="y", labelsize=label_size)

    y_ticks, y_max = y_axis_by_dataset.get(dataset, default_y_axis)
    ax.set_yticks(y_ticks)
    ax.set_ylim(0, y_max)

    ax.set_xticks([0, 1])
    ax.set_xticklabels(["OPT", "Llama-2"])
    ax.tick_params(axis="x", labelsize=label_size)

    ax.set_title(dataset_name, fontsize=axis_title_size)

# One y-axis title shared by all four panels.
fig.text(0.025, 0.5, "Max Request Rate (req/s)", size=axis_title_size+4, va='center', rotation='vertical')

file_path = os.path.join(ROOT_DIR, "figures", "figure_3a.pdf")

fig.savefig(file_path, dpi=300, bbox_inches="tight", format="pdf")

### Figure 3b (Throughput per Number of Blocks)

In [None]:
# Hatch appearance for the hatched bars of Figure 3.
mpl.rcParams.update({"hatch.color": "#000000", "hatch.linewidth": 3.5})

In [None]:
# Figure 3 font sizes: axis titles and tick labels share one size, the legend
# is slightly smaller.
axis_title_size = label_size = 28
legend_size = 24

In [None]:
# Which throughput figure stored by the offline preprocessing is plotted
# ("request" or "token"), and for which request count.
THROUGHPUT_METRIC = "request"
NUM_OF_REQUESTS = 1000

In [None]:
def _throughputs_for(model_metas, dataset, total_blocks):
    """Return one throughput value per matching scenario of ``dataset``.

    ``model_metas`` is visited in order; a scenario matches when its
    "total_blocks" equals ``total_blocks``. The value read is the
    THROUGHPUT_METRIC entry stored for NUM_OF_REQUESTS requests.
    """
    return [
        scenario["num_of_requests"][NUM_OF_REQUESTS][THROUGHPUT_METRIC]
        for model_meta in model_metas
        for scenario in model_meta[dataset]["scenarios"]
        if scenario["total_blocks"] == total_blocks
    ]


scale = 2
fig, axs = plt.subplots(2, 2, figsize=[scale*6.4, scale*4.8], sharex=True)

# Bar groups (model families) and the series inside each group (block counts).
x = ["OPT", "Llama-2"]
num_of_total_blocks = [150, 300, 450, 600]
# w is also the bar width. NOTE(review): l and L are only forwarded to
# get_tick_positions; confirm their meaning in utils.
w = 0.175
l = 0.05
L = 1

# Legend text and bar color for each entry of num_of_total_blocks.
label_names = [r"1$\times$", r"2$\times$", r"3$\times$", r"4$\times$"]
colors = ["#cae6f4", "#9ad5f5", "#64c1f4", "#3fa7e7"]

# The two bar layers drawn at the same positions, in drawing order. Each entry
# is (metadata of the OPT and Llama-2 model, legend label by block count,
# extra ax.bar styling, whether its bars are collected in block_bars).
# Only the 150- and 300-block bars get a label; they exist to provide one
# legend handle per model, every other bar is hidden from the legend.
bar_layers = [
    (
        [OFFLINE_EXPERIMENTS_META_OPT_7B, OFFLINE_EXPERIMENTS_META_LLAMA_2_7B],
        {150: "OPT-6.7B", 300: "Llama-2-7B"},
        {},
        True,
    ),
    (
        [OFFLINE_EXPERIMENTS_META_OPT_13B, OFFLINE_EXPERIMENTS_META_LLAMA_2_13B],
        {150: "OPT-13B", 300: "Llama-2-13B"},
        {"hatch": "//", "alpha": .99},
        False,
    ),
]

# (y ticks, upper y limit) per dataset; anything not listed uses the default.
y_axis_by_dataset = {
    "alpaca": ([0, 10, 20, 30, 40, 50], 54),
    "cnn_dailymail": ([0, 2, 4, 6, 8], 8.64),
    "sharegpt": ([0, 2, 4, 6, 8], 8.64),
}
default_y_axis = ([0, 10, 20, 30], 32.4)

for dataset, dataset_name, ax in zip(
    ["alpaca", "cnn_dailymail", "dolly", "sharegpt"],
    ["Alpaca", "CNN DailyMail", "Dolly", "ShareGPT"],
    axs.flat,
):
    positions = get_tick_positions(x, num_of_total_blocks, w, l, L)

    # Legend handles, rebuilt for every panel; the values left after the last
    # panel are what the Figure 3 legend cell picks up.
    opt_bars = []
    llama_bars = []
    block_bars = []
    for idx, total_blocks in enumerate(num_of_total_blocks):
        for model_metas, legend_labels, bar_style, collect_block_bar in bar_layers:
            throughputs = _throughputs_for(model_metas, dataset, total_blocks)
            label = legend_labels.get(total_blocks, "_nolegend_")

            bar = ax.bar(positions[idx], throughputs, w,
                         color=colors[idx], label=label, **bar_style)

            if total_blocks == 150:
                opt_bars.append(bar)

            if total_blocks == 300:
                llama_bars.append(bar)

            if collect_block_bar:
                block_bars.append(bar)

    ax.tick_params(axis="y", labelsize=label_size)

    y_ticks, y_max = y_axis_by_dataset.get(dataset, default_y_axis)
    ax.set_yticks(y_ticks)
    ax.set_ylim(0, y_max)

    ax.set_xticks([0, 1])
    ax.set_xticklabels(["OPT", "Llama-2"])
    ax.tick_params(axis="x", labelsize=label_size)

    ax.set_title(dataset_name, fontsize=axis_title_size)

# One y-axis title shared by all four panels.
fig.text(0.025, 0.5, "Throughput (req/s)", size=axis_title_size+4, va='center', rotation='vertical')

file_path = os.path.join(ROOT_DIR, "figures", "figure_3b.pdf")

fig.savefig(file_path, dpi=300, bbox_inches="tight", format="pdf")

In [None]:
# Stand-alone two-row legend for Figure 3, saved as its own PDF: the top row
# names the models, the bottom row the memory multipliers. It reuses the bar
# handles (opt_bars, llama_bars, block_bars) and label_names left behind by
# the most recently run Figure 3 plotting cell.
scale = 2.25
legendFig, axs = plt.subplots(2, 1, figsize=[scale*6.4, 3], gridspec_kw={'wspace': 0, 'hspace': 0})

handles = opt_bars + llama_bars
# Kept under its own name so the ``labels`` list read by the Figure 2 cells is
# not overwritten when this cell runs.
model_labels = ["OPT-6.7B", "OPT-13B", "Llama-2-7B", "Llama-2-13B"]

axs[0].axis("off")
leg_1 = axs[0].legend(
    handles, model_labels, loc="lower center", fontsize=legend_size, ncols=4,
    handlelength=1.5, columnspacing=1.5, mode="expand", handletextpad=0.6,
    title="Models", title_fontsize=legend_size
)

# Model swatches differ only by hatch, so their fill color is removed.
for handle in leg_1.legend_handles:
    handle.set_facecolor('white')
    handle.set_edgecolor('#141414')

axs[1].axis("off")
leg_2 = axs[1].legend(
    block_bars, label_names, loc="lower center", fontsize=legend_size, ncols=4,
    handlelength=2, mode="expand",
    title="Memory", title_fontsize=legend_size
)

# Memory swatches keep their fill color and only gain an outline.
for handle in leg_2.legend_handles:
    handle.set_edgecolor('#141414')

file_path = os.path.join(ROOT_DIR, "figures", "legend_figure_3.pdf")

legendFig.savefig(file_path, dpi=300, bbox_inches="tight", format="pdf")
plt.close()