In [None]:
import pandas as pd
import seaborn as sns
import os


In [None]:
# Load every benchmark run found under `bench_dir` into one long-format frame.
#
# Each entry name encodes the run configuration as comma-separated `key=value`
# pairs, and a run is only loaded when its directory contains a `timings.csv`
# with `start_micros` / `end_micros` columns.
dfs = []

bench_dir = "bench/etcd"

# Rows whose start timestamp is not above this value are dropped as incomplete.
# NOTE(review): presumably a Unix timestamp in microseconds (mid-October 2022) — confirm.
min_valid_start_micros = 1666000000000000

# sorted() makes the processing order (and therefore the row order of
# `all_data`) reproducible; os.listdir() returns entries in arbitrary order.
for store_config in sorted(os.listdir(bench_dir)):
    print(f"processing {store_config}")

    # Check for the timings file first so that stray entries without one are
    # skipped instead of crashing the name parser below.
    file = os.path.join(bench_dir, store_config, "timings.csv")
    if not os.path.exists(file):
        continue

    config = {}
    for part in store_config.split(","):
        # Split on the first "=" only, so values that contain "=" stay intact.
        key, value = part.split("=", 1)
        config[key] = value

    store = config["store"]
    tls = config["tls"] == "True"
    enclave = "sgx" if config["sgx"] == "True" else "virtual"
    worker_threads = int(config["worker_threads"])
    clients = int(config["clients"])
    connections = int(config["connections"])
    prefill_num_keys = int(config["prefill_num_keys"])
    prefill_value_size = int(config["prefill_value_size"])
    rate = int(config["rate"])
    total = int(config["total"])
    cmd = config["bench_args"]

    df = pd.read_csv(file)
    # fix csv files not being fully complete; .copy() gives an independent
    # frame so the column edits below do not raise SettingWithCopyWarning
    df = df[df["start_micros"] > min_valid_start_micros].copy()
    if df.empty:
        # No usable rows in this run; it would contribute nothing to the result.
        continue
    start = df["start_micros"].min()

    # Re-base both timestamps on the first request of the run, in milliseconds.
    df["start_ms"] = (df["start_micros"] - start) / 1000
    df["end_ms"] = (df["end_micros"] - start) / 1000
    df = df.drop(columns=["start_micros", "end_micros"])
    df["latency_ms"] = df["end_ms"] - df["start_ms"]

    # Attach the run configuration to every row.
    df["cmd"] = cmd
    df["store"] = store
    df["tls"] = tls
    df["enclave"] = enclave
    df["worker_threads"] = worker_threads
    df["clients"] = clients
    df["connections"] = connections
    df["prefill_num_keys"] = prefill_num_keys
    df["prefill_value_size"] = prefill_value_size
    df["rate"] = rate
    df["total"] = total
    dfs.append(df)

if not dfs:
    # pd.concat would raise a ValueError here anyway; say which directory was empty.
    raise ValueError(f"no usable timings.csv found under {bench_dir!r}")

all_data = pd.concat(dfs, ignore_index=True)
all_data.head()


In [None]:
# Show the rows whose computed latency is negative (end before start).
all_data.loc[all_data["latency_ms"] < 0]


In [None]:
# Keep only rows with a non-negative latency. The explicit .copy() makes the
# result an independent frame, so adding columns to it in later cells does not
# raise SettingWithCopyWarning.
all_data = all_data[all_data["latency_ms"] >= 0].copy()


In [None]:
def merged_variable_columns_without(all_data, without):
    """Collapse the configuration columns of ``all_data`` into plot labels.

    Every column that is neither a measurement column (``start_ms``,
    ``start_s``, ``end_ms``, ``latency_ms``) nor listed in ``without`` is
    treated as a configuration variable. Variables holding exactly one
    distinct value are reported once as invariant labels; the others are
    merged row by row into a single comma-separated label.

    Args:
        all_data: DataFrame holding measurement and configuration columns.
        without: Column names to leave out of the labels. Names that are not
            columns of ``all_data`` (for example ``""``) are ignored.

    Returns:
        A ``(variant_column, invariant_columns)`` tuple. ``variant_column`` is
        a Series indexed like ``all_data`` with one label per row, such as
        ``"tls,rate=100"``; it holds empty strings when nothing varies.
        ``invariant_columns`` is a list with one label per invariant variable.
    """
    data_columns = ["start_ms", "start_s", "end_ms", "latency_ms"]
    # variable columns are all the ones left
    remaining_columns = [
        c for c in all_data.columns if c not in data_columns and c not in without
    ]

    def make_new_column(name):
        # "store" and "tls" are printed bare; everything else as "name=value".
        if name == "store":
            return all_data[name].astype(str)
        elif name == "tls":
            return all_data[name].map(lambda t: "tls" if t else "notls")
        else:
            return f"{name}=" + all_data[name].astype(str)

    invariant_columns = []
    variant_columns = []
    for c in remaining_columns:
        # nunique() is vectorised and, with dropna=False, counts repeated NaN
        # as one value. An empty column has 0 distinct values and therefore
        # stays on the variant side, where no element access is needed.
        if all_data[c].nunique(dropna=False) == 1:
            invariant_columns.append(make_new_column(c).iat[0])
        else:
            variant_columns.append(c)

    # Start from empty labels aligned to the data, so that the result can be
    # assigned as a column even when no variable varies (an unaligned empty
    # Series would turn into an all-NaN column).
    variant_column = pd.Series("", index=all_data.index, dtype=str)
    for i, c in enumerate(variant_columns):
        piece = make_new_column(c)
        variant_column = piece if i == 0 else variant_column + "," + piece

    return variant_column, invariant_columns


In [None]:
# Derive the request start time in seconds. assign() returns a new frame, so
# this neither writes into a filtered slice (SettingWithCopyWarning) nor
# behaves differently when the cell is re-run.
all_data = all_data.assign(start_s=all_data["start_ms"] / 1000)


In [None]:
# Display the combined frame for a visual check before plotting.
all_data


In [None]:
# Create the directory that plot_scatter saves its figures into. Intermediate
# directories are created as needed, and exist_ok=True means an already
# existing directory is not an error, so the cell can be re-run safely.
os.makedirs("plots/etcd", exist_ok=True)


In [None]:
def plot_scatter(data, x="start_s", y="latency_ms", row="", col="", ignore_vars=None, filename=""):
    """Draw a faceted scatter plot and save it as SVG and JPG under ``plots/etcd``.

    Points are coloured by a merged label of every configuration column that
    still varies within ``data`` (see ``merged_variable_columns_without``);
    the configuration values that do not vary form the figure title.

    Args:
        data: DataFrame to plot. It is not modified; the label column is added
            to a copy.
        x: Column for the x axis.
        y: Column for the y axis.
        row: Column to facet rows by, or ``""`` for no row facet.
        col: Column to facet columns by, or ``""`` for no column facet.
        ignore_vars: Extra column names to leave out of the colour label.
        filename: File name without extension; derived from the plotted
            columns when empty.

    Returns:
        The seaborn ``FacetGrid`` produced by ``sns.relplot``.
    """
    hue = "vars"
    # None instead of a mutable default argument; copy so callers' lists are untouched.
    ignore_vars = list(ignore_vars) if ignore_vars else []

    var, invariant_vars = merged_variable_columns_without(
        data, [x, y, row, col, hue] + ignore_vars
    )
    # assign() returns a new frame: the caller's (usually filtered) frame is
    # left alone, which also avoids SettingWithCopyWarning.
    data = data.assign(**{hue: var})

    p = sns.relplot(
        kind="scatter",
        data=data,
        x=x,
        y=y,
        # seaborn interprets a string as a column name, so an unused facet
        # must be passed as None rather than "".
        row=row or None,
        col=col or None,
        hue=hue,
        alpha=0.5,
    )

    # leave room above the facets for the title
    p.figure.subplots_adjust(top=0.9)
    p.figure.suptitle(",".join(invariant_vars))

    # add tick labels to each x axis
    for ax in p.axes.flatten():
        ax.tick_params(labelbottom=True)

    if not filename:
        filename = f"scatter-{x}-{y}-{row}-{col}-{hue}"

    # Make sure the target directory exists even if no earlier cell created it.
    os.makedirs("plots/etcd", exist_ok=True)
    p.savefig(f"plots/etcd/{filename}.svg")
    p.savefig(f"plots/etcd/{filename}.jpg")

    return p


In [None]:
# Put latency over time for one client on one connection, faceted by request
# rate (rows) and worker thread count (columns).
single_client_put = (
    (all_data["cmd"] == "put")
    & (all_data["connections"] == 1)
    & (all_data["clients"] == 1)
)
plot_data = all_data.loc[single_client_put]

p = plot_scatter(
    plot_data,
    row="rate",
    col="worker_threads",
    ignore_vars=["total"],
)


In [None]:
# Same view as the put scatter, but for the "range_0000_1000" command.
single_client_range = (
    (all_data["cmd"] == "range_0000_1000")
    & (all_data["connections"] == 1)
    & (all_data["clients"] == 1)
)
plot_data = all_data.loc[single_client_range]

p = plot_scatter(
    plot_data,
    row="rate",
    col="worker_threads",
    ignore_vars=["total"],
)


In [None]:
# Put latency at rate 100 with at most one worker thread, faceted by client
# count (rows) and connection count (columns).
low_thread_put_at_100 = (
    (all_data["cmd"] == "put")
    & (all_data["worker_threads"] <= 1)
    & (all_data["rate"] == 100)
)
plot_data = all_data.loc[low_thread_put_at_100]

p = plot_scatter(plot_data, row="clients", col="connections")


In [None]:
# Put latency for 10 clients on 10 or more connections with at most one
# worker thread, faceted by connection count (rows) and request rate (columns).
ten_client_put = (
    (all_data["cmd"] == "put")
    & (all_data["worker_threads"] <= 1)
    & (all_data["clients"] == 10)
    & (all_data["connections"] >= 10)
    # & (all_data["enclave"] == "virtual")
)
plot_data = all_data.loc[ten_client_put]

p = plot_scatter(
    plot_data,
    row="connections",
    col="rate",
    ignore_vars=["total"],
)


In [None]:
# Empirical CDF of put latency, faceted by request rate (rows) and worker
# thread count (columns), coloured by the remaining configuration variables.
plot_data = all_data.loc[all_data["cmd"] == "put"]

# NOTE(review): the labels are derived from all of all_data rather than only
# the put rows, so a column that varies only across other commands still
# appears in the legend — confirm this is intended.
variant_labels, invariant_vars = merged_variable_columns_without(
    all_data, ["rate", "worker_threads"]
)
# assign() aligns the labels on the index and returns a new frame, instead of
# writing into a filtered slice (which raises SettingWithCopyWarning).
plot_data = plot_data.assign(vars=variant_labels)

p = sns.displot(
    kind="ecdf",
    data=plot_data,
    x="latency_ms",
    row="rate",
    col="worker_threads",
    hue="vars",
)

# leave room above the facets for the title
p.figure.subplots_adjust(top=0.8)
p.figure.suptitle(",".join(invariant_vars))

# add tick labels to each x axis
for ax in p.axes.flatten():
    ax.tick_params(labelbottom=True)

# Make sure the target directory exists even if no earlier cell created it.
os.makedirs("plots", exist_ok=True)
p.savefig("plots/ecdf.svg")
p.savefig("plots/ecdf.jpg")


In [None]:
# Achieved vs. requested throughput for put with 10 clients, 10 connections
# and at most one worker thread.
ten_by_ten_put = (
    (all_data["cmd"] == "put")
    & (all_data["worker_threads"] <= 1)
    & (all_data["clients"] == 10)
    & (all_data["connections"] == 10)
)
plot_data = all_data.loc[ten_by_ten_put]

variant_labels, invariant_vars = merged_variable_columns_without(
    all_data, ["rate", "prefill_num_keys", "total"]
)
# assign() aligns the labels on the index and returns a new frame, instead of
# writing into a filtered slice (which raises SettingWithCopyWarning).
plot_data = plot_data.assign(vars=variant_labels)

# "rate" is part of the key because it is excluded from the "vars" label:
# without it, runs that differ only in rate would be merged into one group
# and their request counts summed over a single run's duration.
grouped = plot_data.groupby(["vars", "prefill_num_keys", "total", "rate"])
throughputs = grouped.first()

# Wall-clock span of each group in seconds, and the requests completed in it.
durations = (grouped["end_ms"].max() - grouped["start_ms"].min()) / 1000
counts = grouped["start_ms"].count()
achieved_throughput = counts / durations
throughputs["achieved_throughput"] = achieved_throughput

# Turn the group keys (including "rate") back into ordinary columns before
# comparing the achieved throughput with the requested rate.
throughputs = throughputs.reset_index()
throughputs["achieved_throughput_ratio"] = (
    throughputs["achieved_throughput"] / throughputs["rate"]
)

p = sns.catplot(
    kind="bar",
    data=throughputs,
    x="rate",
    y="achieved_throughput_ratio",
    # row="rate",
    col="prefill_num_keys",
    hue="vars",
)

# leave room above the facets for the title
p.figure.subplots_adjust(top=0.8)
p.figure.suptitle(",".join(invariant_vars))

# add tick labels to each x axis
for ax in p.axes.flatten():
    ax.tick_params(labelbottom=True)

# Make sure the target directory exists even if no earlier cell created it.
os.makedirs("plots", exist_ok=True)
p.savefig("plots/throughput.svg")
p.savefig("plots/throughput.jpg")
