In [None]:
import pandas as pd
import seaborn as sns
import os


In [None]:
dfs = []

bench_dir = "bench"

# Expected layout: bench/<cmd>/<store_config>/timings.csv, where the
# <store_config> directory name encodes the run as "key=value,key=value,...".
# Listings are sorted so the row order of the combined frame (and therefore
# hue ordering in the plots) does not depend on filesystem listing order.
for cmd in sorted(os.listdir(bench_dir)):
    cmd_dir = os.path.join(bench_dir, cmd)
    if not os.path.isdir(cmd_dir):
        # a stray file here would make the inner listdir raise
        continue

    for store_config in sorted(os.listdir(cmd_dir)):
        print(f"processing {cmd}/{store_config}")

        # Check for the timings file before parsing the directory name, so
        # unrelated entries without results are skipped instead of crashing.
        timings_path = os.path.join(cmd_dir, store_config, "timings.csv")
        if not os.path.exists(timings_path):
            continue

        config = {}
        for part in store_config.split(","):
            # maxsplit=1 keeps any further "=" inside the value
            key, value = part.split("=", 1)
            config[key] = value

        store = config["name"]
        tls = config["tls"] == "True"
        enclave = "sgx" if config["sgx"] == "True" else "virtual"
        worker_threads = int(config["worker_threads"])
        clients = int(config["clients"])
        connections = int(config["connections"])

        df = pd.read_csv(timings_path)

        # Rebase timestamps on the earliest start in this run and convert
        # microseconds to milliseconds; the raw micros columns are dropped.
        start = df["start_micros"].min()
        df = df.assign(
            start_ms=(df["start_micros"] - start) / 1000,
            end_ms=(df["end_micros"] - start) / 1000,
        ).drop(columns=["start_micros", "end_micros"])
        df["latency_ms"] = df["end_ms"] - df["start_ms"]

        # Tag every row with the configuration of the run it came from.
        df["cmd"] = cmd
        df["store"] = store
        df["tls"] = tls
        df["enclave"] = enclave
        df["worker_threads"] = worker_threads
        df["clients"] = clients
        df["connections"] = connections
        dfs.append(df)

if not dfs:
    # pd.concat would raise a less helpful "No objects to concatenate"
    raise ValueError(f"no timings.csv files found under {bench_dir!r}")

all_data = pd.concat(dfs, ignore_index=True)
all_data.head()


In [None]:
# show any rows whose computed latency is negative
all_data.loc[all_data["latency_ms"] < 0]


In [None]:
# Keep only rows with 0 <= latency_ms < MAX_LATENCY_MS.
MAX_LATENCY_MS = 500
valid_latency = (all_data.latency_ms >= 0) & (all_data.latency_ms < MAX_LATENCY_MS)
# .copy() makes the result an independent frame, so adding columns to it
# afterwards does not trigger pandas' SettingWithCopyWarning.
all_data = all_data[valid_latency].copy()


In [None]:
def merged_variable_columns_without(all_data, without):
    """Build one comma-separated label per row from the variable columns.

    Variable columns are all columns of ``all_data`` except the measurement
    columns (``start_ms``, ``start_s``, ``end_ms``, ``latency_ms``). Those
    listed in ``without`` are left out of the label.

    Each remaining column contributes one piece, in column order:
      * ``store`` -> its value as a string
      * ``tls``   -> ``"tls"`` when truthy, ``"notls"`` otherwise
      * others    -> ``"<column>=<value>"``

    Parameters
    ----------
    all_data : pandas.DataFrame
        Frame whose columns are merged.
    without : container of str
        Column names to exclude from the label.

    Returns
    -------
    pandas.Series
        Labels aligned to ``all_data.index``. When no variable column
        remains, every label is the empty string.
    """
    data_columns = ["start_ms", "start_s", "end_ms", "latency_ms"]
    # variable columns are all the ones left once data columns are removed
    remaining_columns = [
        c for c in all_data.columns if c not in data_columns and c not in without
    ]

    def make_new_column(name):
        if name == "store":
            return all_data[name].astype(str)
        if name == "tls":
            return all_data[name].map(lambda t: "tls" if t else "notls")
        return f"{name}=" + all_data[name].astype(str)

    # Start from an index-aligned Series so the no-columns case still yields
    # one (empty) label per row rather than an unindexed empty Series.
    merged = pd.Series("", index=all_data.index, dtype=object)
    for position, name in enumerate(remaining_columns):
        piece = make_new_column(name)
        merged = piece if position == 0 else merged + "," + piece

    return merged

In [None]:
# start time in seconds, derived from the millisecond column
all_data["start_s"] = all_data["start_ms"].div(1000)


In [None]:
# display the combined frame for inspection
all_data


In [None]:
# make the plots dir that the figures are saved into; exist_ok=True means
# re-running this cell does not fail when the directory already exists
os.makedirs("plots", exist_ok=True)


In [None]:
# Scatter of latency against start time for "put" requests, one facet per
# (clients, connections) pair, coloured by the remaining run variables.
put_data = all_data[all_data["cmd"] == "put"]
# assign() returns a new frame, so the label column is never written into a
# filtered slice (which raises SettingWithCopyWarning). Labels are computed
# only for the rows that are actually plotted.
plot_data = put_data.assign(
    other_vars=merged_variable_columns_without(
        put_data, ["cmd", "enclave", "clients", "connections"]
    )
)

p = sns.relplot(
    kind="scatter",
    data=plot_data,
    x="start_s",
    y="latency_ms",
    row="clients",
    col="connections",
    hue="other_vars",
    alpha=0.5,
)

# add tick labels to each x axis (facet grids hide them on inner rows)
for ax in p.axes.flat:
    ax.tick_params(labelbottom=True)

p.savefig("plots/scatter.svg")
p.savefig("plots/scatter.jpg")


In [None]:
# Empirical CDF of latency for "put" requests, one facet per
# (clients, connections) pair, coloured by the remaining run variables.
put_data = all_data[all_data["cmd"] == "put"]
# assign() returns a new frame, so the label column is never written into a
# filtered slice (which raises SettingWithCopyWarning). Labels are computed
# only for the rows that are actually plotted.
plot_data = put_data.assign(
    other_vars=merged_variable_columns_without(
        put_data, ["cmd", "enclave", "clients", "connections"]
    )
)

p = sns.displot(
    kind="ecdf",
    data=plot_data,
    x="latency_ms",
    row="clients",
    col="connections",
    hue="other_vars",
)

# add tick labels to each x axis (facet grids hide them on inner rows)
for ax in p.axes.flat:
    ax.tick_params(labelbottom=True)

p.savefig("plots/ecdf.svg")
p.savefig("plots/ecdf.jpg")
