In [None]:
import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt
import numpy as np
import analysis


In [None]:
# Build the project's Analyser for the "k6" results and load everything it
# provides into one DataFrame; the bare `all_data` on the last line makes the
# notebook display the frame.
# NOTE(review): "k6" is presumably the benchmark/results name the Analyser uses
# to locate its data - confirm in analysis.Analyser.
analyser = analysis.Analyser("k6")
all_data = analyser.get_data()
all_data


In [None]:
# Keep only the request-duration samples (HTTP and gRPC) and show which
# columns are available for them.
duration_metrics = ["http_req_duration", "grpc_req_duration"]
is_duration_sample = all_data["metric_name"].isin(duration_metrics)
req_duration_data = all_data[is_duration_sample]
req_duration_data.columns


In [None]:
# drop setup data: discard every sample recorded in the "::setup" group
in_setup_group = req_duration_data["group"] == "::setup"
req_duration_data = req_duration_data[~in_setup_group]


In [None]:
# Split every request name (a URL) into a "path" and an optional "query" column.
#
# Take an explicit copy first: `req_duration_data` is a filtered selection of
# `all_data`, and adding columns to such a selection triggers pandas'
# SettingWithCopyWarning.
req_duration_data = req_duration_data.copy()
parts = (
    req_duration_data["name"]
    .str.split("?", expand=True)
    .rename(columns={0: "path", 1: "query"})
)
# Remove the server address as a literal prefix. `str.lstrip` interprets its
# argument as a *set of characters*, so the previous
# `lstrip("https://127.0.0.1:8000")` also swallowed leading path characters
# such as "t", "p" or "s" (".../tx" became "x"). Leading slashes are still
# dropped so paths keep the "app/tx" form compared against later on, and the
# scheme is optional because the old character-set strip removed a bare
# "127.0.0.1:8000" prefix as well.
parts["path"] = parts["path"].str.replace(
    r"^(?:(?:https://)?127\.0\.0\.1:8000)?/*", "", regex=True
)
req_duration_data["path"] = parts["path"]
if "query" in parts:
    req_duration_data["query"] = parts["query"]
else:
    # No name contained "?", so the split produced no second column.
    req_duration_data["query"] = None


In [None]:
# normalise endpoints from path: the endpoint is the last "/"-separated
# segment of the path, lower-cased and with underscores removed
def _normalise_endpoint(path):
    """Return the final path segment in lower case without underscores."""
    last_segment = path.rsplit("/", 1)[-1]
    return last_segment.lower().replace("_", "")


req_duration_data["endpoint"] = req_duration_data["path"].map(_normalise_endpoint)


In [None]:
# Column names handed to `analysis.condense_vars` and to the `ignore_vars`
# argument of the `analyser.plot_*` helpers in the cells below.
# NOTE(review): presumably these are the columns that must not be treated as
# experiment variables when runs are grouped - confirm in analysis.py.
ignore_vars = [
    "metric_name",
    "metric_value",
    "service",
    "subproto",
    "proto",
    "scenario",
    "status",
    "tls_version",
    "url",
    "name",
    "extra_tags",
    "metadata",
    "check",
    "error",
    "error_code",
    "expected_response",
    "group",
    "method",
    "query",
    "path",
    "nodes",
]


In [None]:
# ECDF of how many requests were issued per transaction id, split by the
# condensed experiment variables. Skipped when no request carried a query.
data = req_duration_data.copy()
if data["query"].notna().any():
    var, invariant_vars = analysis.condense_vars(data, ignore_vars + ["path"])
    data["vars"] = var

    # The transaction id is whatever follows the first "=" in the query string.
    txids = data["query"].str.split("=", expand=True)[1]
    data["txids"] = txids

    # One row per (txid, vars) pair holding the number of matching requests.
    txid_counts = data.groupby(["txids", "vars"]).size().to_frame("committed_count")

    ax = sns.displot(kind="ecdf", data=txid_counts, x="committed_count", hue="vars")
    ax.set(title="number of commit checks before committed")
    filename = "commit_checks_ecdf-committed_count-None-None-vars"
    ax.savefig(os.path.join(analyser.plot_dir(), filename + ".jpg"))


In [None]:
# ECDF of the commit latency: for every (path, query, vars) group, the time
# from the earliest request start to the latest request end. Skipped when no
# request carried a query.
data = req_duration_data.copy()
if len(data["query"].dropna()) > 0:
    var, invariant_vars = analysis.condense_vars(data, ignore_vars + ["path"])
    data["vars"] = var

    grouped = data.groupby(["path", "query", "vars"])

    # Select the column before aggregating. The first positional parameter of
    # GroupBy.min/max is `numeric_only`, not a column name, so the previous
    # `grouped.min("start_ms")` aggregated every numeric column and only
    # produced the right values by accident.
    starts = grouped["start_ms"].min()
    ends = grouped["end_ms"].max()

    commit_latency_ms = ends - starts

    latencies = commit_latency_ms.to_frame("commit_latency")

    # "vars" is an index level of `latencies`; it is referenced by name as hue.
    ax = sns.displot(kind="ecdf", data=latencies, x="commit_latency", hue="vars")
    ax.set(title="commit latency")
    filename = "commit_latency_ecdf-commit_latency-None-None-vars"
    ax.savefig(os.path.join(analyser.plot_dir(), f"{filename}.jpg"))


In [None]:
# plot_data = req_duration_data.copy(deep=False)
# plot_data = plot_data[plot_data["content_type"] == "json"]
# plot_data = plot_data[plot_data["rate"] <= 200]
# plot_data = plot_data[plot_data["enclave"] == "virtual"]
# p = analyser.plot_scatter(plot_data, col="http_version", ignore_vars=ignore_vars)
# p.figure.suptitle("")
# p.set(xlabel="start time (ms)", ylabel="latency (ms)")


# All vars

In [None]:
# Latency ECDF over all request-duration samples, drawn by the project's
# `analyser.plot_ecdf` helper.
# NOTE(review): `col="endpoint"` presumably produces one facet per endpoint -
# confirm in analysis.Analyser.plot_ecdf.
plot_data = req_duration_data.copy(deep=False)
p = analyser.plot_ecdf(plot_data, col="endpoint", ignore_vars=ignore_vars)
# Blank out the figure-level title.
p.figure.suptitle("")
p.set(xlabel="latency (ms)")


In [None]:
# 99th-percentile latency over time, drawn by the project's
# `analyser.plot_percentile_latency_over_time` helper.
# NOTE(review): `col="http_version"` presumably facets by HTTP version -
# confirm in analysis.Analyser.
plot_data = req_duration_data.copy(deep=False)
p = analyser.plot_percentile_latency_over_time(
    plot_data, col="http_version", ignore_vars=ignore_vars, percentile=0.99
)
# Blank out the figure-level title.
p.figure.suptitle("")
p.set(xlabel="time (ms)", ylabel="latency (ms)")


In [None]:
# Achieved throughput over time, drawn by the project's
# `analyser.plot_throughput_over_time` helper. "endpoint" is added to the
# ignored columns for this plot.
# NOTE(review): `interval=1000` is presumably the bucket width in
# milliseconds (the x axis is labelled in ms) - confirm in analysis.Analyser.
plot_data = req_duration_data.copy(deep=False)
p = analyser.plot_throughput_over_time(
    plot_data, col="http_version", ignore_vars=ignore_vars + ["endpoint"], interval=1000
)
# Blank out the figure-level title.
p.figure.suptitle("")
p.set(xlabel="time (ms)", ylabel="achieved throughput (req/s)")


# Latency and throughput plot function

In [None]:
def plot_latency_and_throughput(
    datasets, labels, ignore_vars, interval=1, percentile=0.99
):
    """Plot tail latency and achieved throughput over time, one line per dataset.

    Two stacked panels share the x axis: the upper panel shows the
    ``percentile`` quantile of ``latency_ms`` in each time bin, the lower
    panel the number of requests per second that started in each bin.

    Args:
        datasets: DataFrames providing ``start_s`` (request start, seconds) and
            ``latency_ms`` columns.
        labels: Legend labels, paired positionally with ``datasets``.
        ignore_vars: Column names passed to ``analysis.condense_vars`` when
            checking that a dataset does not mix several configurations.
        interval: Width of a time bin in seconds.
        percentile: Latency quantile to plot, as a fraction between 0 and 1.

    Returns:
        The ``(figure, axis)`` pair created by ``plt.subplots(2, 1, sharex=True)``.

    Raises:
        AssertionError: If ``analysis.condense_vars`` returns a non-empty
            variable result for any dataset.
    """
    figure, axis = plt.subplots(2, 1, sharex=True)
    latency_axis, throughput_axis = axis

    latency_axis.grid(True)
    throughput_axis.grid(True)

    for dataset in datasets:
        # check that we don't have hidden variables grouped
        var, invariant_vars = analysis.condense_vars(dataset, ignore_vars)
        assert len(var) == 0, set(var)
        # Report the invariants of every dataset, not just the last one; this
        # also avoids a NameError when `datasets` is empty.
        print("Invariants:", invariant_vars)

    def latency_by_time_bin(data):
        """Group ``latency_ms`` into consecutive ``interval``-second bins of ``start_s``."""
        edges = np.arange(0, data["start_s"].max(), interval)
        bins = pd.cut(data["start_s"], edges)
        # observed=False keeps empty bins in the result (the historical
        # default for categorical groupers), stated explicitly here.
        return data.groupby(bins, observed=False)["latency_ms"]

    def bin_midpoints(binned):
        """Return the midpoint of every interval in the index of ``binned``."""
        # True midpoint: the previous `(left + right) // 2` floored the value,
        # which collapses to the left bin edge for 1 s bins and produces
        # duplicate x positions for narrower bins.
        return np.array([time_bin.mid for time_bin in binned.index], dtype=float)

    def percentile_latencies(data):
        latencies = latency_by_time_bin(data).quantile(percentile)
        return bin_midpoints(latencies), latencies

    def throughput_over_time(data):
        # Requests per bin divided by the bin width gives requests per second.
        throughputs = latency_by_time_bin(data).count() / interval
        return bin_midpoints(throughputs), throughputs

    for (dataset, label) in zip(datasets, labels):
        x, y = percentile_latencies(dataset)
        latency_axis.plot(x, y, label=label)

    latency_axis.legend()
    latency_axis.set_ylabel("Request latency (ms)")

    for (dataset, label) in zip(datasets, labels):
        x, y = throughput_over_time(dataset)
        throughput_axis.plot(x, y, label=label)

    throughput_axis.legend()

    throughput_axis.set_xlabel("Time (s)")
    throughput_axis.set_ylabel("Achieved throughput (req/s)")

    return figure, axis


# gRPC vs JSON

In [None]:
# Compare JSON and gRPC requests for http_version == 2 on the "sgx" enclave,
# keeping only rows whose start_ms is above 2000.
plot_data = req_duration_data.copy(deep=False)

plot_data = plot_data[plot_data["http_version"] == 2]
plot_data = plot_data[plot_data["enclave"] == "sgx"]
plot_data = plot_data[plot_data["start_ms"] > 2000]

# Take explicit copies: the loop below writes to these frames, and writing to
# a boolean-mask selection triggers pandas' SettingWithCopyWarning.
json_data = plot_data[plot_data["content_type"] == "json"].copy()
grpc_data = plot_data[plot_data["content_type"] == "grpc"].copy()

datasets = [json_data, grpc_data]
for dataset in datasets:
    # Re-base each dataset so its earliest remaining request starts at 0,
    # then derive the start time in seconds used by the plotting function.
    dataset["start_ms"] = dataset["start_ms"] - dataset["start_ms"].min()
    dataset["start_s"] = dataset["start_ms"] / 1000

plot_latency_and_throughput(
    datasets, ["JSON", "gRPC"], ignore_vars + ["endpoint", "start_s"]
)


# HTTP1 vs HTTP2

In [None]:
# Compare http_version 1 and 2 for JSON requests on the "sgx" enclave,
# keeping only rows whose start_ms is above 2000.
plot_data = req_duration_data.copy(deep=False)

plot_data = plot_data[plot_data["content_type"] == "json"]
plot_data = plot_data[plot_data["enclave"] == "sgx"]
plot_data = plot_data[plot_data["start_ms"] > 2000]

# Take explicit copies: the loop below writes to these frames, and writing to
# a boolean-mask selection triggers pandas' SettingWithCopyWarning.
http1_data = plot_data[plot_data["http_version"] == 1].copy()
http2_data = plot_data[plot_data["http_version"] == 2].copy()

datasets = [http1_data, http2_data]
for dataset in datasets:
    # Re-base each dataset so its earliest remaining request starts at 0,
    # then derive the start time in seconds used by the plotting function.
    dataset["start_ms"] = dataset["start_ms"] - dataset["start_ms"].min()
    dataset["start_s"] = dataset["start_ms"] / 1000

plot_latency_and_throughput(
    datasets, ["HTTP1", "HTTP2"], ignore_vars + ["endpoint", "start_s"]
)


# Overhead of SGX

In [None]:
# Compare the "sgx" and "virtual" enclaves for gRPC requests with
# http_version == 2 and vus == 100, keeping only rows whose start_ms is
# above 2000.
plot_data = req_duration_data.copy(deep=False)

plot_data = plot_data[plot_data["content_type"] == "grpc"]
plot_data = plot_data[plot_data["http_version"] == 2]
plot_data = plot_data[plot_data["vus"] == 100]
plot_data = plot_data[plot_data["start_ms"] > 2000]

# Take explicit copies: the loop below writes to these frames, and writing to
# a boolean-mask selection triggers pandas' SettingWithCopyWarning.
sgx_data = plot_data[plot_data["enclave"] == "sgx"].copy()
virtual_data = plot_data[plot_data["enclave"] == "virtual"].copy()

datasets = [sgx_data, virtual_data]
for dataset in datasets:
    # Re-base each dataset so its earliest remaining request starts at 0,
    # then derive the start time in seconds used by the plotting function.
    dataset["start_ms"] = dataset["start_ms"] - dataset["start_ms"].min()
    dataset["start_s"] = dataset["start_ms"] / 1000

plot_latency_and_throughput(
    datasets, ["SGX", "Virtual"], ignore_vars + ["endpoint", "start_s"]
)


In [None]:
# Achieved-throughput bar chart by content type, excluding requests whose
# path is "app/tx".
# NOTE(review): an earlier comment here said the start of the run is skipped
# to avoid the connection setup flurry, but this cell applies no start-time
# filter - confirm whether one is applied inside plot_achieved_throughput_bar.
plot_data = req_duration_data.copy(deep=False)
plot_data = plot_data[plot_data["path"] != "app/tx"]
# plot_data = plot_data[plot_data["http_version"] ==2]
analyser.plot_achieved_throughput_bar(
    plot_data, col="content_type", ignore_vars=ignore_vars
)


In [None]:
# Throughput bar chart over all request-duration samples, drawn by the
# project's `analyser.plot_throughput_bar` helper with "endpoint" added to the
# ignored columns. The commented-out lines are optional filters.
plot_data = req_duration_data.copy(deep=False)
# plot_data = plot_data[plot_data["http_version"] == 1]
# plot_data = plot_data[plot_data["nodes"] == 1]
p = analyser.plot_throughput_bar(plot_data, ignore_vars=ignore_vars + ["endpoint"])
# Blank out the figure-level title.
p.figure.suptitle("")
p.set(xlabel="target throughput (req/s)", ylabel="achieved throughput ratio")


In [None]:
# Inspection cell: narrow the data to http_version == 1, rate == 4000 and
# nodes == 1, run `analysis.condense_vars` on the result, and display the
# remaining "nodes" column. `var` and `inv` are not used further in this cell.
plot_data = req_duration_data.copy(deep=False)
plot_data = plot_data[plot_data["http_version"] == 1]
plot_data = plot_data[plot_data["rate"] == 4000]
plot_data = plot_data[plot_data["nodes"] == 1]
var, inv = analysis.condense_vars(plot_data, ignore_vars)
plot_data["nodes"]


In [None]:
# Throughput bar chart restricted to http_version == 1 and to rows that have
# a non-missing "nodes" value; "endpoint" is added to the ignored columns.
plot_data = req_duration_data.copy(deep=False)
plot_data = plot_data[plot_data["http_version"] == 1]
plot_data = plot_data[plot_data["nodes"].notna()]
p = analyser.plot_throughput_bar(plot_data, ignore_vars=ignore_vars + ["endpoint"])
# Blank out the figure-level title.
p.figure.suptitle("")
p.set(xlabel="target throughput (req/s)", ylabel="achieved throughput ratio")


In [None]:
# Throughput bar chart over all request-duration samples, with "endpoint"
# added to the ignored columns.
# NOTE(review): the executable statements here match the earlier unfiltered
# plot_throughput_bar cell - consider whether both cells are needed.
plot_data = req_duration_data.copy(deep=False)
# plot_data = plot_data[plot_data["http_version"] == 1]
p = analyser.plot_throughput_bar(plot_data, ignore_vars=ignore_vars + ["endpoint"])
# Blank out the figure-level title.
p.figure.suptitle("")
p.set(xlabel="target throughput (req/s)", ylabel="achieved throughput ratio")


In [None]:
# Line plot from the project's `analyser.plot_target_throughput_latency_line`
# helper over all request-duration samples, with "endpoint" added to the
# ignored columns.
# NOTE(review): going by the helper's name this relates latency to target
# throughput - confirm the exact axes in analysis.Analyser.
plot_data = req_duration_data.copy(deep=False)
p = analyser.plot_target_throughput_latency_line(
    plot_data, ignore_vars=ignore_vars + ["endpoint"]
)
# Blank out the figure-level title.
p.figure.suptitle("")
