In [None]:
import glob
import json
import re

import ipywidgets as widgets
import pandas as pd
import seaborn.objects as so
from IPython.display import display
from itables import show
from pandas import DataFrame

In [None]:
# Load results

results_folder = "../bench/results"

# Expected layout: <results_folder>/<tag>/<timestamp>/<size>.json
# The folder and the extension are escaped so that their dots ("..", ".json")
# match literally instead of matching any character.
file_patter = re.compile(
    rf"{re.escape(results_folder)}/([^/]+)/([^/]+)/([^/]+)\.json"
)
file_glob = f"{results_folder}/*/*/*.json"


def json_into_df(file_path: str) -> DataFrame:
    """Load one result file into a DataFrame annotated with its run identity.

    The tag, timestamp and size are taken from the path itself, which must
    look like ``<results_folder>/<tag>/<timestamp>/<size>.json``.

    Args:
        file_path: path of the JSON file, in the form produced by ``file_glob``.

    Returns:
        The normalized JSON content with the extra columns ``tag``,
        ``timestamp``, ``size`` (int) and ``tag/timestamp``.

    Raises:
        ValueError: if ``file_path`` does not follow the expected layout, or
            if the ``<size>`` component is not an integer.
    """
    match = file_patter.fullmatch(file_path)
    if match is None:
        # A bare `raise` outside an `except` block has nothing to re-raise and
        # ends up as an unrelated RuntimeError; report the real problem instead.
        raise ValueError(
            f"unexpected result path {file_path!r}; "
            f"expected {results_folder}/<tag>/<timestamp>/<size>.json"
        )
    tag, timestamp, size = match.groups()

    with open(file_path) as file:
        data = json.load(file)

    frame = pd.json_normalize(data)
    frame["tag"] = tag
    frame["timestamp"] = timestamp
    frame["size"] = int(size)
    frame["tag/timestamp"] = frame["tag"] + "/" + frame["timestamp"]
    return frame


def get_all_results():
    """Load every file matching ``file_glob`` into a single DataFrame.

    Returns:
        The rows of all result files concatenated, with a fresh 0..n-1 index.

    Raises:
        FileNotFoundError: if no file matches ``file_glob``.
    """
    # glob returns matches in arbitrary order; sorting keeps the row order
    # stable between runs.
    paths = sorted(glob.glob(file_glob))
    if not paths:
        # pd.concat would otherwise fail with "No objects to concatenate",
        # which says nothing about where the files were expected.
        raise FileNotFoundError(f"no result files match {file_glob!r}")
    return pd.concat([json_into_df(path) for path in paths], ignore_index=True)


df = get_all_results()

In [None]:
# Render the complete results frame (one row per loaded measurement) with itables
show(df)

In [None]:
# Derive metrics


# Create a new column 'rate_bps' holding the integer value of 'rate'.
# Most rows have a 'rate' like 123456bps; a minority have a 'rate' like 100%, and the latter is converted to None.
def fix(row):
    """Parse a row's ``rate`` (e.g. ``"123456bps"``) into an integer.

    Args:
        row: any object exposing a ``rate`` attribute (a DataFrame row when
            used through ``df.apply(fix, axis=1)``).

    Returns:
        The integer in front of the ``bps`` suffix, or None when ``rate`` is
        not a string (e.g. NaN for a missing value) or is not an integer
        number of bps (e.g. ``"100%"``).
    """
    rate = row.rate
    if not isinstance(rate, str):
        # Missing values show up as NaN/None, which have no `removesuffix`.
        return None
    try:
        return int(rate.removesuffix("bps"))
    except ValueError:
        return None


df["rate_bps"] = df.apply(fix, axis=1)

# Older measurements have no 'stage', so the column is missing (NaN/None) for them.
# 'stage' was added together with the repetition stage; before that, the first
# iteration was the warmup measurement and the subsequent ones were the search.
stage_missing = df["stage"].isna()
is_first_iteration = df["iteration"] == 0
df.loc[stage_missing & is_first_iteration, "stage"] = "warmup"
df.loc[stage_missing & ~is_first_iteration, "stage"] = "search"

# Older measurements have no repetition count either; treat it as zero.
df["repetitions"] = df["repetitions"].fillna(0).astype(int)

df["rate_gbps"] = df["rate_bps"] / 1000**3

df["actual_duration_s"] = df["actual_duration_ms"] / 1000

# Packets per second, sent (opackets) and received (ipackets)
df["pps_out"] = df["stats.total.opackets"] / df["actual_duration_s"]
df["mpps_out"] = df["pps_out"] / 1000000

df["pps"] = df["stats.total.ipackets"] / df["actual_duration_s"]
df["mpps"] = df["pps"] / 1000000

# Bit rates from the byte counters, sent (obytes) and received (ibytes)
df["actual_rate_bps_out"] = (df["stats.total.obytes"] * 8) / df["actual_duration_s"]
df["actual_rate_gbps_out"] = df["actual_rate_bps_out"] / 1000**3

df["actual_rate_bps"] = (df["stats.total.ibytes"] * 8) / df["actual_duration_s"]
df["actual_rate_gbps"] = df["actual_rate_bps"] / 1000**3

# Mpps * bytes/packet is megabytes per second; * 8 gives Mbit/s and / 1000 gives
# Gbit/s. Assumes `size` is the packet size in bytes, as the plot titles label it.
df["actual_rate_gbps_from_pps"] = df["mpps"] * df["size"] * 8 / 1000

# Sent rate as a percentage of the requested rate
df["gap"] = df["actual_rate_bps_out"] / df["rate_bps"] * 100

df = df.copy()  # for performance (re-packs the frame after the column-by-column inserts)

In [None]:
# Sanity check: average received packet size (ibytes / ipackets) shown next to
# the `size` column for comparison.
df["size_calculated"] = df["stats.total.ibytes"] / df["stats.total.ipackets"]
show(df[["size_calculated", "size"]])

In [None]:
# Show only the rows whose tag is the string "none"
show(df[df["tag"] == "none"])

In [None]:
# Alphabetical list of every available column name
sorted(df.columns)

In [None]:
def print_parameters(df):
    """Print, on one line, the distinct benchmark parameters found in ``df``.

    Args:
        df: DataFrame with the columns ``ports``, ``threshold``,
            ``precision_bps``, ``wait_time_s``, ``expected_duration_ms``,
            ``rx_delay_ms``, ``repetitions`` and ``size``.

    Raises:
        KeyError: if any of those columns is missing (nothing is printed then).
    """
    plain_columns = (
        "threshold",
        "precision_bps",
        "wait_time_s",
        "expected_duration_ms",
        "rx_delay_ms",
        "repetitions",
        "size",
    )
    # 'ports' is converted to str before unique(); presumably its values are
    # lists, which are unhashable — TODO confirm against the result files.
    parts = [f"ports={df['ports'].astype(str).unique()}"]
    # Each label is the column name itself, so labels cannot drift from the
    # columns they describe (the delay used to be printed as "rx_delay_mx").
    parts.extend(f"{column}={df[column].unique()}" for column in plain_columns)
    print(" ".join(parts))

In [None]:
# Selector widgets shared by the interactive plots in this notebook.
# Each dropdown's initial options depend on the initial value of the previous one:
# timestamp -> sizes measured at that timestamp -> tags measured at that size.
_all_timestamps = df["timestamp"].drop_duplicates().sort_values()
timestamp_dropdown = widgets.Dropdown(
    description="Timestamp:",
    options=_all_timestamps,
)

_at_initial_timestamp = df["timestamp"] == timestamp_dropdown.value
_initial_sizes = df.loc[_at_initial_timestamp, "size"].drop_duplicates().sort_values()
size_dropdown = widgets.Dropdown(
    description="Packet Size:",
    options=_initial_sizes,
)

_at_initial_size = df["size"] == size_dropdown.value
_initial_tags = df.loc[_at_initial_timestamp & _at_initial_size, "tag"].unique()
tag_dropdown = widgets.Dropdown(
    description="Tag:",
    options=_initial_tags,
)


def on_timestamp_change(change):
    """Refresh the size and tag dropdowns after a new timestamp is selected.

    Args:
        change: the change notification passed by the widget; ``change["new"]``
            is the newly selected timestamp.
    """
    selected_timestamp = change["new"]
    at_timestamp = df["timestamp"] == selected_timestamp
    size_dropdown.options = (
        df.loc[at_timestamp, "size"].drop_duplicates().sort_values()
    )

    # "value" observers are only notified when the value actually changes. If
    # the size selected after the options update equals the previous one, the
    # size observer does not run and the tag list would still belong to the old
    # timestamp, so the tags are refreshed here as well.
    at_size = df["size"] == size_dropdown.value
    tag_dropdown.options = df.loc[at_timestamp & at_size, "tag"].unique()


def on_size_change(change):
    """Repopulate the tag dropdown for the current timestamp and the new size.

    Args:
        change: the change notification passed by the widget; ``change["new"]``
            is the newly selected packet size.
    """
    at_timestamp = df["timestamp"] == timestamp_dropdown.value
    at_size = df["size"] == change["new"]
    tag_dropdown.options = df.loc[at_timestamp & at_size, "tag"].unique()


# Keep the dependent dropdowns in sync: a new size refreshes the tag options,
# and a new timestamp refreshes the size options.
size_dropdown.observe(on_size_change, names="value")
timestamp_dropdown.observe(on_timestamp_change, names="value")

In [None]:
# One row per distinct (size, tag, timestamp, expected_duration_ms) combination,
# ordered by timestamp and size descending, then tag ascending.
_overview_columns = ["size", "tag", "timestamp", "expected_duration_ms"]
_overview = df[_overview_columns].sort_values(
    by=["timestamp", "size", "tag"],
    ascending=[False, False, True],
)
show(_overview.drop_duplicates())

In [None]:
def plot_drop_by_iteration(timestamp, size, tag):
    """Plot the loss percentage per iteration of one run against its threshold.

    Only iterations greater than 2 are shown (presumably to skip the warmup and
    the first search steps — TODO confirm). The run's parameters are printed
    below the plot.

    Args:
        timestamp: timestamp of the run to plot.
        size: packet size of the run to plot.
        tag: tag of the run to plot.
    """
    run = df[
        (df["tag"] == tag)
        & (df["timestamp"] == timestamp)
        & (df["size"] == size)
        & (df["iteration"] > 2)
    ]
    if run.empty:
        # Short runs, or a dropdown combination with no data, have nothing to draw.
        print(
            f"No iterations above 2 for tag={tag}, timestamp={timestamp}, size={size}"
        )
        return

    display(
        so.Plot(run, x="iteration", y="lost_percentage")
        .add(so.Line())
        # The threshold line is drawn from the column itself; picking a single
        # row with `.iloc[2]` raised IndexError for runs with fewer than three rows.
        .add(so.Line(color="red"), y="threshold")
        .label(title=f"Tag: {tag}, Timestamp: {timestamp}, Packet size: {size} bytes")
        .limit(y=(0, None))
    )

    print_parameters(run)


widgets.interactive(
    plot_drop_by_iteration,
    timestamp=timestamp_dropdown,
    size=size_dropdown,
    tag=tag_dropdown,
)

In [None]:
# Last measurement (highest iteration) of every (size, tag, timestamp) run,
# flagged with whether its loss ended up below the threshold.
sub = df.loc[df.groupby(["size", "tag", "timestamp"])["iteration"].idxmax()].assign(
    under_threshold=lambda last: last["lost_percentage"] < last["threshold"]
)

# Per expected duration: how many runs finished under the threshold, out of how many.
# The aggregation is restricted to `under_threshold`; `.sum("under_threshold")`
# passed the string as `numeric_only` and summed every numeric column (e.g. `size`).
show(
    sub.groupby("expected_duration_ms")["under_threshold"]
    .agg(under_threshold="sum", runs="count")
    .reset_index()
)

In [None]:
def plot_rates_by_iteration(
    timestamp,
    size,
    tag,
):
    """Plot received, sent and requested rate (Gbps) per iteration of one run.

    The run's parameters are printed before the plot is displayed.

    Args:
        timestamp: timestamp of the run to plot.
        size: packet size of the run to plot.
        tag: tag of the run to plot.
    """
    in_run = (df["tag"] == tag) & (df["timestamp"] == timestamp) & (df["size"] == size)
    run = df[in_run]
    print_parameters(run)

    # Long form: one row per (iteration, rate column) so the column name can drive the color
    rate_columns = ["actual_rate_gbps", "actual_rate_gbps_out", "rate_gbps"]
    rates_long = pd.melt(
        run,
        id_vars=["iteration"],
        value_vars=rate_columns,
        var_name="in_out",
        value_name="y",
    )

    title = f"Tag: {tag}, Timestamp: {timestamp}, Packet size: {size} bytes"
    figure = so.Plot(rates_long, x="iteration", y="y", color="in_out").add(so.Line())
    display(figure.label(title=title))


widgets.interactive(
    plot_rates_by_iteration,
    tag=tag_dropdown,
    size=size_dropdown,
    timestamp=timestamp_dropdown,
)

In [None]:
def plot_last_rate_by_size(timestamp, tag):
    """Plot the received rate (Gbps) of the last iteration for each packet size.

    The parameters of the selected measurements are printed below the plot.

    Args:
        timestamp: timestamp of the runs to plot.
        tag: tag of the runs to plot.
    """
    selected = (df["tag"] == tag) & (df["timestamp"] == timestamp)
    run = df[selected].copy()

    # Row with the highest iteration number for every packet size
    last_rows = run.loc[run.groupby("size")["iteration"].idxmax()]

    figure = (
        so.Plot(last_rows, x="size", y="actual_rate_gbps")
        .add(so.Line())
        .add(so.Dot())
        .label(title=f"Tag: {tag}, Timestamp: {timestamp}")
    )
    display(figure)
    print_parameters(run)


widgets.interactive(
    plot_last_rate_by_size,
    tag=tag_dropdown,
    timestamp=timestamp_dropdown,
)

In [None]:
def plot_latency_by_size(timestamp, tag):
    """Plot the last iteration's average latency per packet size, with a min/max band.

    The parameters of the selected measurements are printed below the plot.

    Args:
        timestamp: timestamp of the runs to plot.
        tag: tag of the runs to plot.
    """
    selected = (df["tag"] == tag) & (df["timestamp"] == timestamp)
    run = df[selected]

    # Row with the highest iteration number for every packet size
    last_rows = run.loc[run.groupby("size")["iteration"].idxmax()]

    latency = "stats.latency.0.latency"
    figure = (
        so.Plot(
            last_rows,
            x="size",
            y=f"{latency}.average",
            ymin=f"{latency}.total_min",
            ymax=f"{latency}.total_max",
        )
        .add(so.Line())
        .add(so.Band())
        .label(title=f"Tag: {tag}, Timestamp: {timestamp}")
    )
    display(figure)
    print_parameters(run)


widgets.interactive(
    plot_latency_by_size,
    tag=tag_dropdown,
    timestamp=timestamp_dropdown,
)

In [None]:
def plot_latency_by_iteration(timestamp, size, tag):
    """Plot the average latency per iteration of one run, with a min/max band.

    The run's parameters are printed below the plot.

    Args:
        timestamp: timestamp of the run to plot.
        size: packet size of the run to plot.
        tag: tag of the run to plot.
    """
    run = df[(df["tag"] == tag) & (df["timestamp"] == timestamp) & (df["size"] == size)]
    display(
        so.Plot(
            run,
            x="iteration",
            y="stats.latency.0.latency.average",
            ymin="stats.latency.0.latency.total_min",
            ymax="stats.latency.0.latency.total_max",
        )
        .add(so.Line())
        .add(so.Band())
        # The run is selected by size too, so the title names it like the
        # other per-iteration plots do.
        .label(title=f"Tag: {tag}, Timestamp: {timestamp}, Packet size: {size} bytes")
    )
    print_parameters(run)


widgets.interactive(
    plot_latency_by_iteration,
    timestamp=timestamp_dropdown,
    size=size_dropdown,
    tag=tag_dropdown,
)

In [None]:
# Plot a comparison of bpfwavelet parameters


def plot_rate_by_parameters(timestamp, size):
    """Bar-plot the last iteration's received rate (Gbps) of every tag.

    All tags measured at the given timestamp and packet size are compared side
    by side. The parameters of the selected measurements are printed below the plot.

    Args:
        timestamp: timestamp of the runs to compare.
        size: packet size of the runs to compare.
    """
    run = df[(df["timestamp"] == timestamp) & (df["size"] == size)]
    display(
        so.Plot(
            # Row with the highest iteration number for every tag
            run.loc[run.groupby("tag")["iteration"].idxmax()],
            x="tag",
            y="actual_rate_gbps",
            color="tag",
        )
        .add(so.Bar())
        .label(title=f"Timestamp: {timestamp}, Packet size: {size} bytes")
    )
    print_parameters(run)


# `plot_rate_by_parameters` has no `tag` parameter (it compares every tag), so
# only the timestamp and size dropdowns are wired to it.
widgets.interactive(
    plot_rate_by_parameters,
    timestamp=timestamp_dropdown,
    size=size_dropdown,
)

In [None]:
def plot_rate_from_repetitions_with_dispersion_by_size(timestamp, tag):
    """Plot the mean received rate (Gbps) of the "repeat" stage per packet size.

    A range of one standard deviation is drawn around each mean. The
    parameters of the selected measurements are printed below the plot.

    Args:
        timestamp: timestamp of the runs to plot.
        tag: tag of the runs to plot.
    """
    is_repeat = df["stage"] == "repeat"
    selected = (df["tag"] == tag) & (df["timestamp"] == timestamp) & is_repeat
    run = df[selected]

    figure = (
        so.Plot(run, x="size", y="actual_rate_gbps")
        .add(so.Line(), so.Agg("mean"))
        .add(so.Range(), so.Est(errorbar="sd"))
        .label(title=f"Tag: {tag}, Timestamp: {timestamp}")
    )
    display(figure)
    print_parameters(run)


widgets.interactive(
    plot_rate_from_repetitions_with_dispersion_by_size,
    tag=tag_dropdown,
    timestamp=timestamp_dropdown,
)