In [None]:
import pandas as pd
import glob
import json
import dotted # https://pypi.org/project/dotted-notation/
import re
import matplotlib.pyplot as plt
import json

from pathlib import Path
import seaborn as sns

In [None]:
import lib.datasciencetoolbelt as dstools
from lib.resultstorage import ResultStorage

In [None]:
# Notebook-wide output settings. The first three values are handed to
# dstools.setup() in the next cell; textwidth is used for figure sizes.

# Forwarded to dstools as the savefig "enable" option.
savefig_enable = True
# Forwarded to dstools as "seaborn_context".
seaborn_context = "paper"
# Wrapped in Path() and forwarded to dstools as the savefig "dir" option.
savefig_dir = "./postprocess_results"
# Width in inches; the plotting cells derive their figsize widths from it.
textwidth = 5.5 #inch

In [None]:
# Hand the notebook-wide settings to the plotting helpers, then create the
# ResultStorage rooted at ./results that the loading cell iterates over.
dstools.setup(dict(
    seaborn_context=seaborn_context,
    savefig=dict(
        enable=savefig_enable,
        dir=Path(savefig_dir),
    ),
))
result_storage = ResultStorage(Path("./results"))

In [None]:
# Identifying variables of a result, one tuple per variable:
# (dotted path into the result JSON, short name, type applied to the raw value).
id_vars__dottedpath_and_shortname_and_type = [
    # ('result.identity', "benchmark", str),
    ("storage_stack.identity", "storage_stack", str),
    ("result.fio_config.numjobs", "numjobs", int),
]
# Only the short names, in declaration order.
id_vars = [
    shortname
    for _dottedpath, shortname, _type in id_vars__dottedpath_and_shortname_and_type
]

def extract_id_var_values(output_json):
    """Extract the identifying variables of one benchmark result.

    Every dotted path listed in ``id_vars__dottedpath_and_shortname_and_type``
    is looked up in ``output_json`` and the value found there is converted
    with the type constructor of that entry.

    Args:
        output_json: the decoded result document to read from.

    Returns:
        dict mapping each short name to its converted value.

    Raises:
        Exception: if a dotted path yields no value, if a short name is listed
            more than once, or if a value cannot be converted (the original
            ValueError is chained as the cause).
    """
    d = output_json
    id_var_values = {}
    for dp, sn, ty in id_vars__dottedpath_and_shortname_and_type:
        v = dotted.get(d, dp)
        # Compare against None instead of testing truthiness: legitimate
        # values such as 0 or "" must not be reported as missing.
        # NOTE(review): relies on dotted.get returning None for an absent path.
        if v is None:
            # Fall back to a placeholder so a result without a 'file' entry
            # still produces the real error instead of a KeyError.
            if isinstance(d, dict):
                source = d.get("file", "<unknown file>")
            else:
                source = "<unknown file>"
            raise Exception(f"{source}: dotted path {dp} not found")
        if sn in id_var_values:
            raise Exception(f"duplicate shortname {sn}")
        try:
            id_var_values[sn] = ty(v)
        except ValueError as e:
            raise Exception(f"cannot parse v={v!r}") from e
    return id_var_values


def get_fio_write_metrics(output_json):
    """Return the write IOPS and latency statistics of the single fio job.

    Reads ``fio_jsonplus.jobs`` from ``output_json``, asserts that exactly one
    job is present, and returns mean and standard deviation of that job's
    write IOPS and write latency (taken from its ``lat_ns`` entry).
    """
    fio_jobs = dotted.get(output_json, "fio_jsonplus.jobs")
    assert len(fio_jobs) == 1
    write_stats = fio_jobs[0]["write"]

    metrics = {}
    metrics["w_iops_mean"] = write_stats["iops_mean"]
    metrics["w_iops_stddev"] = write_stats["iops_stddev"]
    metrics["w_lat_mean"] = dotted.get(write_stats, "lat_ns.mean")
    metrics["w_lat_stddev"] = dotted.get(write_stats, "lat_ns.stddev")
    return metrics


def to_row_dict(output_json):
    """Flatten one benchmark result into a dict usable as a DataFrame row.

    The row holds the id variables, the fio write metrics (a nested dict under
    ``"fio_metrics"``), the ``allcpu`` CPU times (under ``"cpu"``), and every
    entry of ``result.latency_analysis`` with its leading ``@`` stripped.
    Latency entries come last, so they win on a key collision.

    Args:
        output_json: the decoded result document.

    Returns:
        dict with one entry per column of the row.

    Raises:
        AssertionError: if a latency key does not start with ``@`` or occurs
            twice after stripping.
        Exception: whatever the lookups or helpers raise; the offending
            document is printed as JSON before the error is re-raised.
    """
    try:
        latencies = {}
        for k, v in dotted.get(output_json, "result.latency_analysis").items():
            # startswith() also copes with an empty key, where k[0] would
            # raise IndexError instead of the intended assertion.
            assert k.startswith("@"), f"latency key {k!r} lacks the '@' prefix"
            name = k[1:]  # strip leading @
            assert name not in latencies, f"duplicate latency key {name!r}"
            latencies[name] = v

        return {
            **extract_id_var_values(output_json),
            "fio_metrics": get_fio_write_metrics(output_json['result']),

            # cpu stats
            "cpu": output_json["result"]["cpu_time"]["allcpu"],
            **latencies,
        }
    except Exception:
        # Only real errors trigger the dump; KeyboardInterrupt/SystemExit pass
        # through without flooding the output. default=str keeps the dump
        # itself from raising on non-serializable values.
        print(json.dumps(output_json, default=str))
        raise

# One flattened row per stored result of the "zillwb_latency_analysis__v4" run.
rows = list(map(to_row_dict, result_storage.iter_results("zillwb_latency_analysis__v4")))

In [None]:
# One row per result, indexed and sorted by the id variables.
df = (
    pd.DataFrame.from_dict(rows)
    .set_index(id_vars)
    .sort_index()
)
display(df)

### Separate FIO and CPU Dataframes

In [None]:
# Expand the per-row fio metric dicts into one column per metric.
df_fio = df['fio_metrics'].apply(pd.Series)

# Cross-check the two fio figures against each other: with `numjobs` jobs
# running in parallel, 1e9 / mean latency per job gives the expected IOPS,
# and 1e9 / (IOPS per job) gives the expected latency.
fio_numjobs = df_fio.index.to_frame()['numjobs']
df_fio['iops_from_latency'] = 1_000_000_000 / df_fio.w_lat_mean * fio_numjobs
df_fio['latency_from_iops'] = 1_000_000_000 / (df_fio.w_iops_mean / fio_numjobs)
df_fio

In [None]:
# Expand the per-row CPU time dicts into one column per CPU state.
df_cpu = df['cpu'].apply(pd.Series)
display(df_cpu)

# Total over all CPU states, taken before the derived columns are added.
cpu_total = df_cpu.sum(axis=1)
cpu_not_idle = cpu_total - df_cpu.idle
df_cpu = df_cpu.assign(
    not_idle=cpu_not_idle,
    # second socket was disabled => half of total cpu time is idle time
    utilization=cpu_not_idle / (cpu_total - (cpu_total / 2)),
)
df_cpu

### Remove `fio_metrics` and `cpu` from `df`

In [None]:
# Remove the nested dict columns so that `df` only holds the latency counters.
# drop(..., errors="ignore") keeps this cell re-runnable: `del df[...]` raises
# KeyError when the cell is executed a second time.
df = df.drop(columns=["fio_metrics", "cpu"], errors="ignore")

# Compute Latency Breakdown

In [None]:
tmp = df.copy()

# Part of zfs_write that is spent neither in zil_commit nor in zfs_log_write.
tmp['async'] = tmp['zfs_write'] - tmp['zil_commit'] - tmp['zfs_log_write']

# Part of zil_commit that none of the separately measured steps accounts for.
zil_commit_accounted = (
    tmp['zil_fill_commit_list']
    + tmp['zillwb_lwb_write_issue']
    + tmp['zillwb_commit_waiter__issue_cv']
    + tmp['zillwb_commit_waiter__timeout_cv']
)
tmp['zillwb_other'] = tmp['zil_commit'] - zil_commit_accounted

# Columns that make up the breakdown, in stacking order.
components = [
    "async",
    "zfs_log_write",
    "zil_fill_commit_list",
    "zillwb_other",
    "zillwb_lwb_write_issue",
    "zillwb_commit_waiter__issue_cv",
    "zillwb_commit_waiter__timeout_cv",
]

# Cumulative latencies divided by the number of writes => per-write latencies.
df_latbreakdown = tmp[components].div(tmp['zfs_write_count'], axis=0)
df_latbreakdown

In [None]:
# Absolute per-write breakdown and its row total.
abs_breakdown = df_latbreakdown.copy()
total = df_latbreakdown.sum(axis=1)

# Each component as a fraction of its row total.
relbreakdown = df_latbreakdown.div(total, axis=0)

display(abs_breakdown)
display(total)

# Difference between the latency reported by fio and the sum of the components.
delta_fio_ebpf = df_fio.w_lat_mean - abs_breakdown.sum(axis=1)

In [None]:
# Overview figure: one subplot row per entry below, one column per storage stack.
# Named `plot_rows` so it does not overwrite `rows`, the list of result rows
# that the DataFrame cell is built from (re-running that cell would break).
plot_rows = [
    ("relbreakdown", "Relative Latency Breakdown (eBPF)"),
    ("abs_breakdown", "Latency Breakdown Per IOP (eBPF)"),
    ("fio_latency_and_delta", "Latency Measured By Fio & Delta to eBPF"),
#     ("measurement_error", "Latency Per IOPS Delta (fio - eBPF)"), # covered by fio_latency_and_delta

    ("iops", "iops"),
#     ("avg_lwb_latency", "Avergage LWB Write Latency (eBPF)")
#     "latency_std",
]

# Storage stack shown in each subplot column.
storage_stacks = ['zfs-lwb-rs_0']

nrows = len(plot_rows)
ncols = len(storage_stacks)
g, axes = plt.subplots(nrows, ncols, squeeze=False,
                       figsize=(textwidth, 4 * nrows),
                       gridspec_kw={'hspace': 0.4})

for row, (row_name, row_displayname) in enumerate(plot_rows):
    for col, storage_stack in enumerate(storage_stacks):
        ax = axes[row, col]
        # Legends are only drawn in the right-most column.
        is_last_col = col == ncols - 1

        try:
            if row_name == "relbreakdown":
                relbreakdown.loc[storage_stack, ].plot.bar(ax=ax, stacked=True, ylim=(0, 1.1), legend=False)
                if is_last_col:
                    ax.legend(loc="center right", ncol=2)
            elif row_name == "abs_breakdown":
                abs_breakdown.loc[storage_stack, ].plot.bar(ax=ax, stacked=True, legend=False)
                if is_last_col:
                    ax.legend(loc="lower right", ncol=2)
            elif row_name == "iops":
                df_fio.loc[storage_stack, "w_iops_mean"].plot(ax=ax)
            elif row_name == "fio_latency_and_delta":
                # The fio-vs-eBPF delta is drawn as the error bar of each bar.
                yerr = delta_fio_ebpf.loc[storage_stack, ]
                df_fio.loc[storage_stack, "latency_from_iops"].plot.bar(ax=ax, yerr=yerr)
            elif row_name == "latency_std":
                df_fio.loc[storage_stack, "w_lat_stddev"].plot(ax=ax)
            elif row_name == "measurement_error":
                # The (fio - eBPF) delta lives in `delta_fio_ebpf`; the name
                # `measurement_error` used here before is not defined anywhere.
                delta_fio_ebpf.loc[storage_stack, ].plot.bar(ax=ax)
            else:
                raise Exception(f"unknown row name {row_name}")
            ax.set_title(f"{row_displayname}")

        except Exception:
            # Tell which subplot failed, then let the error propagate.
            print(row_name)
            raise

In [None]:
# Show the relative breakdown followed by the absolute one.
display(relbreakdown, abs_breakdown)

In [None]:
# Two stacked bar charts for one storage stack: relative breakdown on top,
# absolute breakdown below, with one shared legend underneath the figure.
storage_stack = "zfs-lwb-rs_0"

fig, axes = plt.subplots(2, figsize=(0.5 * textwidth, 3), gridspec_kw={"hspace":0.05})
ax_relative, ax_absolute = axes

# Fractions scaled to whole percent.
relative_percent = relbreakdown.mul(100).round(0).loc[storage_stack, ]
relative_percent.plot.bar(
    ax=ax_relative,
    stacked=True,
    ylim=(0, 100),
    legend=False,
    ylabel="Relative [%]",
)

# Values divided by 1000 to match the "us" axis label
# (presumably the breakdown is in nanoseconds; verify against the eBPF data).
absolute_scaled = abs_breakdown.div(1_000).loc[storage_stack, ]
absolute_scaled.plot.bar(
    ax=ax_absolute,
    stacked=True,
    legend=False,
    ylabel="Absolute [us]",
)

# Both panels have the same components, so the lower one's handles serve both.
handles, labels = ax_absolute.get_legend_handles_labels()
fig.legend(handles, labels, loc='center', title="Component", bbox_to_anchor=(0.5, -0.2), ncol=2)
fig.suptitle("Average IOP Latency Breakdown", fontsize=9, y=0.95)

# The x axis is only labelled on the lower panel.
ax_relative.set_xlabel("")
ax_relative.set_xticklabels([])

dstools.savefig("zillwb_latency_analysis")