In [None]:
import pandas as pd
import glob
import json
import dotted # https://pypi.org/project/dotted-notation/
import re
import matplotlib.pyplot as plt

from pathlib import Path
import seaborn as sns
import lib.datasciencetoolbelt as dstools
from lib.resultstorage import ResultStorage

In [None]:
# Configure the project plotting helpers: seaborn "talk" context, figure saving
# enabled with ./postprocess_results as its directory.
dstools.setup(
    dict(
        seaborn_context="talk",
        savefig=dict(
            enable=True,
            dir=Path("./postprocess_results"),
        ),
    )
)
# Handle to the stored benchmark results rooted at ./results.
result_storage = ResultStorage(Path("./results"))

#%matplotlib qt
%matplotlib inline


In [None]:
def zfs_recordsize_to_bytes(i: str):
    """Convert a ZFS ``recordsize`` property value into a number of bytes.

    Accepted forms (case-insensitive, surrounding whitespace ignored):

    * a plain decimal number, interpreted as bytes (``"4096"``)
    * a decimal number followed by a binary unit ``k``, ``m`` or ``g``,
      optionally spelled ``kb``/``kib`` etc. (``"4k"``, ``"128K"``, ``"1MiB"``)

    Parameters:
        i: the recordsize value, e.g. ``"4k"``.

    Returns:
        The record size in bytes as an ``int``.

    Raises:
        ValueError: if ``i`` does not have one of the accepted forms.
    """
    multipliers = {
        "": 1,
        "k": 1024,
        "m": 1024 * 1024,
        "g": 1024 * 1024 * 1024,
    }
    match = re.fullmatch(
        r"\s*(?P<number>\d+)\s*(?:(?P<unit>[kmg])(?:i?b)?|b)?\s*",
        str(i),
        flags=re.IGNORECASE,
    )
    if match is None:
        # Fail with a message naming the bad value instead of a TypeError on None.
        raise ValueError(f"cannot parse ZFS recordsize {i!r}")
    # The unit group is None when no k/m/g suffix was given.
    unit = (match["unit"] or "").lower()
    return int(match["number"]) * multipliers[unit]

In [None]:
           
def to_row_dict(output_json):
    """Flatten one stored benchmark result into a flat dict (one DataFrame row).

    Parameters:
        output_json: nested result object; values are looked up with
            dotted paths such as ``fio_jsonplus.jobs`` and ``fio_config.size``.

    Returns:
        A dict with the test subject, the fio configuration values
        (numjobs, sync, size, blocksize) and the write statistics of the
        single fio job (IOPS, bandwidth, and latency taken from ``lat_ns``).

    Raises:
        AssertionError: if the result does not contain exactly one fio job, or
            if (for subjects other than devdax/fsdax) the ZFS recordsize
            differs from the fio blocksize.
    """
    d = output_json
    jobs = dotted.get(d, "fio_jsonplus.jobs")
    assert len(jobs) == 1, f"expected exactly one fio job, got {len(jobs)}"
    jw = jobs[0]["write"]

    test_subject = dotted.get(d, "test_subject")
    fio_blocksize = dotted.get(d, "fio_config.blocksize")

    # devdax/fsdax results skip the recordsize check
    # (presumably they carry no ZFS setup -- confirm against the result schema).
    if test_subject not in ["devdax", "fsdax"]:
        zfs_recordsize = zfs_recordsize_to_bytes(
            dotted.get(d, "zfs_setup.filesystem_properties.recordsize")
        )
        assert zfs_recordsize == fio_blocksize, (
            f"{test_subject}: zfs recordsize {zfs_recordsize} "
            f"!= fio blocksize {fio_blocksize}"
        )

    return {
        "test_subject": test_subject,
        "numjobs": int(dotted.get(d, "fio_config.numjobs")),
        "sync": int(dotted.get(d, "fio_config.sync")),
        "size": int(dotted.get(d, "fio_config.size")),
        "blocksize": fio_blocksize,
        "w_iops": jw["iops"],
        "w_iops_mean": jw["iops_mean"],
        "w_iops_stddev": jw["iops_stddev"],
        "w_io_bytes": jw["io_bytes"],
        "bw_mean": jw["bw_mean"],
        "bw_dev": jw["bw_dev"],
        "w_lat_mean": dotted.get(jw, "lat_ns.mean"),
        "w_lat_stddev": dotted.get(jw, "lat_ns.stddev"),
    }
    

In [None]:
# Flatten every stored "perf" result into one row dict.
rows = [to_row_dict(j) for j in result_storage.iter_results("perf")]
# Fail here with a clear message instead of with a KeyError in a later cell.
assert rows, 'no "perf" results found in the result storage'
# rows is a list of dicts, which the plain constructor accepts directly;
# DataFrame.from_dict is documented for dict input.
df = pd.DataFrame(rows)

In [None]:
# Show the assembled table (one row per stored "perf" result).
df

# A Few Assertions About The Data

In [None]:
# all plots below assume that the series did not vary blocksize
blocksizes = set(df['blocksize'])
assert blocksizes == {4096}, f"unexpected blocksizes: {blocksizes}"

# assert size is 200MiB (we need this small size so that main pool throughput is not the bottleneck)
assert set(df['size']) == {200 * (1<<20)}, f"unexpected sizes: {set(df['size'])}"

# First Of All, Clarify The Difference Between `w_iops` And `w_iops_mean`

In [None]:
# Long format: each original row contributes one row per IOPS metric, so both
# metrics can be drawn as separate hues in the same plot.
data = df.melt(
    id_vars=["test_subject", "numjobs"],
    value_vars=["w_iops", "w_iops_mean"],
)
data

In [None]:
# One panel per test subject; inside each panel one line per IOPS metric over numjobs.
g = sns.FacetGrid(data, col="test_subject", col_wrap=2, height=6, aspect=1.5)
g.map_dataframe(sns.lineplot, x="numjobs", y="value", hue="variable").add_legend()

=> The gap between `w_iops` and `w_iops_mean` shows the effect of `end_fsync=1`.

==> Proceed with `w_iops_mean` for our experiments because we don't care about the performance of the main pool.

# 4k write absolute comparison IOPS

In [None]:
def plt_abs_compare_iops_and_latency(subjects):
    """Plot mean and stddev of the write IOPS over numjobs for ``subjects``.

    Reads the notebook-global ``df``. Draws one facet per metric
    (``w_iops_mean`` and ``w_iops_stddev``) with independent y axes and one
    line per test subject.

    NOTE(review): despite the name, no latency column is plotted here.
    """
    long_df = df.melt(
        id_vars=["test_subject", "numjobs"],
        value_vars=["w_iops_mean", "w_iops_stddev"],
    )
    long_df = long_df[long_df.test_subject.isin(subjects)]

    grid = sns.FacetGrid(long_df, col="variable", height=6, sharey=False)
    grid.map_dataframe(
        sns.lineplot,
        x="numjobs",
        y="value",
        hue="test_subject",
        style="test_subject",
        markers=True,
    )
    grid.add_legend()

In [None]:
# Canonical ordering of the test subjects; used as hue/style order by the plots.
test_subject_order = [
    "devdax",
    "fsdax",
    "async",
    "zil-lwb",
    "zil-pmem",
]
# Shared y-axis range for the IOPS plots.
iops_ylim = (0, 630_000)

In [None]:
def plt_abs_compare(subjects, value, title, unit, ylim=None, xlim=None):
    """Draw one line per test subject for column ``value`` over ``numjobs``.

    Reads the notebook-globals ``df`` and ``test_subject_order`` and draws
    into a new 8x6 figure.

    Parameters:
        subjects: list of test subjects to plot; must be ordered like
            ``test_subject_order``.
        value: name of the ``df`` column plotted on the y axis.
        title: plot title.
        unit: y-axis label.
        ylim, xlim: passed to ``set_ylim`` / ``set_xlim`` (``None`` by default).

    Raises:
        AssertionError: if ``subjects`` is not ordered like ``test_subject_order``.
    """
    # subjects must be ordered like test_subject_order otherwise the legend is off
    # (the legend labels are handed over as a plain list of names)
    assert subjects == sorted(subjects, key=test_subject_order.index)

    selected = df[df.test_subject.isin(subjects)]

    plt.figure(figsize=(8, 6))
    ax = sns.lineplot(
        data=selected,
        x="numjobs",
        y=value,
        hue="test_subject",
        style="test_subject",
        markers=True,
        hue_order=test_subject_order,
        style_order=test_subject_order,
        legend=False,
    )
    ax.set_title(title, pad=16)
    ax.set_ylabel(unit)
    ax.set_xticks(range(2, 17, 2))
    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    ax.set_xlabel("Number of fio threads (--numjobs)")
    if len(subjects) > 1:
        ax.legend(subjects)

In [None]:
# Raw PMEM subjects (devdax, fsdax): mean write IOPS over numjobs.
plt_abs_compare(["devdax", "fsdax"], "w_iops_mean", "Raw PMEM 4k Write Performance", "IOPS",
                ylim=iops_ylim)
dstools.savefig("4k_rawpmem_iops")
# w_lat_mean is read from fio's lat_ns.mean (see to_row_dict) and plotted
# unscaled, so the axis unit is nanoseconds, as in the other latency plots.
plt_abs_compare(["devdax", "fsdax"], "w_lat_mean", "Raw PMEM 4k Write Latency (ioengine=dev-dax)", "nano seconds")
dstools.savefig("4k_rawpmem_lat")

In [None]:
# Highest mean write IOPS among the devdax rows.
data = df.copy()
display(data.loc[data.test_subject == "devdax", "w_iops_mean"].max())

In [None]:
# IMPORTANT NOTE: the ordering of the variables must be the same as test_subject_order
plt_abs_compare(["devdax", "fsdax", "async", "zil-lwb"], "w_iops_mean", "ZFS: Async vs Sync Write Performance", "IOPS",
               ylim=iops_ylim)
dstools.savefig("4k_async_vs_sync_perf")
plt_abs_compare(["devdax", "fsdax", "async", "zil-lwb"], "w_lat_mean", "ZFS: Async vs Sync Write Latency", "nano seconds",
               ylim=(1, 175 * 1000))
dstools.savefig("4k_async_vs_sync_lat")

In [None]:
# Mean write latency for numjobs 1, 4 and 8: test subjects as rows, numjobs as columns.
data = (
    df.copy()
    .pivot_table(values="w_lat_mean", index=["numjobs", "test_subject"])
    .query("numjobs in [1, 4, 8]")
    .unstack(level=0)
)

# latencies, divided by 1000 (w_lat_mean is taken from fio's lat_ns)
display(data.div(1000).round(1))

# speedup: zil-lwb latency divided by zil-pmem latency, per numjobs column
zil_lwb = data.query("test_subject == 'zil-lwb'")
assert len(zil_lwb) == 1

zil_pmem = data.query("test_subject == 'zil-pmem'")
assert len(zil_pmem) == 1

# Drop the test_subject index so the two single-row frames align on row 0.
display(zil_lwb.reset_index(drop=True).div(zil_pmem.reset_index(drop=True)).round(1))

In [None]:
# zil-pmem against fsdax, async and zil-lwb: mean write IOPS.
plt_abs_compare(
    subjects=["fsdax", "async", "zil-lwb", "zil-pmem"],
    value="w_iops_mean",
    title="ZIL-PMEM Performance Comparison",
    unit="IOPS",
    ylim=iops_ylim,
)
dstools.savefig("4k_zil_pmem_perf")

# Same subjects: mean write latency.
plt_abs_compare(
    subjects=["fsdax", "async", "zil-lwb", "zil-pmem"],
    value="w_lat_mean",
    title="ZIL-PMEM Latency Comparison",
    unit="nano seconds",
    ylim=(1, 175_000),
)
dstools.savefig("4k_zil_pmem_lat")

# Zoomed variant without zil-lwb: narrower y range and x limited to 0..8.
plt_abs_compare(
    subjects=["fsdax", "async", "zil-pmem"],
    value="w_lat_mean",
    title="ZIL-PMEM Latency Comparison",
    unit="nano seconds",
    ylim=(1, 30_000),
    xlim=(0, 8),
)
dstools.savefig("4k_zil_pmem_lat_zoomed")

# 4k write speedup in IOPS (zil-lwb as baseline, without devdax)

In [None]:
# Mean and stddev of the write IOPS, indexed by (test_subject, numjobs).
data = (
    df.copy()
    .filter(items=["test_subject", "numjobs", "w_iops_mean", "w_iops_stddev"], axis=1)
    .set_index(["test_subject", "numjobs"], drop=True)
)
# The zil-lwb rows serve as baseline; dropping index level 0 leaves them indexed by numjobs.
baseline = data.query("test_subject == 'zil-lwb'").droplevel(0)
baseline

In [None]:
# Show the rows of the zil-pmem test subject.
df.query("test_subject == 'zil-pmem'")

In [None]:
# divide by baseline
speedup = data.divide(baseline, level=1)
speedup.query("test_subject == 'zil-pmem'")["w_iops_mean"]

In [None]:
# Every subject except devdax, in canonical order.
subjects = list(test_subject_order)
subjects.remove("devdax")

# subjects must be ordered like test_subject_order otherwise the legend is off
def value_list_is_sorted(l, key):
    """Return True if `l` is already in the order that sorting by `key` gives."""
    return sorted(l, key=key) == l
assert value_list_is_sorted(subjects, test_subject_order.index)

# IOPS speedup as a flat frame, restricted to the plotted subjects.
d = speedup["w_iops_mean"].reset_index()
d = d[(d.test_subject != "devdax") & d.test_subject.isin(subjects)]

plt.figure(figsize=(8, 6))
ax = plt.axes()
lp = sns.lineplot(
    data=d,
    x="numjobs",
    y="w_iops_mean",
    hue="test_subject",
    style="test_subject",
    markers=True,
    hue_order=test_subject_order,
    style_order=test_subject_order,
    legend=False,
    ax=ax,
)
lp.set_title("Speedup of IOPS (Baseline: zil-lwb)", pad=16)
lp.set_ylabel("Speedup")
lp.set_ylim((0, 12))
lp.legend(subjects)

dstools.savefig("4k_speedup_lwb_baseline")

# 4k sync write latency corridor

In [None]:
# Mean write latency of async, zil-lwb and zil-pmem; lower y limit 1, upper limit left open.
plt_abs_compare(
    subjects=["async", "zil-lwb", "zil-pmem"],
    value="w_lat_mean",
    title="ZIL-PMEM Latency Comparison (2)",
    unit="nano seconds",
    ylim=(1, None),
)
dstools.savefig("4k_zil_pmem_lat_2")

# Data Export For Use In Latency Breakdown

In [None]:
# devdax mean write latency indexed by numjobs, rendered as "table"-oriented JSON.
(
    df.query("test_subject == 'devdax'")
    .set_index("numjobs")
    .filter(["w_lat_mean"])
    .to_json(orient="table")
)