* See notes in `__v1` on why we chose `fio-4k-sync-rand-write--size-per-job`
* We only changed the fio runtime to 60s compared to `__v2`


In [None]:
import pandas as pd
import glob
import json
import dotted # https://pypi.org/project/dotted-notation/
import re
import matplotlib.pyplot as plt

from pathlib import Path
import seaborn as sns
import lib.datasciencetoolbelt as dstools
from lib.resultstorage import ResultStorage

In [None]:
# Configure the project's plotting helper: seaborn context "talk" and figure
# saving switched off, with ./postprocess_results as the output directory.
# NOTE(review): the exact effect of each key is defined in
# lib.datasciencetoolbelt — confirm there before relying on it.
dstools.setup({
    "seaborn_context": "talk",
    "savefig": {
        "enable": False,
        "dir": Path("./postprocess_results"),
    }
})
# Accessor for the stored benchmark results under ./results.
result_storage = ResultStorage(Path("./results"))

#%matplotlib qt
%matplotlib inline


In [None]:
# Identifying variables of a benchmark run, one tuple per variable:
# (dotted path into the result JSON, short column name, type constructor).
id_vars__dottedpath_and_shortname_and_type = [
    ("subject", "test_subject", str),
    ("result.identity", "benchmark", str),
    ("result.fio_config.numjobs", "numjobs", int),
]
# Just the short column names, in declaration order.
id_vars = [
    shortname
    for _dotted_path, shortname, _converter in id_vars__dottedpath_and_shortname_and_type
]

def extract_id_var_values(output_json):
    """Extract the identifying variables of one benchmark result.

    Every dotted path listed in ``id_vars__dottedpath_and_shortname_and_type``
    is looked up in ``output_json`` and converted with the type constructor
    that accompanies it.

    Args:
        output_json: result dict; its ``"file"`` entry is used in the
            "not found" error message.

    Returns:
        dict mapping each short name to its converted value.

    Raises:
        Exception: if a path yields no value, a short name is listed twice,
            or a value cannot be converted to the requested type.
    """
    id_var_values = {}
    for dotted_path, shortname, convert in id_vars__dottedpath_and_shortname_and_type:
        value = dotted.get(output_json, dotted_path)
        # Only a missing or empty value counts as "not found"; a legitimate
        # falsy value such as 0 must not be rejected.
        # NOTE(review): assumes dotted.get returns None for a missing path — confirm.
        if value is None or value == "":
            raise Exception(f"{output_json['file']}: dotted path {dotted_path} not found")
        if shortname in id_var_values:
            raise Exception(f"duplicate shortname {shortname}")
        try:
            id_var_values[shortname] = convert(value)
        except (TypeError, ValueError) as e:
            # int() raises TypeError for unsupported types (e.g. a list) and
            # ValueError for unparsable strings; report both the same way.
            raise Exception(f"cannot parse v={value!r}") from e
    return id_var_values


def get_fio_write_metrics(output_json):
    """Return the ``write`` section of the single fio job in ``output_json``.

    Args:
        output_json: dict holding the fio output under ``fio_jsonplus``.

    Returns:
        The ``"write"`` entry of the only element of ``fio_jsonplus.jobs``.

    Raises:
        AssertionError: if the output does not contain exactly one job.
    """
    jobs = dotted.get(output_json, "fio_jsonplus.jobs")
    # The analysis below assumes one aggregated job entry per result.
    assert len(jobs) == 1, f"expected exactly one fio job, got {len(jobs)}"
    return jobs[0]["write"]


def to_row_dict(output_json):
    """Flatten one stored benchmark result into a flat dict (one table row).

    The row contains the identifying variables (see
    ``extract_id_var_values``), the originating ``file`` and the fio write
    metrics: IOPS mean/stddev, total latency mean/stddev (from ``lat_ns``)
    and selected completion-latency percentiles (from ``clat_ns``).

    Args:
        output_json: result dict with at least the keys ``"result"`` and
            ``"file"``.

    Returns:
        dict with one entry per column.

    Raises:
        Whatever the extraction raises; the offending result is printed as
        JSON before the exception is propagated.
    """
    # column name -> key in fio's clat_ns percentile table
    clat_percentile_keys = {
        "clat_p5": "5.000000",
        "clat_p95": "95.000000",
        "clat_p99": "99.000000",
        "clat_p999": "99.900000",
        "clat_p9999": "99.990000",
    }
    try:
        jw = get_fio_write_metrics(output_json["result"])

        return {
            **extract_id_var_values(output_json),

            # meta
            "file": output_json['file'],

            # fio
            "w_iops_mean": jw["iops_mean"],
            "w_iops_stddev": jw["iops_stddev"],
            "w_lat_mean": dotted.get(jw, "lat_ns.mean"),
            "w_lat_stddev": dotted.get(jw, "lat_ns.stddev"),
            **{
                column: jw['clat_ns']['percentile'][key]
                for column, key in clat_percentile_keys.items()
            },
        }
    except Exception:
        # Dump the offending result so the failure can be debugged, then
        # propagate. `Exception` (not a bare except) so that Ctrl-C does not
        # trigger a JSON dump.
        print(json.dumps(output_json))
        raise
    

In [None]:
# One flat row per stored result of this benchmark series.
rows = list(map(to_row_dict, result_storage.iter_results("motivating_fio_benchmark__v3")))
df = pd.DataFrame(rows)
# df = df.set_index(id_vars)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

Next cell is where you play around with the benchmark type

In [None]:
# Keep only the rows of the benchmark variant under analysis. To look at the
# other variant, comment this line out and enable the one below instead.
df = df.query("benchmark == 'fio-4k-sync-rand-write--size-per-job'")
# df = df.query("benchmark == 'fio-4k-sync-rand-write--size-div-by-numjobs'")

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

In [None]:
#df = df.reset_index().drop('benchmark', axis=1).set_index(['test_subject', 'numjobs'])
# The benchmark column is no longer needed once the rows have been filtered by it.
df = df.drop(columns=['benchmark'])

In [None]:
# Present the "sync-disabled" subject under the name "async"; all other names stay as they are.
df['test_subject'] = df['test_subject'].replace({"sync-disabled": "async"})

In [None]:
# Show the prepared table as the cell output.
df

# 4k write absolute comparison IOPS

In [None]:
def plt_abs_compare_iops_and_latency(subjects):
    """Plot write IOPS mean and stddev over ``numjobs``, one facet per metric.

    Reads the global ``df``; only rows whose ``test_subject`` is contained in
    ``subjects`` are drawn, one line per subject.
    """
    long_form = df.melt(
        id_vars=["test_subject", "numjobs"],
        value_vars=["w_iops_mean", "w_iops_stddev"],
    )
    selected = long_form[long_form.test_subject.isin(subjects)]

    # Independent y axes: mean and stddev live on very different scales.
    grid = sns.FacetGrid(selected, col="variable", height=6, sharey=False)
    grid.map_dataframe(
        sns.lineplot,
        x='numjobs',
        y='value',
        hue='test_subject',
        style='test_subject',
        markers=True,
    )
    grid.add_legend()

In [None]:
# Canonical ordering of the test subjects. It is passed as hue/style order to
# seaborn and used to validate the order of subject lists given to the plot helpers.
test_subject_order = ["devdax", "fsdax", "async", "zil-lwb", "zil-pmem"]
# Common y-axis range for the IOPS plots so that they are visually comparable.
iops_ylim = (0,990_000)

In [None]:
def plt_abs_compare(subjects, value, title, unit, ylim=None, xlim=None):
    """Draw one line per test subject for column ``value`` over ``numjobs``.

    Reads the global ``df`` and draws into a newly created 8x6 figure, which
    becomes the current matplotlib figure.

    Args:
        subjects: list of test subjects to plot; must be ordered like
            ``test_subject_order``.
        value: name of the ``df`` column plotted on the y axis.
        title: plot title.
        unit: label of the y axis.
        ylim: forwarded to ``set_ylim``.
        xlim: forwarded to ``set_xlim``.

    Raises:
        AssertionError: if ``subjects`` is not ordered like ``test_subject_order``.
        ValueError: if a subject is not contained in ``test_subject_order``.
    """
    # The legend labels are assigned positionally at the end of this function,
    # so subjects must be ordered like test_subject_order, otherwise the
    # legend is off.
    assert subjects == sorted(subjects, key=test_subject_order.index), \
        f"subjects must be ordered like test_subject_order, got {subjects!r}"

    data = df[df.test_subject.isin(subjects)]

    plt.figure(figsize=(8, 6))
    lp = sns.lineplot(data=data, x='numjobs', y=value, hue='test_subject', style='test_subject', markers=True,
                      hue_order=test_subject_order, style_order=test_subject_order, legend=False)
    lp.set_title(title, pad=16)
    lp.set_ylabel(unit)
    lp.set_xticks(range(2, 10, 2))
    lp.set_ylim(ylim)
    lp.set_xlim(xlim)
    lp.set_xlabel("Number of fio threads (--numjobs)")
    # A legend with a single entry adds nothing; the title already names the subject.
    if len(subjects) > 1:
        lp.legend(subjects)

In [None]:
# Raw PMEM (devdax vs. fsdax): IOPS and mean latency; each figure is saved
# right after it has been drawn.
plt_abs_compare(["devdax", "fsdax"], "w_iops_mean", "Raw PMEM 4k Write Performance", "IOPS",
                ylim=iops_ylim)
dstools.savefig("4k_rawpmem_iops")
# w_lat_mean is read from fio's `lat_ns.mean`, so the axis is in nanoseconds
# (not usec); label it like the other latency plots in this notebook.
plt_abs_compare(["devdax", "fsdax"], "w_lat_mean", "Raw PMEM 4k Write Latency", "nano seconds")
dstools.savefig("4k_rawpmem_lat")

In [None]:
# Highest mean write IOPS measured for devdax across all numjobs values.
data = df.copy()
display(data.loc[data.test_subject == "devdax", "w_iops_mean"].max())

In [None]:
# IMPORTANT NOTE: the ordering of the variables must be the same as test_subject_order
# Each savefig call follows the plot it belongs to, so the statement order matters.
plt_abs_compare(["devdax", "fsdax", "async", "zil-lwb"], "w_iops_mean", "ZFS: Async vs Sync Write Performance", "IOPS",
               ylim=iops_ylim)
dstools.savefig("4k_async_vs_sync_perf")
plt_abs_compare(["devdax", "fsdax", "async", "zil-lwb"], "w_lat_mean", "ZFS: Async vs Sync Write Latency", "nano seconds",
               ylim=(1, 100 * 1000))
dstools.savefig("4k_async_vs_sync_lat")
# Zoomed-in latency view without zil-lwb.
# NOTE(review): the title mentions ZIL-PMEM although zil-pmem is not among the plotted subjects — confirm the intended title.
plt_abs_compare(["devdax", "fsdax", "async"], "w_lat_mean", "ZIL-PMEM Latency Comparison", "nano seconds",
               ylim=(1, 15 * 1000), xlim=(0, 8))
dstools.savefig("4k_zil_lat_lat_zoomed")

In [None]:
# Table of mean write latency per test subject for numjobs 1, 4 and 8.
data = df.copy()
# pivot_table aggregates w_lat_mean per (numjobs, test_subject) with its default aggregation.
data = data.pivot_table(values="w_lat_mean", index=["numjobs", "test_subject"])
data = data.query('numjobs in [1, 4, 8]')
# Move numjobs (index level 0) into the columns: one row per subject, one column per numjobs.
data = data.unstack(level=0)
# latencies
# w_lat_mean comes from fio's lat_ns, so dividing by 1000 yields microseconds.
display((data / 1000).round(1))
# speedup

zil_lwb = data.query("test_subject == 'zil-lwb'")
assert len(zil_lwb) == 1
# display(zil_lwb.iloc[0])

zil_pmem = data.query("test_subject == 'zil-pmem'")
assert len(zil_pmem) == 1
# display(zil_pmem.iloc[0])

# Latency ratio zil-lwb / zil-pmem per numjobs. The index is dropped on both
# single-row frames so that they align on row 0 instead of on their
# (different) test_subject labels.
display((zil_lwb.reset_index(drop=True) / zil_pmem.reset_index(drop=True)).round(1))

In [None]:
# ZIL-PMEM against fsdax, async and zil-lwb: IOPS, latency, and a zoomed
# latency view without zil-lwb. Each savefig call follows the plot it belongs to.
plt_abs_compare(["fsdax", "async", "zil-lwb", "zil-pmem"], "w_iops_mean", "ZIL-PMEM Performance Comparison", "IOPS",
               ylim=iops_ylim)
dstools.savefig("4k_zil_pmem_perf")
plt_abs_compare(["fsdax", "async", "zil-lwb", "zil-pmem"], "w_lat_mean", "ZIL-PMEM Latency Comparison", "nano seconds",
               ylim=(1, 175 * 1000))
dstools.savefig("4k_zil_pmem_lat")
plt_abs_compare(["fsdax", "async", "zil-pmem"], "w_lat_mean", "ZIL-PMEM Latency Comparison", "nano seconds",
               ylim=(1, 30 * 1000), xlim=(0, 8))
dstools.savefig("4k_zil_pmem_lat_zoomed")

# 4k write speedup in IOPS (zil-lwb as baseline, without devdax)

In [None]:
# Prepare the IOPS table and the zil-lwb baseline used for the speedup computation.
data = df.copy()
data = data.filter(["test_subject", "numjobs", "w_iops_mean", "w_iops_stddev"], axis=1)
data = data.set_index(["test_subject", "numjobs"], drop=True)
# Baseline rows indexed by numjobs only (the test_subject level is dropped).
baseline = data.query("test_subject == 'zil-lwb'").droplevel(0)
print("zil-lwb")
display(baseline.sort_index())
# For comparison: the zil-pmem rows of the same table.
display(data.query("test_subject == 'zil-pmem'").sort_index())

In [None]:
# divide by baseline
speedup = data.divide(baseline, level=1)
speedup.query("test_subject == 'zil-pmem'")["w_iops_mean"].sort_index()

In [None]:
# Plot the IOPS speedup relative to zil-lwb for every subject except devdax.
d = speedup["w_iops_mean"].reset_index()
d = d.query("test_subject != 'devdax'")

subjects = test_subject_order.copy()
subjects.remove("devdax")
# subjects must be ordered like test_subject_order otherwise the legend is off
def value_list_is_sorted(l, key):
    """can't believe python doesn't have this"""
    return l == sorted(l, key=key)
assert value_list_is_sorted(subjects, test_subject_order.index)
d = d[d.test_subject.isin(subjects)]


plt.figure(figsize=(8, 6))
ax = plt.axes()
# seaborn's own legend is disabled; the labels are set positionally from `subjects` below.
lp = sns.lineplot(data=d, x='numjobs', y='w_iops_mean', hue='test_subject', style='test_subject', markers=True,
                  hue_order=test_subject_order, style_order=test_subject_order, legend=False,
                  ax=ax)
lp.set_ylim((0, 12))
lp.set_title("Speedup of IOPS (Baseline: zil-lwb)", pad=16)
lp.set_ylabel("Speedup")
lp.legend(subjects, loc='lower center')

dstools.savefig("4k_speedup_lwb_baseline")

### We noticed that speedup varies significantly between runs for small `numjobs`, so let's investigate this

In [None]:
# Investigate run-to-run variation: coefficient of variation of the IOPS plus
# the underlying mean and stddev, for all subjects except devdax.
data = df.copy()
data = data.set_index(["test_subject", "numjobs"])
data = data.loc[['zil-pmem', 'zil-lwb', 'async', 'fsdax'], ].copy()

# One palette colour per subject, assigned by its position in test_subject_order.
color = {c: sns.color_palette()[i] for i, c in enumerate(test_subject_order)}

# stddev / mean per (test_subject, numjobs); subjects become columns.
cov = (data.w_iops_stddev / data.w_iops_mean).unstack("test_subject")
display(cov)

# Layout: the CoV plot spans the top three grid rows, mean and stddev share the bottom two.
fig = plt.figure(figsize=(10,10), tight_layout=True)
gs = fig.add_gridspec(5, 2)
ax = fig.add_subplot(gs[0:3, :])
cov.plot(ax=ax, title="Coefficient of Variation (stddev/mean)", color=color)
# Place the legend outside the axes, to the right.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

data.w_iops_mean.unstack("test_subject").plot(ax=fig.add_subplot(gs[3:5, 0]), legend=False, title="IOPS mean", color=color)
data.w_iops_stddev.unstack("test_subject").plot(ax=fig.add_subplot(gs[3:5, 1]), legend=False, title="IOPS stddev", color=color)

# The same numbers as tables.
display(data.w_iops_mean.unstack("test_subject"))
display(data.w_iops_stddev.unstack("test_subject"))



# fig, axes = plt.subplots(3, 1, squeeze=False, )
# cov.plot(ax=axes[0,0])
# data.w_iops_stddev.unstack("test_subject").plot(ax=axes[1,0])
# data.w_iops_mean.unstack("test_subject").plot(ax=axes[2,0])

#### Percentiles

In [None]:
# Bring the completion-latency percentiles into long form:
# one row per (test_subject, numjobs, percentile) with the value in column "clat".
data = df.copy()
data = data.set_index(['test_subject', 'numjobs'])
# Naming the column axis "percentile" makes stack() below produce an index level of that name.
data = data[[f'clat_p{n9}' for n9 in ['5', '95', '99', '999', '9999']]].rename_axis("percentile", axis=1)
# display(data)

# data.reset_index().pivot(index=['percentile', 'numjobs'], columns='test_subject')
data = data.stack().rename("clat").reset_index()
display(data)
# One facet per percentile, one line per test subject, independent y axes.
sns.relplot(kind='line',
                 data=data, x='numjobs', y='clat', col='percentile', col_wrap=3,
                 hue='test_subject', style='test_subject',
                 facet_kws={'sharey': False})

In [None]:
# Transposed view: one facet per test subject, one line per percentile, independent y axes.
sns.relplot(kind='line',
            data=data, x='numjobs', y='clat', col='test_subject', col_wrap=3,
            hue='percentile', style='percentile',
            facet_kws={'sharey': False})

In [None]:
# Per-subject percentile plot restricted to zil-pmem, async and zil-lwb,
# with the y axis limited to 0..200_000.
sns.relplot(kind='line',
            data=data[data.test_subject.isin(['zil-pmem', 'async', 'zil-lwb'])],
            x='numjobs', y='clat', col='test_subject', col_wrap=3,
            hue='percentile', style='percentile',
            facet_kws={'sharey': False, 'ylim':(0,200_000)})

In [None]:
# Back to wide form: one row per (test_subject, numjobs), one column per percentile.
tmp = data.set_index(['test_subject', 'numjobs', 'percentile']).unstack('percentile')
# unstack leaves a two-level column index ("clat", percentile); keep only the percentile names.
tmp = tmp.droplevel(0, axis=1)
# Spread = ratio of a high percentile to the 5th percentile.
tmp['p95_5_spread'] = tmp['clat_p95'] / tmp['clat_p5']

tmp['p9999_5_spread'] = tmp['clat_p9999'] / tmp['clat_p5']

display(tmp.p95_5_spread.unstack("test_subject"))
display(tmp.p9999_5_spread.unstack("test_subject"))

# x and hue refer to the index levels of `tmp`.
sns.relplot(kind='line',
            data=tmp,
            x='numjobs', y='p9999_5_spread', hue='test_subject',
           )
#             facet_kws={'sharey': False, 'ylim':(0,200_000)})


### Tail Latency

In [None]:
# Long-form completion-latency percentiles again, this time in microseconds.
data = df.copy()
data = data.set_index(['test_subject', 'numjobs'])
data = data[[f'clat_p{n9}' for n9 in ['5', '95', '99', '999', '9999']]].rename_axis("percentile", axis=1)
# display(data)

# data.reset_index().pivot(index=['percentile', 'numjobs'], columns='test_subject')
data = data.stack().rename("latency [us]")
# The values come from fio's clat_ns; divide by 1000 to match the "[us]" column name.
data = data / 1_000
data = data.reset_index()

# One facet per ZFS subject with a shared, capped y axis so the facets are directly comparable.
sns.relplot(kind='line',
                 data=data[(data.test_subject == 'zil-pmem') | (data.test_subject == 'async') | (data.test_subject == 'zil-lwb')],
                 x='numjobs', y='latency [us]', col='test_subject', col_wrap=3,
                 hue='percentile',
#                  facet_kws={'sharey': False},
                 facet_kws={'sharey': True, 'ylim':(0, 400)},
                )

In [None]:
# Figure with one panel per subject (zil-lwb, zil-pmem, async) showing the
# completion-latency percentiles over numjobs.
tmp = data.set_index(['test_subject', 'numjobs', 'percentile']).sort_index().copy()

# tmp['p95_5_spread'] = tmp['clat_p95'] / tmp['clat_p5']
# tmp['p9999_5_spread'] = tmp['clat_p9999'] / tmp['clat_p5']

# From here on `tmp` is a Series (empty name) indexed by (test_subject, numjobs, percentile).
tmp = tmp['latency [us]'].rename("")
fig, axes = plt.subplots(1, 3, squeeze=False, figsize=(15,4))
fig.suptitle("Completion Latencies By Percentile", fontsize=16, y=1.1)
# Note the different y range: 0..1000 for zil-lwb, 0..200 for the other two panels.
tmp.loc['zil-lwb', ].unstack('percentile').plot(ax=axes[0, 0], legend=False, ylim=(0, 1000), title='zil-lwb')

tmp.loc['zil-pmem', ].unstack('percentile').plot(ax=axes[0, 1], legend=False, ylim=(0, 200), title='zil-pmem')

tmp.loc['async', ].unstack('percentile').plot(ax=axes[0, 2], legend=False, ylim=(0, 200), title='async')

# https://stackoverflow.com/questions/9834452/how-do-i-make-a-single-legend-for-many-subplots-with-matplotlib
# One shared legend below the panels, built from the handles of the last panel.
fig.legend(*axes[0, 2].get_legend_handles_labels(), loc='lower center', bbox_to_anchor=(0.5, -0.3), ncol=5)
fig.supylabel('latency [us]', fontsize=16, x=0.05)

In [None]:


# Ratio of a high completion-latency percentile to the 5th percentile, per
# (test_subject, numjobs). Computed from `df` directly: `tmp` was rebound to a
# plain latency Series in the previous cell and no longer has the
# p95_5_spread / p9999_5_spread columns, so accessing them would raise
# AttributeError. The ratio does not depend on the latency unit.
clat_by_run = df.set_index(['test_subject', 'numjobs'])
p95_5_spread = (clat_by_run['clat_p95'] / clat_by_run['clat_p5']).rename('p95_5_spread')
p9999_5_spread = (clat_by_run['clat_p9999'] / clat_by_run['clat_p5']).rename('p9999_5_spread')

display(p95_5_spread.unstack("test_subject"))
display(p9999_5_spread.unstack("test_subject"))

# 4k sync write latency corridor

In [None]:
# Mean write latency of the three ZFS configurations; only the lower y limit is fixed.
plt_abs_compare(["async", "zil-lwb", "zil-pmem"], "w_lat_mean", "ZIL-PMEM Latency Comparison (2)", "nano seconds",
               ylim=(1, None))
dstools.savefig("4k_zil_pmem_lat_2")

# Data Export For Use In Latency Breakdown

In [None]:
# Cell output: mean write latency of devdax per numjobs as a JSON string in
# pandas' "table" orientation (nothing is written to disk here).
df.query("test_subject == 'devdax'").set_index("numjobs").filter(["w_lat_mean"]).to_json(orient="table")