Changelog:
* v3.1:  Like v3 but with the ncommitters=4 results
* v3.2:
  * plus ZFSSyncDisabled
  * several features for `compare_benchmarks`
  * restructured grouping for better narrative in evaluation
* v3.3:
  * benchmark runtimes tables to support claims in thesis
  * prettier legend rendering
* v4
  * same dataset as v3 but with dm-writecache high-watermark=1
    * low and high watermark = 0 maps all IO to NVMe, we don't want that
    * with high watermark = 1 (that is, 1%) and our partition sizes, we get a nice constant background write-back to the NVMe that, according to iostat, doesn't push it to its limit (tops out at 90% utilization @ 8 numjobs in fio)
    * dm-writecache's lock contention is the bottleneck
  * additional data filtering / exploration, didn't really lead anywhere
* v4.1
  * prettier graphs / prep for thesis

In [None]:
import pandas as pd
import glob
import json
import dotted # https://pypi.org/project/dotted-notation/
import re
import matplotlib.pyplot as plt
import itertools
import numpy as np

from pathlib import Path
import seaborn as sns
import lib.datasciencetoolbelt as dstools
from lib.resultstorage import ResultStorage
from lib.helpers import merge_dicts

In [None]:
# Where (and whether) rendered figures are written.
savefig_dir = "./postprocess_results"
savefig_enable = True

# Seaborn context preset, and the text width in inches that figure sizes are derived from.
seaborn_context = "paper"
textwidth = 5.5  # inch

In [None]:
# Hand the seaborn context and the savefig settings defined above to dstools.
dstools.setup(dict(
    seaborn_context=seaborn_context,
    savefig=dict(
        enable=savefig_enable,
        dir=Path(savefig_dir),
    ),
))

# Result storage constructed over the ./results directory.
result_storage = ResultStorage(Path("./results"))

In [None]:
# Prefix handed to result_storage.iter_results() below to select which results are loaded.
result_storage_prefix = "app_benchmarks__v4"

def to_row(d):
    """Flatten one stored benchmark result ``d`` into a flat dict (one DataFrame row).

    Returns ``None`` for results flagged with ``result.exception`` and for
    dummy results. Raises ``Exception`` if the benchmark identity is unknown or
    if the benchmark's variable or metric value is missing; asserts that a
    storage stack identity could be derived.
    """
    # Results that must not show up in the table at all.
    if dotted.get(d, 'result.exception') is True:
        return None
    if dotted.get(d, 'result.dummy') is not None:
        return None  # skip dummy

    # Results without a block device stack identity are labelled 'native'.
    blockdev_stack = dotted.get(d, 'storage_stack.blockdev_stack.identity')
    if blockdev_stack is None:
        blockdev_stack = 'native'

    fstyp = dotted.get(d, 'storage_stack.fstyp')
    if fstyp is None:
        # No fstyp recorded: the stack identity has to start with one of the
        # zfs-* variants, and that prefix is used as fstyp.
        storage_stack = dotted.get(d, 'storage_stack.identity')
        zfs_match = re.match(r"(?P<fstyp>zfs-(pmem|lwb|sync_disabled))-", storage_stack)
        assert zfs_match
        fstyp = zfs_match['fstyp']
        daxmount = None
    else:
        # Compose the identity from filesystem type, block device stack and dax flag.
        daxmount = dotted.get(d, 'storage_stack.mount_dax')
        storage_stack = fstyp + "__on__" + blockdev_stack + "__dax_" + str(daxmount)
    assert storage_stack is not None

    benchmark_identity = dotted.get(d, 'result.identity')

    # fixup mariadb-sysbench-oltp_insert: alias the key containing a space to an
    # underscore variant, which is what the metric path in the table below uses.
    if benchmark_identity == "mariadb-sysbench-oltp_insert":
        sysbench_metrics = d['result']['result']['metrics']
        sysbench_metrics['SQL_statistics'] = sysbench_metrics['SQL statistics']

    # Per benchmark: (path of the scaling variable or None, paths of the metrics).
    benchmark_paths = {
        "filebench-varmail": ('result.config.vars.nthreads', ['result.result.metrics.summary_ops_per_sec']),
        "filebench-oltp": ('result.config.vars.ndbwriters', ['result.result.metrics.summary_ops_per_sec']),
        "redis-SET": ('result.config.clients', ['result.result.main.metrics.rps']),
        "rocksdb-fillsync": ('result.config.threads', ['result.result.metrics.fillsync.ops_per_sec']),
        "sqlite-bench": (None, ['result.result.metrics.fillrandsync.micros_per_op']),
        "mariadb-sysbench-oltp_insert": ('result.config.threads', ['result.result.metrics.SQL_statistics.transactions_per_sec']),
        "fio-4k-sync-rand-write--size-per-job": ('result.fio_config.numjobs', ['result.fio_jsonplus.jobs[0].write.iops']),
        "fio-4k-sync-rand-write--size-div-by-numjobs": ('result.fio_config.numjobs', ['result.fio_jsonplus.jobs[0].write.iops']),
    }
    try:
        variable_dotted_str, metric_dotted_strs = benchmark_paths[benchmark_identity]
    except KeyError:
        raise Exception(f"unknown_benchmark {benchmark_identity}:\n{d!r}")

    # Value of the scaling variable; stays None for benchmarks without one.
    variable_value = None
    if variable_dotted_str:
        variable_value = dotted.get(d, variable_dotted_str)
        if variable_value is None:
            print(benchmark_identity)
            raise Exception(str(d))

    # Collect the metrics; the first listed one doubles as the primary metric.
    if not metric_dotted_strs:
        raise Exception(str(d))
    metrics = {}
    for metric_path in metric_dotted_strs:
        metric_value = dotted.get(d, metric_path)
        if not metric_value:
            raise Exception(f"mds: {metric_path}\n{d!r}")
        if not metrics:
            metrics['primary_metric'] = metric_path
            metrics['primary_metric_value'] = metric_value
        metrics[metric_path] = metric_value

    # Only some benchmarks have a known path for their runtime.
    runtime_paths = {
        "redis-SET": 'result.result.main.runtime_secs',
        "rocksdb-fillsync": 'result.result.runtime',
    }
    bm_runtime = None
    bm_runtime_dotted_str = runtime_paths.get(benchmark_identity, None)
    if bm_runtime_dotted_str is not None:
        bm_runtime = dotted.get(d, bm_runtime_dotted_str)
        assert bm_runtime is not None

    row = {
        "filepath": d['file'],
        "storage_stack": storage_stack,
        "blockdev_stack": blockdev_stack,
        "daxmount": daxmount,
        "fstyp": fstyp,
        "benchmark": benchmark_identity,
        "variable": variable_dotted_str,
        "variable_value": variable_value,
        "actual_runtime": bm_runtime,
    }
    row.update(metrics)
    return row

def try_to_row(d):
    """Call ``to_row(d)``; on failure print the offending result, then re-raise.

    Printing the raw result shows which stored result triggered the error.
    The exception itself propagates unchanged.
    """
    try:
        return to_row(d)
    except Exception:
        # Debugging aid only. Narrowed from a bare ``except:`` so that
        # KeyboardInterrupt/SystemExit do not dump the whole result first.
        print(d)
        raise

# Convert every stored result into a row. to_row() returns None for results
# that are to be skipped, so those are dropped here in the same pass.
rows = [
    row
    for row in map(try_to_row, result_storage.iter_results(result_storage_prefix))
    if row is not None
]
df = pd.DataFrame(rows)
df

# df = df[df.blockdev_stack.map(lambda bd: "writecache" not in bd)]
# for l in list(df.filepath.map(lambda r: f'cp {r} {r.replace("app_benchmarks__v3", "app_benchmarks__v4")}')):
#     print(l)

In [None]:
# Distinct benchmark identities present in the loaded results.
set(df['benchmark'])

In [None]:
# Distinct storage stack identities present in the loaded results.
set(df['storage_stack'])

In [None]:
# Distinct block device stacks present in the loaded results ('native' where none was recorded).
set(df['blockdev_stack'])

In [None]:
#data = df.copy().query('variable_value in [1, @nan] and benchmark in ["redis-SET", "filebench-varmail"]')
# Bind NaN to a plain name so the query string below can reference it as @nan.
nan = np.nan
# Keep the rows whose variable_value is 1 or NaN (to_row leaves variable_value
# as None for benchmarks without a variable; presumably that surfaces as NaN here).
data = df.copy().query('variable_value in [1, @nan]')
# data = df.copy().query('variable_value in [1, @nan] and benchmark in ["redis-SET", "filebench-varmail", "rocksdb-fillsync"]')
# display(data)
# verify_integrity=True makes set_index fail on duplicate (benchmark, storage_stack, variable_value) entries.
data = data.set_index(['benchmark', 'storage_stack', 'variable_value'], verify_integrity=True).sort_index()
data

# Ensure Redis And RocksDB Runtimes Are Acceptable

In [None]:
# find all runs that took < 10 seconds
# (rows without a recorded runtime do not pass the check and are left out)
df[df.actual_runtime.map(lambda rt: rt is not None and rt < 10)]

Ok, the only critical candidate is `zfs-pmem-rs_0-byp_0-nc_3` in rocksdb-fillsync.
And zfs-sync-disabled.
We address that in the text.

In [None]:
# Summary statistics of the redis-SET runtimes, grouped by variable_value.
df[df.benchmark == 'redis-SET'].groupby('variable_value').actual_runtime.describe()

In [None]:
# Summary statistics of the rocksdb-fillsync runtimes, grouped by variable_value.
df[df.benchmark == 'rocksdb-fillsync'].groupby('variable_value').actual_runtime.describe()

In [None]:
# Summary statistics of the rocksdb-fillsync runtimes across all variable values.
df[df.benchmark == 'rocksdb-fillsync'].actual_runtime.describe()

In [None]:
# 10% quantile of the rocksdb-fillsync runtimes.
df[df.benchmark == 'rocksdb-fillsync'].actual_runtime.quantile(0.1)

# Main Comparison

In [None]:
# Presentation metadata per benchmark identity:
#   (display name, factor the primary metric is multiplied with, unit label of the scaled value)
benchmark_metadata = {
    "filebench-oltp": ("fb-oltp", 1/1_000, r'ops/s $\times 10^3$'),
    "filebench-varmail": ("fb-varmail", 1/1_000, r'ops/s $\times 10^3$'),
    'fio-4k-sync-rand-write--size-div-by-numjobs': ('fio-fixed', 1/1_000, r'IOPS $\times 10^3$'),
    'fio-4k-sync-rand-write--size-per-job': ('fio-growing', 1/1_000, r'IOPS $\times 10^3$'),
    'mariadb-sysbench-oltp_insert': ('MariaDB', 1/1_000, r'txn/s $\times 10^3$'),
    'rocksdb-fillsync': ('RocksDB', 1/1_000, r'ops/s $\times 10^3$'),
    'redis-SET': ('Redis-SET', 1/1_000, r'req/s $\times 10^3$'),
}


def rename_benchmark(benchmark):
    """Return the display name of ``benchmark`` (KeyError if it is unknown)."""
    display_name, _factor, _unit = benchmark_metadata[benchmark]
    return display_name


def benchmark_scale_value(benchmark, v):
    """Return ``v`` multiplied by the scale factor registered for ``benchmark``."""
    _display_name, factor, _unit = benchmark_metadata[benchmark]
    return v * factor


def benchmark_unit(benchmark):
    """Return the unit label that goes with the scaled values of ``benchmark``."""
    _display_name, _factor, unit = benchmark_metadata[benchmark]
    return unit
import itertools
import re
def rename_storage_stack(ss):
    """Turn a raw storage stack identifier into the short label used in plots.

    The three default ZFS configurations get fixed short names. Every other
    identifier is rewritten step by step:

    * ``-nc_3`` is dropped (it is the default; other ``nc_*`` values stay visible),
    * ``-sync_disabled`` becomes ``-async``,
    * the ``rs``/``byp``/``nc`` flags are comma-separated instead of dash-separated,
    * ``__on__`` becomes `` on ``,
    * for xfs/ext4 the ``__dax_<bool>`` suffix is removed and ``-dax`` is
      appended to the filesystem name if it was ``True``,
    * remaining underscores become ``=``.

    Example: ``'ext4__on__zvol-pmem-rs_0-byp_1-nc_3__dax_False'`` becomes
    ``'ext4 on zvol-pmem,rs=0,byp=1'``.
    """
    default_names = {
        "zfs-pmem-rs_0-byp_0-nc_3": "zfs-pmem",
        "zfs-lwb-rs_0": "zfs-lwb",
        "zfs-sync_disabled-rs_0": "zfs-async",
    }
    if ss in default_names:
        return default_names[ss]

    # that's the default, other nc_* values will still show
    ss = ss.replace("-nc_3", "")

    # shorter name, already used in other parts of thesis text
    ss = ss.replace("-sync_disabled", "-async")

    # NOTE: a former loop over zfs/zvol x pmem/lwb/async replaced each
    # "<z>-<zil>" substring with itself, i.e. did nothing; it has been removed.
    for flag in ("rs", "byp", "nc"):
        ss = ss.replace(f"-{flag}", f",{flag}")
    ss = ss.replace("__on__", " on ")

    # Fold the dax mount flag of the Linux filesystems into the filesystem name.
    m = re.match(r"(xfs|ext4)(.*)__dax_(False|True)", ss)
    if m:
        dax = "-dax" if m[3] == "True" else ""
        ss = f"{m[1]}{dax}{m[2]}"

    return ss.replace("_", "=")

# Preview the display labels of all storage stacks present in df.
list(map(rename_storage_stack, set(df['storage_stack'])))

In [None]:
def compare_benchmarks(storage_stacks, baseline=None, _only_display_data=False, yticks_rel=None, barwidth=1000, display_tables=False, subplots_kw=None, allow_nc_neq_3=False, legend_ncol=2):
    """Plot a grid of bar charts comparing storage stacks per benchmark.

    Reads the module-level ``df``. The grid has one row per benchmark and one
    column per ``variable_value`` (restricted to 1, 4 and 8); within a subplot
    every storage stack is one bar showing the scaled primary metric.
    ``sqlite-bench`` is excluded because it has no scaling factor.

    Parameters:
        storage_stacks: collection of raw storage stack identifiers (as found
            in ``df.storage_stack``) to include.
        baseline: raw identifier of the reference stack. If given, each bar is
            annotated with its value divided by the baseline's value for the
            same benchmark and variable_value.
        _only_display_data: return after the optional table display, without
            plotting.
        yticks_rel: accepted for backward compatibility; not used.
        barwidth: upper bound for the bar width, run through
            ``fig.dpi_scale_trans``; the width is additionally capped at 90% of
            the horizontal slot reserved for one stack.
        display_tables: display the metric table (and, with a baseline, its
            element-wise reciprocal).
        subplots_kw: extra keyword arguments merged over the defaults
            (``figsize``, ``gridspec_kw``) and forwarded to ``plt.subplots``.
        allow_nc_neq_3: accepted for backward compatibility; not used.
        legend_ncol: number of columns of the figure legend.

    Returns:
        None. The figure is left as the current matplotlib figure.
    """
#     uncomment this if absolute values are of interest
#     if baseline:
#         compare_benchmarks(storage_stacks, baseline=None, _only_display_data=True, display_tables=display_tables, subplots_kw=subplots_kw)

    # None instead of a shared mutable ``{}`` default.
    if subplots_kw is None:
        subplots_kw = {}

    data = df.copy()

    # filter out sqlite-bench since it has no scaling factor
    data = data.query('benchmark not in ["sqlite-bench"]').copy()

    # scale as specified
    def scale_metric(row):
        row['primary_metric_value'] = benchmark_scale_value(row['benchmark'], row['primary_metric_value'])
        return row
    data = data.apply(scale_metric,  axis=1, result_type='expand')

#     if not allow_nc_neq_3:
#         data = data[data.storage_stack.map(lambda v: "-nc_" not in v or "-nc_3" in v)]

    # filter storage stacks by parameter
    data = data[data.storage_stack.map(lambda v: v in storage_stacks)]

    # now rename storage stacks (and the baseline, so that lookups by label match)
    data['storage_stack'] = data.storage_stack.map(rename_storage_stack)
    baseline = baseline if not baseline else rename_storage_stack(baseline)

    data = data.set_index(['benchmark', 'storage_stack', 'variable_value'], verify_integrity=True)

    tmp = data

    # only show 1, 4, 8
    tmp = tmp.query('variable_value in [1, 4, 8]')

    if display_tables:
        displaytable = tmp['primary_metric_value'].copy().unstack("benchmark").reorder_levels(['variable_value', 'storage_stack']).sort_index()
        display('displaytable:', displaytable)
        if baseline:
            display('1/displaytable', 1/displaytable)

    if _only_display_data:
        return

    # From here on, storage_stacks holds the renamed labels that actually have data.
    storage_stacks = sorted(list(set(tmp.index.get_level_values('storage_stack'))))
    variable_values = sorted(list(set(tmp.index.get_level_values('variable_value'))))
    benchmarks = sorted(list(set(tmp.index.get_level_values('benchmark'))))

    # One fixed color per storage stack; the palette is built once.
    palette = sns.color_palette(None, n_colors=len(storage_stacks))
    color = dict(zip(storage_stacks, palette))

    subplots_kw = merge_dicts(dict(
        figsize=(textwidth, 1*len(benchmarks)),
        gridspec_kw={"hspace":0.25, 'wspace': 0.05}
    ), subplots_kw)

    # squeeze=False keeps ``axes`` two-dimensional even for a single row or
    # column, so that ``axes[row, col]`` below always works.
    fig, axes = plt.subplots(len(benchmarks), len(variable_values), **{**subplots_kw, "squeeze": False})

    display(storage_stacks, variable_values, benchmarks)

    # collect artists to draw legend (their color is fixed by the ax.bar call's color param)
    most_recent_artists = {}

    xwidth = 10
    fontsize = 8

    for row, b in enumerate(benchmarks):
        # All subplots of one benchmark share the same y range.
        ylim=(0, 1.05 * tmp.loc[b, slice(None), slice(None)]['primary_metric_value'].max())
        for col, vv in enumerate(variable_values):
            ax = axes[row, col]

            ax.set_ylim(ylim)
            ax.set_xlim((-0.1*xwidth, xwidth *1.1))

            for si, ss in enumerate(storage_stacks):
                try:
                    # Center of this stack's slot on the x axis.
                    x = si * xwidth/len(storage_stacks)
                    x += 0.5 * xwidth/len(storage_stacks)

                    y = tmp.loc[b, ss, vv]['primary_metric_value']
                    barcontainer = ax.bar(x, y,
                        label=ss,
                        color=color[ss],
                        width=min(
                            fig.dpi_scale_trans.transform((barwidth, 0))[0],
                            0.9 * xwidth /  len(storage_stacks)
                            )
                        )
                    most_recent_artists[ss] = barcontainer
                    assert(len(barcontainer.patches) == 1)
                    bar = barcontainer.patches[0] # https://matplotlib.org/stable/api/_as_gen/matplotlib.patches.Rectangle.html#matplotlib.patches.Rectangle

                    if baseline:
                        bl = tmp.loc[b, baseline, vv]['primary_metric_value']
                        speedup = y / bl

#                         print(b, vv, ss, bar.get_height(),  ax.transLimits.transform((0, y)))
                        # Label inside the bar (white) if the bar reaches above half the axes height, otherwise on top of it (black).
                        verticalalignment, ann_color = ('top', 'white') if ax.transLimits.transform((0, y))[1] > 0.5 else ('bottom', 'black')

                        ax.annotate(speedup.round(2), (x, y),
                                    horizontalalignment='center', verticalalignment=verticalalignment,
                                    color=ann_color,
                                    fontsize=fontsize, rotation=90)
                except KeyError:
                    # No data for this (benchmark, stack, variable_value) or for
                    # the baseline: leave the bar / annotation out.
                    pass # print(b, vv, ss)

            ax.set_xticklabels([]) # no meaning

            if col == 0:
                ax.set_ylabel(rename_benchmark(b), fontsize=fontsize, rotation=90)

            if col == len(variable_values) - 1:
                # The right-most column carries the tick labels and the unit.
                ax.yaxis.tick_right()
                ax.tick_params(labelsize=fontsize)
                ax.yaxis.set_label_position("right")
                ax.set_ylabel(benchmark_unit(b), fontsize=fontsize, rotation=90)
            else:
                ax.set_yticklabels([])

            # 3 ticks on the y axis
            ax.locator_params(axis='y', min_n_ticks=3)

            if row == 0:
                assert round(vv) == vv
                ax.set_title(f"Scaling Factor {round(vv)}", fontsize=fontsize)

    fig.legend(handles=list(most_recent_artists.values()), ncol=legend_ncol,
               loc='center', bbox_to_anchor=(0.5, 0.075),
               fontsize=7)
    

# First take a hard look at `ncommitters`, `zvol_request_sync` and `bypass` to determine which we include in the subsequent graphs. This graph is not included in the publication.

* nc3 vs nc4 (neighboring bars): hardly any difference, use nc3 because it's more cpu-efficient
* rs0 vs rs1
  * generally not much of a difference
  * ext4 filebench-varmail performs significantly better with rs0 vs rs1
* byp0 vs byp1: effect noticeable at higher thread counts


Note that the effect of ITXG bypass in the Block-Device-Provider-Role is noticeable but not significant (maybe this changes if we add a `fio` benchmark?)

In [None]:
# ext4/xfs on zvol-pmem: every combination of rs_{0,1}, byp_{0,1} and nc_{3,4}.
# Speedups are annotated relative to ext4 on zvol-pmem with rs_0, byp_0, nc_3.
# NOTE: allow_nc_neq_3 is accepted by compare_benchmarks but not evaluated there.
tmp = {
#  'ext4__on__devpmem__dax_False',
#  'ext4__on__devpmem__dax_True',
#  'ext4__on__dm-writecache__dax_False',
#  'ext4__on__zvol-lwb-rs_0__dax_False',
#  'ext4__on__zvol-lwb-rs_1__dax_False',
 'ext4__on__zvol-pmem-rs_0-byp_0-nc_4__dax_False',
 'ext4__on__zvol-pmem-rs_0-byp_0-nc_3__dax_False',
 'ext4__on__zvol-pmem-rs_0-byp_1-nc_4__dax_False',
 'ext4__on__zvol-pmem-rs_0-byp_1-nc_3__dax_False',
 'ext4__on__zvol-pmem-rs_1-byp_0-nc_4__dax_False',
 'ext4__on__zvol-pmem-rs_1-byp_0-nc_3__dax_False',
 'ext4__on__zvol-pmem-rs_1-byp_1-nc_4__dax_False',
 'ext4__on__zvol-pmem-rs_1-byp_1-nc_3__dax_False',
#  'xfs__on__devpmem__dax_False',
#  'xfs__on__devpmem__dax_True',
#  'xfs__on__dm-writecache__dax_False',
#  'xfs__on__zvol-lwb-rs_0__dax_False',
#  'xfs__on__zvol-lwb-rs_1__dax_False',
 'xfs__on__zvol-pmem-rs_0-byp_0-nc_4__dax_False',
 'xfs__on__zvol-pmem-rs_0-byp_0-nc_3__dax_False',
 'xfs__on__zvol-pmem-rs_0-byp_1-nc_4__dax_False',
 'xfs__on__zvol-pmem-rs_0-byp_1-nc_3__dax_False',
 'xfs__on__zvol-pmem-rs_1-byp_0-nc_4__dax_False',
 'xfs__on__zvol-pmem-rs_1-byp_0-nc_3__dax_False',
 'xfs__on__zvol-pmem-rs_1-byp_1-nc_4__dax_False',
 'xfs__on__zvol-pmem-rs_1-byp_1-nc_3__dax_False',
      
#  'zfs-lwb-rs_0',
#  'zfs-lwb-rs_1',
#  'zfs-pmem-rs_0-byp_0-nc_2',
#  'zfs-pmem-rs_0-byp_0-nc_3',
#  'zfs-pmem-rs_0-byp_1-nc_2',
#  'zfs-pmem-rs_0-byp_1-nc_3',
#  'zfs-pmem-rs_1-byp_0-nc_2',
#  'zfs-pmem-rs_1-byp_0-nc_3',
#  'zfs-pmem-rs_1-byp_1-nc_2',
#  'zfs-pmem-rs_1-byp_1-nc_3'
}
compare_benchmarks(list(tmp), baseline='ext4__on__zvol-pmem-rs_0-byp_0-nc_3__dax_False',
                   subplots_kw={'figsize': (20,15)},
                  allow_nc_neq_3=True)

In [None]:
# Native ZFS with zfs-pmem: every combination of rs_{0,1}, byp_{0,1} and nc_{3,4},
# annotated relative to the rs_0, byp_0, nc_3 configuration.
tmp = {
 'zfs-pmem-rs_0-byp_0-nc_3',
 'zfs-pmem-rs_0-byp_0-nc_4',
 'zfs-pmem-rs_0-byp_1-nc_3',
 'zfs-pmem-rs_0-byp_1-nc_4',
 'zfs-pmem-rs_1-byp_0-nc_3',
 'zfs-pmem-rs_1-byp_0-nc_4',
 'zfs-pmem-rs_1-byp_1-nc_3',
 'zfs-pmem-rs_1-byp_1-nc_4'
}
compare_benchmarks(list(tmp), baseline='zfs-pmem-rs_0-byp_0-nc_3',
                   subplots_kw={'figsize': (20,15)},
                  allow_nc_neq_3=True)

=> Go with ncommitters = 3, but compare the different rs and byp settings later

# ZIL-PMEM vs ZIL-LWB

* ZIL-PMEM outperforms ZIL-LWB in all workloads, and very significantly in most of them
* ZIL-LWB throughput increases with growing `variable_value`
* ZIL-PMEM is, with one exception, less than 50% slower than `ZFS-async` (see the comparison against ZFS-async in the next section)

In [None]:
# zfs-lwb vs. zfs-pmem, annotated relative to zfs-lwb; the figure is then handed to dstools.savefig.
# NOTE: yticks_rel is passed here but compare_benchmarks does not use it.
tmp = {   
 'zfs-lwb-rs_0',
 'zfs-pmem-rs_0-byp_0-nc_3',
}
compare_benchmarks(list(tmp), baseline='zfs-lwb-rs_0', barwidth=0.02, subplots_kw={"figsize": (0.8 * textwidth, 6) }, yticks_rel=[0,2,4,6,8], display_tables=True)
dstools.savefig("appbench__zilpmem_vs_lwb")
# compare_benchmarks(list(tmp), display_tables=True)

# ZFS{lwb,pmem} with PMEM SLOG vs. ZFS-async vs. {ext4,xfs}{nodax,dax} only on PMEM

* ZIL-PMEM has very high speedup over ZIL-LWB
* In the small sync-IO heavy workloads (redis-SET, rocksdb-fillsync), ZIL-PMEM significantly outperforms the other candidates, even though they are PMEM only
* For heavy sync IO (fio), the linux filesystems are better
  * have significantly lower guarantees (no data journaling)
* ext4 and xfs's DAX optimizations are significant
* With one exception, ZIL-PMEM remains within 50% of `ZFS-async` performance

In [None]:
# ext4/xfs directly on devpmem (with and without dax) vs. the three ZFS variants,
# annotated relative to zfs-pmem (rs_0, byp_0, nc_3).
tmp = {
    
 'ext4__on__devpmem__dax_False',
 'ext4__on__devpmem__dax_True',

 'xfs__on__devpmem__dax_False',
 'xfs__on__devpmem__dax_True',
    
 'zfs-lwb-rs_0',
 'zfs-pmem-rs_0-byp_0-nc_3',
  'zfs-sync_disabled-rs_0',

}
compare_benchmarks(list(tmp), baseline='zfs-pmem-rs_0-byp_0-nc_3', display_tables=True, legend_ncol=3)
dstools.savefig("appbench__zfs_vs_linuxfs_on_pmem")

# ZVOLs + Linux Filesystem (zvol-lwb vs zvol-pmem with different zvrs,bypass settings)

* ZIL-PMEM delivers significant speedup over ZIL-LWB
* Effect of zvrs:
    * neutral to negative for ZIL-LWB
    * hugely beneficial for ZIL-PMEM
        * warrants re-design of ZVOL (blk-mq, future work)
* Effect of Bypass
  * shows to some extent for xfs
  * For ext4 in `filebench-oltp` at 8 threads, there is a huge decline

In [None]:
# ext4/xfs on ZVOLs: zvol-lwb (rs_0/rs_1) and zvol-pmem (all rs/byp combinations at nc_3).
tmp = {
#  'ext4__on__dm-writecache__dax_False',
 'ext4__on__zvol-lwb-rs_0__dax_False',
 'ext4__on__zvol-lwb-rs_1__dax_False',
    
 'ext4__on__zvol-pmem-rs_0-byp_0-nc_3__dax_False',
 'ext4__on__zvol-pmem-rs_0-byp_1-nc_3__dax_False',
 'ext4__on__zvol-pmem-rs_1-byp_0-nc_3__dax_False',
 'ext4__on__zvol-pmem-rs_1-byp_1-nc_3__dax_False',
    
#  'xfs__on__dm-writecache__dax_False',
 'xfs__on__zvol-lwb-rs_0__dax_False',
 'xfs__on__zvol-lwb-rs_1__dax_False',
    
 'xfs__on__zvol-pmem-rs_0-byp_0-nc_3__dax_False',
 'xfs__on__zvol-pmem-rs_0-byp_1-nc_3__dax_False',
 'xfs__on__zvol-pmem-rs_1-byp_0-nc_3__dax_False',
 'xfs__on__zvol-pmem-rs_1-byp_1-nc_3__dax_False',
    
#   'xfs__on__zvol-sync_disabled-rs_0__dax_False',
#   'ext4__on__zvol-sync_disabled-rs_0__dax_False',

}

# Overview of all selected stacks, absolute values only (no baseline).
compare_benchmarks(tmp, subplots_kw={'figsize': (15,12)})
plt.show()

# compare_benchmarks(list(filter(lambda s: "ext4" in s, tmp)), baseline='ext4__on__zvol-pmem-rs_0-byp_0-nc_3__dax_False',  subplots_kw=subplots_kw)
# xfs stacks only, annotated relative to xfs on zvol-pmem (rs_0, byp_0, nc_3); this figure is the one handed to dstools.savefig.
compare_benchmarks(list(filter(lambda s: "xfs" in s, tmp)), baseline='xfs__on__zvol-pmem-rs_0-byp_0-nc_3__dax_False', display_tables=True)
dstools.savefig("appbench__xfs_zvol")

In [None]:
# Table of the primary metric of the two fio benchmarks for all xfs-on-zvol stacks.
tmp = df.copy()
tmp = tmp[(tmp.benchmark == 'fio-4k-sync-rand-write--size-per-job') | (tmp.benchmark ==  'fio-4k-sync-rand-write--size-div-by-numjobs') ]
tmp = tmp[tmp.storage_stack.map(lambda r: "xfs" in r and "zvol" in r)]
# display(tmp.columns)
# Pivot to (benchmark, variable_value) x storage_stack, then transpose so that the stacks become rows.
tmp = tmp.pivot(index=['benchmark', 'variable_value'], columns=['storage_stack'], values=['primary_metric_value']).T
display(tmp)

### Compare ZFS native to XFS on ZVOL (didn't lead anywhere)

The idea was to show the benefits of ZFS's integrated design / support the claim for write amplification through the block-layer abstraction by example of `redis-SET` and `rocksdb-fillsync`.
These benchmarks perform small writes but XFS, even though it writes a logical log internally, experiences write amplification because it only sees a block device instead of PMEM.

=> By comparing ZFS (integrated, PMEM-aware design) vs ZVOL+XFS, we were hoping to see some constant relative speedup at a given scaling factor. That was not the case, and it's probably due to bad assumptions. A better comparison would be XFS with Christoph Hellwig's incomplete DAX-aware logging patches. https://lkml.org/lkml/2020/9/17/77

In [None]:
# Ratio of native zfs-pmem to xfs on zvol-pmem (both rs_0, byp_0, nc_3), per benchmark and variable_value.
tmp = df.copy()

# Keep ext4, xfs and, of the zfs-pmem configurations, only the rs_0/byp_0/nc_3 one.
tmp = tmp.query('fstyp in ["ext4", "xfs", "zfs-pmem"]')
tmp = tmp.query('fstyp != "zfs-pmem" or storage_stack == "zfs-pmem-rs_0-byp_0-nc_3"')
tmp = tmp.set_index(["fstyp", "daxmount", "blockdev_stack", "benchmark", "variable_value"], verify_integrity=True)

# tmp = tmp[["primary_metric_value"]].unstack("daxmount")
# tmp = tmp.droplevel(0, axis=1)
# tmp = tmp.dropna()
display(set(tmp.index.get_level_values('blockdev_stack')))
# display(tmp)
# tmp = tmp.loc[slice(None), slice(None), ["native", "devpmem", "zvol-pmem-rs_0-byp_0-nc_3"], slice(None), slice(None)][["primary_metric_value"]]
# tmp.unstack("fstyp").loc[slice(None), slice(None), ['rocksdb-fillsync', 'fio-4k-sync-rand-write--size-per-job']]
# .loc[slice(None), slice(None), 'redis-SET', 1, ]
tmp = tmp[['primary_metric_value']]
# display(tmp.index)
# The benchmark list is immediately overridden: slice(None) selects all benchmarks.
benches = ['rocksdb-fillsync', 'fio-4k-sync-rand-write--size-per-job']
benches = slice(None)
a = tmp.loc['zfs-pmem', slice(None), 'native', benches].unstack('benchmark')
b = tmp.loc['xfs', slice(None), 'zvol-pmem-rs_0-byp_0-nc_3', benches].unstack('benchmark')
display(a)
display(b)
# Drop the first index level of both sides before dividing so that the remaining index aligns.
a.droplevel([0])/b.droplevel([0])

=> Inconclusive. I expect that if we had `variable_value`s 1, 2, 3, 4 we'd see … (TODO: this sentence is unfinished in the original notes)

# ZFS vs. LinuxFS+dm-writecache

* ZIL-PMEM has significantly better latency than dm-writecache
* Scalability to higher concurrency/throughput: still better and/or comparable
  * expect that at some point, high data rate / overwrite rate will make dm-writecache more efficient


* (only need to look at zfs rs0 byp0 since neither are relevant for ZPL (only ZVOL))


In [None]:
# ext4/xfs on dm-writecache vs. zfs-lwb and zfs-pmem, annotated relative to zfs-pmem.
dm_writecache_benchmarks = {
 'ext4__on__dm-writecache__dax_False',
 'xfs__on__dm-writecache__dax_False',
 'zfs-lwb-rs_0',
 'zfs-pmem-rs_0-byp_0-nc_3',    
# 'zfs-sync_disabled-rs_0',
}
compare_benchmarks(list(dm_writecache_benchmarks), barwidth=0.015, subplots_kw={"figsize": (0.75*textwidth, 6)}, baseline='zfs-pmem-rs_0-byp_0-nc_3', display_tables=False)
dstools.savefig("appbench__dm_writecache")

### The interesting absolute numbers for the Linux filesystems

In [None]:
# Absolute primary metric values of the Linux filesystems on devpmem / dm-writecache
# for the redis, rocksdb and fio benchmarks, one column per "<blockdev_stack>--<dax flag>".
tmp = df.copy()
tmp = tmp[tmp.storage_stack.map(lambda s: "devpmem" in s or "writecache" in s)]
# tmp = tmp[tmp.storage_stack.map(lambda s: "xfs" in s)]
tmp = tmp[tmp.storage_stack.map(lambda s: "dax_" in s)]
# Everything from "dax_" to the end of the identifier, e.g. "dax_False".
tmp['dax'] = tmp.storage_stack.map(lambda s: s[s.find("dax_"):])
tmp['stack'] = tmp['blockdev_stack'] + '--' + tmp['dax']
# Everything before the first underscore, i.e. the filesystem name at the start of the identifier.
tmp['filesystem'] = tmp.storage_stack.map(lambda stack: stack[0:stack.find("_")])
tmp = tmp[tmp.benchmark.map(lambda s: "redis" in s or "rocksdb" in s or 'fio' in s)]
tmp = tmp.set_index(['filesystem', 'stack', 'benchmark', 'variable_value'], verify_integrity=True)['primary_metric_value']
# display(tmp)
tmp = tmp.unstack("stack")
# tmp['delta_abs'] = tmp.devpmem - tmp['dm-writecache']
# tmp['delta_rel'] = (tmp.devpmem / tmp['dm-writecache']).round(2)
display(tmp)

### dm-writecache scalability

In [None]:
# Horizontal bar charts (one subplot per benchmark) of the primary metric in
# thousands, per storage stack, with one bar per variable_value.
storage_stacks = {
 'ext4__on__dm-writecache__dax_False',
 'xfs__on__dm-writecache__dax_False',
#  'ext4__on__devpmem__dax_False',
#  'xfs__on__devpmem__dax_False',
#  'ext4__on__devpmem__dax_True',
#  'xfs__on__devpmem__dax_True',
 'zfs-lwb-rs_0',
 'zfs-pmem-rs_0-byp_0-nc_3',        
}

benchmarks = [
    'rocksdb-fillsync',
    'fio-4k-sync-rand-write--size-per-job',
]

data = df.copy()
data = data.set_index(['benchmark', 'storage_stack', 'variable_value'], verify_integrity=True)


fig, axes = plt.subplots(1, 2, figsize=(8, 3.5))
for col, b in enumerate(benchmarks):
    tmp = data.copy()
    # .loc rejects a set as indexer (deprecated in pandas 1.5, TypeError since
    # 2.0), so pass a list. The result is sorted by index right after, hence
    # the list order does not matter.
    tmp = tmp.loc[b, sorted(storage_stacks), slice(None)].droplevel(0).sort_index()
    tmp = tmp['primary_metric_value'].unstack('variable_value')
    tmp /= 1_000
#     tmp = tmp.div(tmp[1.0], axis=0)
#     ax = tmp.plot.barh(ax=axes[col], legend=False, xlim=(0,4.1), xticks=[0,1,2,3,4], title=b)
    ax = tmp.plot.barh(ax=axes[col], legend=False, title=b)
    if col != 0:
        # Only the left-most subplot carries the storage stack labels.
        ax.set_yticklabels([])
        ax.set_ylabel("")    

# The shared legend is built from the handles of the last subplot drawn.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.2), title='variable value', ncol=3)
# fig.suptitle("Speedup over variable_value=1", fontsize=16)
fig.tight_layout()


#     plt.legend(loc='center left', title='variable value', bbox_to_anchor=(1, 0.5))
#     plt.xlabel('Speedup')
#     plt.title(f"Speedup in IOPS for {benchmark}")


In [None]:
# Horizontal bar chart of one benchmark: primary metric in thousands per
# variable_value, with one bar per storage stack.
storage_stacks = {
#  'ext4__on__dm-writecache__dax_False',
 'xfs__on__dm-writecache__dax_False',
#  'ext4__on__devpmem__dax_False',
 'xfs__on__devpmem__dax_False',
#  'ext4__on__devpmem__dax_True',
 'xfs__on__devpmem__dax_True',
#  'zfs-lwb-rs_0',
 'zfs-pmem-rs_0-byp_0-nc_3',        
}

# Toggle: the second assignment wins, so the fio benchmark is plotted.
benchmark = 'rocksdb-fillsync'
benchmark = 'fio-4k-sync-rand-write--size-per-job'

tmp = df.copy()
tmp = tmp.set_index(['benchmark', 'storage_stack', 'variable_value'], verify_integrity=True)
# .loc rejects a set as indexer (deprecated in pandas 1.5, TypeError since 2.0),
# so pass a list. The result is sorted by index right after.
tmp = tmp.loc[benchmark, sorted(storage_stacks), slice(None)].droplevel(0).sort_index()
tmp = tmp['primary_metric_value'].unstack('storage_stack')
tmp /= 1_000
# NOTE: from here on tmp is the return value of plot.barh(), no longer the table.
tmp = tmp.plot.barh()
plt.legend(loc='center left', title='storage stack', bbox_to_anchor=(1, 0.5))
plt.xlabel('kIOPS')


# Dumpster for some data analysis that didn't lead anywhere but that we don't want to delete ATM

In [None]:
# Linux filesystems without dax: devpmem vs. dm-writecache for the redis and
# rocksdb benchmarks, with the ratio devpmem / dm-writecache as delta_rel.
tmp = df.copy()
tmp = tmp[tmp.storage_stack.map(lambda s: "devpmem" in s or "writecache" in s)]
# tmp = tmp[tmp.storage_stack.map(lambda s: "xfs" in s)]
tmp = tmp[tmp.storage_stack.map(lambda s: "dax_" in s)]
tmp['dax'] = tmp.storage_stack.map(lambda s: s[s.find("dax_"):])
tmp = tmp[tmp.dax == "dax_False"]
# Everything before the first underscore, i.e. the filesystem name at the start of the identifier.
tmp['filesystem'] = tmp.storage_stack.map(lambda stack: stack[0:stack.find("_")])
tmp = tmp[tmp.benchmark.map(lambda s: "redis" in s or "rocksdb" in s)]
tmp = tmp.set_index(['filesystem', 'blockdev_stack', 'benchmark', 'variable_value'], verify_integrity=True)['primary_metric_value']
# display(tmp)
# One column per block device stack.
tmp = tmp.unstack("blockdev_stack")
# tmp['delta_abs'] = tmp.devpmem - tmp['dm-writecache']
tmp['delta_rel'] = (tmp.devpmem / tmp['dm-writecache']).round(2)
display(tmp)
tmp.loc[slice(None), 'redis-SET', :].plot.bar()
print(tmp.to_latex())

In [None]:
# Keep a copy of the current tmp table; tmp is rebound by the following cells.
linux_filesystems = tmp.copy()

In [None]:
# Same table layout for zfs-sync_disabled-rs_0 only, labelled as filesystem/stack 'zfs'
# (redis and rocksdb benchmarks).
tmp = df.copy()
tmp = tmp[tmp.storage_stack == "zfs-sync_disabled-rs_0"]
tmp['stack'] = 'zfs'
tmp['filesystem'] = 'zfs'
tmp = tmp[tmp.benchmark.map(lambda s: "redis" in s or "rocksdb" in s)]
tmp = tmp.set_index(['filesystem', 'stack', 'benchmark', 'variable_value'], verify_integrity=True)['primary_metric_value']
# display(tmp)
tmp = tmp.unstack("stack")
# tmp['delta_abs'] = tmp.devpmem - tmp['dm-writecache']
# tmp['delta_rel'] = (tmp.devpmem / tmp['dm-writecache']).round(2)
display(tmp)
tmp.loc[slice(None), 'redis-SET', :].plot.bar()
print(tmp.to_latex())

In [None]:
# Keep a copy of the current tmp table; tmp is rebound by the following cell.
zfs = tmp.copy()

In [None]:
# Stack the two saved tables on top of each other and plot the rocksdb-fillsync rows.
tmp = pd.concat([linux_filesystems, zfs])
display(tmp)
tmp.loc[slice(None), 'rocksdb-fillsync', :].plot.bar(figsize=(10, 10))