In [None]:
import pandas as pd
import glob
import json
import dotted # https://pypi.org/project/dotted-notation/
import re
import matplotlib.pyplot as plt

from pathlib import Path
import seaborn as sns
import lib.datasciencetoolbelt as dstools
from lib.resultstorage import ResultStorage

In [None]:
# Hand the plotting settings to the project's `dstools` helper: the seaborn
# context name and a `savefig` section (enabled, targeting ./postprocess_results).
# NOTE(review): what `dstools.setup` does with these values is defined in
# lib/datasciencetoolbelt, not here.
dstools.setup({
    "seaborn_context": "talk",
    "savefig": {
        "enable": True,
        "dir": Path("./postprocess_results"),
    }
})
# Handle used by the cells below to iterate over the results stored under ./results.
result_storage = ResultStorage(Path("./results"))

# Matplotlib backend selection: inline rendering is active, the qt backend is
# kept as a commented-out alternative.
#%matplotlib qt
%matplotlib inline


In [None]:
# Prefix passed to `result_storage.iter_results()` to select the result set to analyze.
result_storage_prefix = "itxg_bypass_v4"

# Each entry is (dotted path into a result's JSON, short name).
# The short names become the index levels of every dataframe built below.
id_vars__dottedpath_and_shortname = [
    ("zfs_setup.module_args.zfs.zfs_zil_itxg_bypass", "itxg_bypass"),
    ("zfs_setup.module_args.zfs.zvol_request_sync", "zvol_request_sync"),
    ("fio_config.fsync_every", "fsync_every"),
    ("fio_config.numjobs", "numjobs"),
]

# Just the short names, in declaration order.
id_vars = [shortname for _dottedpath, shortname in id_vars__dottedpath_and_shortname]

def extract_id_var_values(output_json):
    """Collect the identifying variables of one benchmark result.

    Every dotted path listed in ``id_vars__dottedpath_and_shortname`` is
    looked up in ``output_json``; the values are returned in a dict keyed by
    the corresponding short names.

    Args:
        output_json: the parsed JSON document of a single result.

    Returns:
        dict mapping short name -> value found at the dotted path.

    Raises:
        Exception: if a dotted path resolves to ``None`` (i.e. is missing) or
            if two entries share the same short name.
    """
    d = output_json
    id_var_values = {}
    for dp, sn in id_vars__dottedpath_and_shortname:
        v = dotted.get(d, dp)
        # Compare against None instead of testing truthiness: a legitimate
        # value such as 0 (e.g. fsync_every=0) or False must not be reported
        # as a missing path.
        if v is None:
            # Use .get() so that a result without a 'file' entry still yields
            # the intended error instead of a KeyError that hides it.
            raise Exception(f"{d.get('file', '<unknown file>')}: dotted path {dp} not found")
        if sn in id_var_values:
            raise Exception(f"duplicate shortname {sn}")
        id_var_values[sn] = v
    return id_var_values

def get_fio_write_metrics(output_json):
    """Return the ``write`` section of the single fio job in a result.

    The job list is read from ``fio_jsonplus.jobs``; exactly one job is
    expected (enforced with an assertion).
    """
    fio_jobs = dotted.get(output_json, "fio_jsonplus.jobs")
    assert len(fio_jobs) == 1
    only_job = fio_jobs[0]
    return only_job["write"]

def to_fio_results_dict(output_json):
    """Flatten one result into a row of id variables plus fio write metrics.

    The row holds the id variables followed by the mean and standard
    deviation of the write IOPS and of the write latency (``lat_ns``).
    """
    write_stats = get_fio_write_metrics(output_json)
    row = dict(extract_id_var_values(output_json))
    row["w_iops_mean"] = write_stats["iops_mean"]
    row["w_iops_stddev"] = write_stats["iops_stddev"]
    row["w_lat_mean"] = dotted.get(write_stats, "lat_ns.mean")
    row["w_lat_stddev"] = dotted.get(write_stats, "lat_ns.stddev")
    return row

def to_kstat_results_dict(output_json):
    """Flatten one result into a row of id variables plus kstat counters.

    The row is filled in this order (later entries win on key clashes):
    id variables, every entry of ``zvol_stats``, every entry of
    ``itxg_bypass_stats``, and finally a handful of shorter alias columns
    for the counters used in the plots.
    """
    row = {}
    row.update(extract_id_var_values(output_json))
    row.update(output_json["zvol_stats"])
    row.update(output_json["itxg_bypass_stats"])

    # This counter is read with plain indexing rather than a dotted path.
    row["bio_total"] = output_json["zvol_stats"]["submit_bio__zvol_write(with_taskq_if_enabled)"]

    # alias column -> dotted path of the counter it mirrors
    aliases = (
        ("taskq_delay", 'zvol_stats.zvol_write__taskq_qdelay'),
        ("assign_aquire", 'itxg_bypass_stats.assign__aquisition_total'),
        ("assign_vtable", 'itxg_bypass_stats.assign__vtable'),
        ("assign_total", 'itxg_bypass_stats.assign__total'),
        ("commit_total", 'itxg_bypass_stats.commit__total'),
        ("commit_aquire", 'itxg_bypass_stats.commit__aquire'),
    )
    for column, path in aliases:
        row[column] = dotted.get(output_json, path)
    return row

def to_cpu_dict(output_json):
    """Flatten one result into a row of id variables plus CPU time components.

    Every entry of ``cpu_time.allcpu`` becomes a column named
    ``cpu_<component>``.
    """
    row = dict(extract_id_var_values(output_json))
    for comp, val in dotted.get(output_json, "cpu_time.allcpu").items():
        row[f"cpu_{comp}"] = val
    return row

In [None]:
# compute `df_kstats`: one row of kstat counters per stored result,
# indexed (and sorted) by the id variables
rows = list(map(to_kstat_results_dict, result_storage.iter_results(result_storage_prefix)))
df_kstats = (
    pd.DataFrame.from_dict(rows)
    .set_index(id_vars)
    .sort_index()
)

In [None]:
# compute `df_cpu`: a long-format series of CPU time components, indexed by
# the id variables plus a "metric" level.
#
# The frame is built in a single chain and bound directly to `df_cpu`.
# The global `df` is deliberately left alone: it holds the fio results that
# the plotting cells below read, so rebinding and deleting it here would break
# those cells whenever this cell is re-run on its own.
rows = [to_cpu_dict(j) for j in result_storage.iter_results(result_storage_prefix)]
df_cpu = (
    pd.DataFrame.from_dict(rows)
    .set_index(id_vars)
    .sort_index()
    .rename_axis("metric", axis=1)
    .stack()
)
df_cpu

In [None]:
## derive the `cpu_not_idle` metric in `df_cpu`
# cpu_not_idle = sum of all raw cpu_* components minus cpu_idle.
cpu_wide = df_cpu.unstack("metric")
# Exclude a `cpu_not_idle` column left over from a previous run of this cell,
# so that re-running the cell does not add the derived value into the sum.
raw_components = cpu_wide.drop(columns="cpu_not_idle", errors="ignore")
cpu_wide["cpu_not_idle"] = raw_components.sum(axis=1) - cpu_wide["cpu_idle"]
df_cpu = cpu_wide.stack()

In [None]:
# compute `df`: long-format series of fio write metrics, indexed by the
# id variables plus a "metric" level
rows = list(map(to_fio_results_dict, result_storage.iter_results(result_storage_prefix)))
df = (
    pd.DataFrame.from_dict(rows)
    .set_index(id_vars)
    .sort_index()
    .rename_axis("metric", axis=1)
    .stack()
)
df

In [None]:
# a quick peek at the actual data in `df`, one column per metric
df.unstack(level="metric")

In [None]:
# define df_zfssetup: same data as `df`, but with the two ZFS knobs
# (itxg_bypass, zvol_request_sync) folded into one readable `zfs_setup` level
def _zfs_setup_label(level_values):
    """Build the label for one (itxg_bypass, zvol_request_sync) column key."""
    bypass_text = {'1': 'yes', '0': 'no'}[level_values[0]]
    # zvol_request_sync == '1' is labelled as "taskq: no"
    taskq_text = {'1': 'no', '0': 'yes'}[level_values[1]]
    return f"zil-pmem bypass={bypass_text} zvol_taskq={taskq_text}"

data = df.unstack(["itxg_bypass", "zvol_request_sync"])
data.columns = data.columns.map(_zfs_setup_label)
df_zfssetup = data.rename_axis("zfs_setup", axis=1).stack()
del data

# Get An Idea Of CPU Utilization

In [None]:
# CPU utilization = not-idle time as a fraction of (not-idle + idle) time
cpu_wide = df_cpu.unstack("metric")
busy_time = cpu_wide["cpu_not_idle"]
total = busy_time + cpu_wide["cpu_idle"]
# the factor 2 is applied because we disabled socket 2 via isolcpus
utilization = (busy_time / total) * 2
utilization = pd.DataFrame({"utilization": utilization})
utilization

In [None]:
# Utilization over numjobs, one facet per (zvol_request_sync, itxg_bypass).
# `utilization` carries the id variables in its index; turn them into ordinary
# columns first (as every other plot in this notebook does) instead of relying
# on seaborn to resolve index level names implicitly.
sns.relplot(data=utilization.reset_index(), height=8,
            row='zvol_request_sync', col='itxg_bypass', x='numjobs', y='utilization', hue='fsync_every')

# Analyze Performance

## All Data In One Plot

In [None]:
# Mean write IOPS over numjobs: colour = ZFS setup, line style = fsync_every
iops_by_setup = df_zfssetup.unstack(["metric"]).reset_index()
sns.relplot(
    data=iops_by_setup,
    kind='line',
    height=12,
    x='numjobs',
    y='w_iops_mean',
    hue='zfs_setup',
    style='fsync_every',
)

In [None]:
# Same plot as the previous cell, but WITHOUT style='fsync_every':
# the lines are therefore not split per fsync_every value here.
iops_by_setup = df_zfssetup.unstack(["metric"]).reset_index()
sns.relplot(
    data=iops_by_setup,
    kind='line',
    height=12,
    x='numjobs',
    y='w_iops_mean',
    hue='zfs_setup',
)

## Influence of `fsync_every` on IOPS in different setups

In [None]:
# Mean write IOPS over numjobs, one line per fsync_every value,
# faceted by itxg_bypass (rows) and zvol_request_sync (columns)
fio_wide = df.unstack("metric").reset_index()
sns.relplot(
    data=fio_wide,
    kind='line',
    height=6,
    row='itxg_bypass',
    col='zvol_request_sync',
    x='numjobs',
    y='w_iops_mean',
    hue='fsync_every',
)

## IOPS comparison of the different setups for select numjobs

In [None]:
# data = df.reset_index()
# data["zfs_setup"] = list(zip(data.itxg_bypass, data.zvol_request_sync))
# Compare the ZFS setups for a selection of numjobs values:
# first mean write IOPS, then mean write latency, both over fsync_every.
data = (
    df_zfssetup
    .unstack("metric")
    .query('numjobs in [1,4,5,8,12,16]')
    .reset_index()
)
# keyword arguments shared by both plots
per_numjobs_facets = dict(
    data=data,
    kind='line',
    height=6, aspect=0.4,
    hue='zfs_setup',
    style='zfs_setup',
    markers=True,
    col='numjobs',
    x='fsync_every',
)
sns.relplot(y='w_iops_mean', **per_numjobs_facets)
# the latency axis is limited to the range 0..100000
sns.relplot(y='w_lat_mean', facet_kws={"ylim": (0, 100000)}, **per_numjobs_facets)

## Performance Impact Of ITXG Bypass

In [None]:
# Mean write IOPS with and without ITXG bypass, for fsync_every in {1, 4, 16}
data = (
    df.unstack("metric")
    .query('fsync_every in [1,4,16]')
    .reset_index()
)
g = sns.relplot(
    data=data,
    kind='line',
    height=5,
    markers=True,
    row='zvol_request_sync',
    col='fsync_every',
    hue='itxg_bypass',
    style='itxg_bypass',
    x='numjobs',
    y='w_iops_mean',
)
# replace the default facet titles with a compact one
for facet_key, ax in g.axes_dict.items():
    row_val, col_val = facet_key
    ax.set_title(f"--fsync={col_val} zvrsync={row_val}")

In [None]:
# Mean write latency with and without ITXG bypass, for fsync_every in {1, 4, 16}
data = (
    df.unstack("metric")
    .query('fsync_every in [1,4,16]')
    .reset_index()
)
g = sns.relplot(
    data=data,
    kind='line',
    height=5,
    markers=True,
    row='zvol_request_sync',
    col='fsync_every',
    hue='itxg_bypass',
    style='itxg_bypass',
    x='numjobs',
    y='w_lat_mean',
)
# replace the default facet titles with a compact one
for facet_key, ax in g.axes_dict.items():
    row_val, col_val = facet_key
    ax.set_title(f"--fsync={col_val} zvrsync={row_val}")

# Latency Breakdown

In [None]:
# Display the kstat dataframe (`df_kstats`, built near the top of the notebook)
# that the latency breakdown plots below are based on.
df_kstats

In [None]:
# `bio_total` over numjobs, with and without ITXG bypass
data = df_kstats.query('fsync_every in [1,4,16]').reset_index()
g = sns.relplot(
    data=data,
    kind='line',
    height=5,
    markers=True,
    row='zvol_request_sync',
    col='fsync_every',
    hue='itxg_bypass',
    style='itxg_bypass',
    x='numjobs',
    y='bio_total',
)
for facet_key, ax in g.axes_dict.items():
    row_val, col_val = facet_key
    ax.set_title(f"--fsync={col_val} zvrsync={row_val}")

In [None]:
# `assign_vtable` over numjobs, with and without ITXG bypass
data = df_kstats.query('fsync_every in [1,4,16]').reset_index()
g = sns.relplot(
    data=data,
    kind='line',
    height=5,
    markers=True,
    row='zvol_request_sync',
    col='fsync_every',
    hue='itxg_bypass',
    style='itxg_bypass',
    x='numjobs',
    y='assign_vtable',
)
for facet_key, ax in g.axes_dict.items():
    row_val, col_val = facet_key
    ax.set_title(f"--fsync={col_val} zvrsync={row_val}")

In [None]:
# overhead = bio_total - assign_vtable, plotted over numjobs
data = df_kstats.query('fsync_every in [1,4,16]').reset_index()
data['overhead'] = data['bio_total'] - data['assign_vtable']
g = sns.relplot(
    data=data,
    kind='line',
    height=5,
    markers=True,
    row='zvol_request_sync',
    col='fsync_every',
    hue='itxg_bypass',
    style='itxg_bypass',
    x='numjobs',
    y='overhead',
)
for facet_key, ax in g.axes_dict.items():
    row_val, col_val = facet_key
    ax.set_title(f"--fsync={col_val} zvrsync={row_val}")

# => We probably want a dedicated `prb_write()` kstat for computing the overhead, so that it is measured the same way for both configurations.

# How does the ITXG bypass's semaphore overhead behave?

In [None]:
# Semaphore overhead of the ITXG bypass, relative to `bio_total`
data = df_kstats.query('fsync_every in [1,4,16]').reset_index()
# commit__total is included because it's essentially a single `mov`
# between acquisition and exit
data['semaphore_overhead'] = (
    data['assign__aquisition_total']
    + data['assign__exit']
    + data['commit__total']
)
data['rel_semaphore_overhead'] = data['semaphore_overhead'] / data['bio_total']
g = sns.relplot(
    data=data,
    kind='line',
    height=5,
    markers=True,
    row='zvol_request_sync',
    col='fsync_every',
    hue='itxg_bypass',
    style='itxg_bypass',
    x='numjobs',
    y='rel_semaphore_overhead',
)
for facet_key, ax in g.axes_dict.items():
    row_val, col_val = facet_key
    ax.set_title(f"--fsync={col_val} zvrsync={row_val}")

# What overhead does the zvol taskq have?

In [None]:
# Taskq queueing delay relative to `bio_total`, restricted to the rows
# with zvol_request_sync == "0"
data = (
    df_kstats
    .query('fsync_every in [1,4,16]')
    .query('zvol_request_sync == "0"')
    .reset_index()
)
data['rel_taskq_delay'] = data['zvol_write__taskq_qdelay'] / data['bio_total']
g = sns.relplot(
    data=data,
    kind='line',
    height=5,
    # faceting by fsync_every is currently switched off:
    # col='fsync_every',
    markers=True,
    hue='itxg_bypass',
    style='itxg_bypass',
    x='numjobs',
    y='rel_taskq_delay',
)