In [None]:
import pandas as pd
import glob
import json
import dotted # https://pypi.org/project/dotted-notation/
import re
import matplotlib.pyplot as plt
import json

from pathlib import Path
import seaborn as sns

In [None]:
import lib.datasciencetoolbelt as dstools
from lib.resultstorage import ResultStorage

In [None]:
# Matplotlib backend selection: figures are rendered inline in the cell output.
# Swap the two lines below to use the qt backend instead.
#%matplotlib qt
%matplotlib inline

In [None]:
# Options handed to the toolbelt's "savefig" setting: enabled, with a target
# directory relative to the notebook.
savefig_options = {
    "enable": True,
    "dir": Path("./postprocess_results"),
}
dstools.setup({
    "seaborn_context": "talk",
    "savefig": savefig_options,
})

# Storage rooted at ./results; it is iterated further down to load the
# benchmark result documents.
result_storage = ResultStorage(Path("./results"))

In [None]:
# Identifying variables of a result, one entry per variable:
#   (dotted path into the result document, short column name, converter type)
id_vars__dottedpath_and_shortname_and_type = [
    # ('result.identity', "benchmark", str),
    ("storage_stack.identity", "storage_stack", str),
    ("result.fio_config.numjobs", "numjobs", int),
]

# Short names only, in declaration order (used as the DataFrame index).
id_vars = [
    shortname
    for _dottedpath, shortname, _type in id_vars__dottedpath_and_shortname_and_type
]

def extract_id_var_values(output_json):
    """Resolve every identifying variable of one result document.

    Each ``(dotted path, short name, type)`` entry of
    ``id_vars__dottedpath_and_shortname_and_type`` is looked up with
    ``dotted.get`` and converted by calling its type on the value.

    Args:
        output_json: Parsed result document (nested dicts).

    Returns:
        dict mapping each short name to its converted value.

    Raises:
        Exception: if a dotted path yields no value, a short name is declared
            twice, or a value cannot be converted to its declared type.
    """
    id_var_values = {}
    for dotted_path, shortname, convert in id_vars__dottedpath_and_shortname_and_type:
        value = dotted.get(output_json, dotted_path)
        # Only None counts as "missing" (assumes dotted.get returns None for an
        # absent path -- TODO confirm). A plain truthiness test would also
        # reject legitimate falsy values such as 0 or "".
        if value is None:
            # Fall back to a placeholder so a document without a 'file' key
            # still produces the intended error instead of a KeyError.
            source = output_json.get('file', '<unknown file>')
            raise Exception(f"{source}: dotted path {dotted_path} not found")
        if shortname in id_var_values:
            raise Exception(f"duplicate shortname {shortname}")
        try:
            id_var_values[shortname] = convert(value)
        except (TypeError, ValueError) as e:
            # e.g. int("abc") raises ValueError, int([1]) raises TypeError.
            raise Exception(f"cannot parse v={value!r}") from e
    return id_var_values


def get_fio_write_metrics(output_json):
    """Extract the write-side summary metrics of a single-job fio run.

    Args:
        output_json: Mapping that holds the fio output under
            ``fio_jsonplus`` -> ``jobs`` (a list with exactly one job).

    Returns:
        dict with ``w_iops_mean``, ``w_iops_stddev``, ``w_lat_mean`` and
        ``w_lat_stddev``; the latency values are read from the job's
        ``lat_ns`` entry (presumably nanoseconds, going by the field name).

    Raises:
        AssertionError: if the run does not contain exactly one job.
        KeyError: if any expected field is missing. All fields are read with
            plain indexing so that a missing latency entry fails loudly
            instead of turning into a silent ``None`` in the resulting table.
    """
    jobs = output_json["fio_jsonplus"]["jobs"]
    assert len(jobs) == 1, f"expected exactly one fio job, got {len(jobs)}"
    write_stats = jobs[0]["write"]
    latency = write_stats["lat_ns"]
    return {
        "w_iops_mean": write_stats["iops_mean"],
        "w_iops_stddev": write_stats["iops_stddev"],
        "w_lat_mean": latency["mean"],
        "w_lat_stddev": latency["stddev"],
    }


def to_row_dict(output_json):
    """Flatten one result document into a row dict for the DataFrame.

    The row contains the identifying variables, the fio write metrics under
    ``"fio_metrics"``, the ``allcpu`` CPU-time entry under ``"cpu"``, and one
    entry per ``result.latency_analysis`` key with its leading ``@`` removed.
    The latency entries are merged last, so they win on a name clash.

    Args:
        output_json: Parsed result document (nested dicts).

    Returns:
        dict representing one row.

    Raises:
        Whatever the extraction raises; the offending document is printed as
        JSON before the exception is re-raised.
    """
    try:
        latency = {}
        for key, value in dotted.get(output_json, "result.latency_analysis").items():
            # startswith() also copes with an empty key, where key[0] would
            # raise IndexError.
            assert key.startswith('@'), f"latency_analysis key {key!r} lacks the '@' prefix"
            name = key[1:]  # strip leading @
            assert name not in latency, f"duplicate latency_analysis key {name!r}"
            latency[name] = value

        return {
            **extract_id_var_values(output_json),
            "fio_metrics": get_fio_write_metrics(output_json['result']),
            # cpu stats
            "cpu": output_json["result"]["cpu_time"]["allcpu"],
            **latency,
        }
    except Exception:
        # Dump the document for debugging. `Exception` (not a bare except)
        # keeps KeyboardInterrupt/SystemExit from triggering the dump, and
        # default=str keeps the dump itself from failing on values that are
        # not JSON-serialisable.
        print(json.dumps(output_json, default=str))
        raise

# One row dict per stored result of the "zillwb_latency_analysis__v3" benchmark.
rows = list(map(to_row_dict, result_storage.iter_results("zillwb_latency_analysis__v3")))

In [None]:
# Build the result table, indexed and sorted by the identifying variables.
df = (
    pd.DataFrame.from_dict(rows)
    .set_index(id_vars)
    .sort_index()
)
display(df)
# display(df / 1_000_000)
# compute zfs write breakdown

### Separate FIO and CPU Dataframes

In [None]:
# Expand the per-row fio metrics dicts into one column per metric.
fio_metrics_column = df['fio_metrics']
df_fio = fio_metrics_column.apply(pd.Series)
df_fio

In [None]:
# Expand the per-row cpu dicts into one column per CPU-time category.
df_cpu = df['cpu'].apply(pd.Series)
display(df_cpu)

# Row-wise total over all categories, taken before the derived columns are added.
cpu_total = df_cpu.sum(axis=1)
df_cpu['not_idle'] = cpu_total - df_cpu['idle']
# second socket was disabled => half of total cpu time is idle time
df_cpu['utilization'] = df_cpu['not_idle'] / (cpu_total - (cpu_total / 2))
df_cpu

### Remove `fio_metrics` and `cpu` from `df`

In [None]:
# Rebind `df` to a frame without the two dict-valued columns instead of
# deleting them in place: the frame object displayed earlier stays untouched,
# and errors="ignore" makes the cell safe to run a second time (the in-place
# `del` raised KeyError on re-run).
df = df.drop(columns=['fio_metrics', 'cpu'], errors='ignore')

# Show that LWB merging is not a thing

In [None]:
# Compare the number of zfs_write calls with the number of issued LWBs.
lwb_merge_counts = df[["zfs_write_count", "lwb_issue_count"]]
display(lwb_merge_counts)
lwb_merge_counts.plot.bar()

#  ZIO + PMEM latency when writing LWBs

In [None]:
tmp = df.copy()

# Interpolated LWB write time: number of issued LWBs times the last LWB latency.
tmp['interpolated_lwb_write_time'] = tmp.lwb_issue_count * tmp.last_lwb_latency

# Whatever part of that time was not spent in pmem_submit_bio is attributed to ZIO.
tmp['zio_overhead'] = tmp.interpolated_lwb_write_time - tmp.pmem_submit_bio

# display(tmp[["zfs_write_count", "lwb_issue_count"]])

lwb_write_time = tmp[[
    "zio_overhead",
    "pmem_submit_bio",
]]
display(lwb_write_time)

ax = lwb_write_time.plot.bar(stacked=True, figsize=(10,5))
ax.set_title("Interpolated LWB Write Time")

# TODO: does this make sense?
lwb_write_time_by_iops = lwb_write_time.div(df_fio.w_iops_mean, axis=0)
lwb_write_time_by_iops.plot.bar(stacked=True, figsize=(10,5)).set_title("Interpolated LWB Write Time By IOPS")

# The numjobs index level as a Series that shares the frame's index, so the
# division below aligns row by row.
# (This used to reference the undefined name `zio_overhead_vs_pmem_time`,
# which raised NameError on a fresh kernel; the frame meant is `lwb_write_time`.)
numjobs = lwb_write_time.index.to_frame()['numjobs']
display(numjobs)
lwb_write_time_by_numjobs = lwb_write_time.div(numjobs, axis=0)
ax = lwb_write_time_by_numjobs.plot.bar(stacked=True, figsize=(10,5))
ax.set_title("Interpolated LWB Write Time, By Numjobs")




# Latency Breakdown

In [None]:
tmp = df.copy()

# Take the write count out of the frame; it is not part of the breakdown.
write_count = tmp.pop('zfs_write_count')

# Interpolated LWB write time, then drop the two inputs it was derived from.
tmp['interpolated_lwb_write_time'] = tmp['lwb_issue_count'] * tmp['last_lwb_latency']
tmp = tmp.drop(columns=['last_lwb_latency', 'lwb_issue_count'])

tmp['zio_overhead'] = tmp['interpolated_lwb_write_time'] - tmp['pmem_submit_bio']

# Part of zfs_write that is neither zil_commit nor zfs_log_write.
tmp['async'] = tmp['zfs_write'] - tmp['zil_commit'] - tmp['zfs_log_write']

# Part of zil_commit not covered by the individually measured phases.
# zio_overhead and pmem_submit_bio are currently not subtracted here.
accounted_zil_commit_time = (
    tmp['zil_fill_commit_list']
    + tmp['zillwb_commit_waiter__issue_cv']
    + tmp['zillwb_commit_waiter__timeout_cv']
    + tmp['zillwb_lwb_write_issue']
)
tmp['zil_lwb_overhead'] = tmp['zil_commit'] - accounted_zil_commit_time

# Columns that make up the latency breakdown, in plotting order.
# Currently excluded: zillwb_remaining_overhead, zio_overhead, pmem_submit_bio.
breakdown_columns = [
    "async",
    "zfs_log_write",
    "zil_fill_commit_list",
    "zil_lwb_overhead",
    "zillwb_lwb_write_issue",
    "zillwb_commit_waiter__issue_cv",
    "zillwb_commit_waiter__timeout_cv",
]
data = tmp[breakdown_columns]
df_latbreakdown = data
df_latbreakdown

#  All In One Plot


In [None]:
relbreakdown = df_latbreakdown.copy()

# Show the absolute breakdown, then normalise every row to fractions of its total.
total = relbreakdown.sum(axis=1)
display(relbreakdown)
relbreakdown = relbreakdown.div(total, axis=0)
# display(relbreakdown)

abs_by_iops = df_latbreakdown.div(df_fio.w_iops_mean, axis=0)

# Difference between fio's mean write latency and the summed per-IOPS breakdown.
measurement_error = df_fio.w_lat_mean - abs_by_iops.sum(axis=1)


# Names of the subplot rows, top to bottom. Deliberately not called `rows`:
# that name holds the list of result dicts the DataFrame is built from, and
# rebinding it here broke re-running the DataFrame cell.
plot_rows = [
    "relbreakdown",
    "abs_by_iops",
    "fio_latency",
    "measurement_error",
    "lwb_write_time_by_iops",
    "lwb_write_time_by_numjobs",

#     "iops",
#     "latency_std",
]
nrows = len(plot_rows)
ncols = 1
g, axes = plt.subplots(nrows, ncols, squeeze=False, figsize=(7.5 * ncols ,5 * nrows), gridspec_kw = {'hspace': 0.4})
for row, row_name in enumerate(plot_rows):
    for col in range(ncols):

        # One storage stack per subplot column.
        storage_stack = {
            0: 'zfs-lwb-rs_0',
        }[col]

        ax = axes[row, col]

        try:

            if row_name == "relbreakdown":
                relbreakdown.loc[storage_stack, ].plot.bar(ax=ax, stacked=True, ylim=(0, 1.1), legend=False)
                # Only the right-most column carries the legend, placed outside the axes.
                if col == ncols - 1:
                    ax.legend(loc="center left", bbox_to_anchor=(1,0.5))
            elif row_name == "abs_by_iops":
                abs_by_iops.loc[storage_stack, ].plot.bar(ax=ax, stacked=True, legend=False)
                if col == ncols - 1:
                    ax.legend(loc="center left", bbox_to_anchor=(1,0.5))
            elif row_name == "iops":
                df_fio.loc[storage_stack, "w_iops_mean"].plot(ax=ax)
            elif row_name == "fio_latency":
                df_fio.loc[storage_stack, "w_lat_mean"].plot.bar(ax=ax)
            elif row_name == "latency_std":
                df_fio.loc[storage_stack, "w_lat_stddev"].plot(ax=ax)
            elif row_name == "measurement_error":
                measurement_error.loc[storage_stack, ].plot.bar(ax=ax)
            elif row_name == "lwb_write_time_by_numjobs":
                lwb_write_time_by_numjobs.loc[storage_stack, ].plot.bar(ax=ax, stacked=True)
            elif row_name == "lwb_write_time_by_iops":
                lwb_write_time_by_iops.loc[storage_stack, ].plot.bar(ax=ax, stacked=True)
            else:
                raise Exception(f"unknown row name {row_name}")
            ax.set_title(f"{row_name}")

        except Exception:
            # Name the failing subplot before propagating the error.
            print(row_name)
            raise
            
            
        