In [None]:
from keyname import keyname as kn
from matplotlib import pyplot as plt
from nbmetalog import nbmetalog as nbm
import numpy as np
import pandas as pd
import seaborn as sns
from teeplot import teeplot as tp

In [None]:
from conduitpylib.utils import consolidate_merge

In [None]:
# Print notebook/session metadata via nbmetalog (the library decides which fields are reported).
nbm.print_metadata()

# Get Data

In [None]:
# Fetch the gzip-compressed inlet CSV from OSF, drop the '././@LongLink'
# column (presumably a tar-archive artifact -- confirm), and discard rows
# that have no process instance UUID.
df_inlet = (
    pd.read_csv('https://osf.io/crwgh/download', compression='gzip')
    .drop(columns=['././@LongLink'])
    .dropna(subset=['Process Instance UUID'])
)
# nvp_expr returns an expression string that is eval'd in this namespace and
# splatted into the printer -- presumably a (name, value) pair; confirm in nbmetalog.
nbm.print_dataframe_summary(*eval(nbm.nvp_expr('df_inlet')))

In [None]:
# Fetch the gzip-compressed outlet CSV from OSF, drop the '././@LongLink'
# column (presumably a tar-archive artifact -- confirm), and discard rows
# that have no process instance UUID.
df_outlet = (
    pd.read_csv('https://osf.io/kecvy/download', compression='gzip')
    .drop(columns=['././@LongLink'])
    .dropna(subset=['Process Instance UUID'])
)
# nvp_expr returns an expression string that is eval'd in this namespace and
# splatted into the printer -- presumably a (name, value) pair; confirm in nbmetalog.
nbm.print_dataframe_summary(*eval(nbm.nvp_expr('df_outlet')))

In [None]:
# Combine inlet and outlet observations, matching rows on
# (process instance, update).  Assuming consolidate_merge follows pandas
# merge semantics for `how` / `suffixes` (confirm in conduitpylib), the outer
# join keeps rows found on only one side and clashing column names receive
# the ' Inlet' / ' Outlet' suffixes.
df = consolidate_merge(
    df_inlet,
    df_outlet,
    how='outer',
    on=['Process Instance UUID', 'Update'],
    suffixes=(' Inlet', ' Outlet'),
)
nbm.print_dataframe_synopsis(*eval(nbm.nvp_expr('df')))

# Prep Data

In [None]:
# Pin explicit dtypes: everything listed is cast to int64 except the
# execution-blur flag, which is cast to bool.
df = df.astype({
    # duct counters and per-row bookkeeping
    **dict.fromkeys(
        [
            'Num Inlets',
            'Num Outlets',
            'Num Puts Attempted',
            'Num Try Puts Attempted',
            'Num Blocking Puts',
            'Num Try Puts That Succeeded',
            'Num Puts That Succeeded Eventually',
            'Num Blocking Puts That Succeeded Immediately',
            'Num Puts That Succeeded Immediately',
            'Num Puts That Blocked',
            'Num Dropped Puts',
            'Num Round Trip Touches Inlet',
            'Net Flux Through Duct',
            'proc',
            'Snapshot',
        ],
        'int64',
    ),
    'Has Execution Blur': 'bool',
    # run configuration
    **dict.fromkeys(
        [
            'Replicate',
            'Async Mode',
            'Num Threads',
            'Num Processes',
            'SLURM_NNODES',
            'SLURM_NTASKS',
            'SLURM_CPUS_ON_NODE',
        ],
        'int64',
    ),
})

In [None]:
# Recover the hostname from the keyname-encoded inlet source filename.
# Only one column is needed, so map over that Series instead of a row-wise
# DataFrame.apply(axis=1), which materialises every full row as a Series
# just to read a single field.
df['Hostname'] = df['Source File Inlet'].map(
    lambda source_file: kn.unpack(source_file)['_hostname'],
)

In [None]:
# Derive per-row convenience columns from the raw counters.

# Job layout.
df['Num Nodes'] = df['SLURM_NNODES']
df['Num Tasks'] = df['SLURM_NTASKS']
df['Num Cpus'] = df['Num Threads'] * df['Num Processes']
df['Allocated Tasks Per Node'] = df['Num Tasks'] // df['Num Nodes']

# Attempted operations (less one) per round-trip touch, from each end of the duct.
df['Delivery Time Inlet'] = (df['Num Puts Attempted'] - 1) / df['Num Round Trip Touches Inlet']
df['Delivery Time Outlet'] = (df['Num Pulls Attempted'] - 1) / df['Num Round Trip Touches Outlet']

# Immediately-laden pulls relative to the smaller of duct flux and pulls attempted.
df['Intermittancy'] = df['Num Pulls That Were Laden Immediately'] / df[['Net Flux Through Duct', 'Num Pulls Attempted']].min(axis=1)

# Runtime scaled by the number of inlets / outlets on the row.
df['Inlet-Seconds Elapsed'] = df['Num Inlets'] * df['Runtime Seconds Elapsed Inlet']
df['Outlet-Seconds Elapsed'] = df['Num Outlets'] * df['Runtime Seconds Elapsed Outlet']

df['Latency Simsteps Inlet'] = df['Delivery Time Inlet']
# Fixed: the outlet latency was previously copied from 'Delivery Time Inlet'.
df['Latency Simsteps Outlet'] = df['Delivery Time Outlet']

# Fixed: a period is seconds per attempted operation; the ratio was previously
# inverted (operations per second), unlike the df_world_sum definition.
df['Simstep Period Inlet'] = df['Inlet-Seconds Elapsed'] / df['Num Puts Attempted']
df['Simstep Period Outlet'] = df['Outlet-Seconds Elapsed'] / df['Num Pulls Attempted']

# Latency in seconds = latency in simsteps * seconds per simstep.
df['Latency Walltime Inlet'] = df['Latency Simsteps Inlet'] * df['Simstep Period Inlet']
df['Latency Walltime Outlet'] = df['Latency Simsteps Outlet'] * df['Simstep Period Outlet']

# Prep DataFrame Variants

In [None]:
# Keep one row per process instance: the one with the highest 'Update'.
# Sorting descending and then keeping the first duplicate follows
# https://stackoverflow.com/a/40629420
df_finalized_observations = (
    df
    .sort_values('Update', ascending=False)
    .drop_duplicates(subset=['Process Instance UUID'], keep='first')
)

In [None]:
# Rows whose 'Has Execution Blur' flag is truthy.
df_blurry_snapshots = df.loc[df['Has Execution Blur'].astype(bool)]

In [None]:
# Sum the final observation of every process within each run configuration.
# numeric_only=True is spelled out because a bare .sum() only dropped the
# non-numeric columns (hostnames, source files, UUIDs, ...) by default on
# pandas 1.x; on pandas >= 2.0 it concatenates strings or raises instead.
df_world_sum = df_finalized_observations.groupby([
    'Replicate',
    'Async Mode',
    'Num Processes',
    'Num Nodes',
    'Allocated Tasks Per Node',
],  as_index=False).sum(numeric_only=True)

# Ratios are recomputed from the summed counters rather than reusing the
# summed per-row ratios, which would not be meaningful.
df_world_sum['Fraction Messages Utilized'] = df_world_sum['Num Reads That Were Fresh'] / df_world_sum['Num Try Puts Attempted']
df_world_sum['Fraction Messages Delivered'] = df_world_sum['Num Try Puts That Succeeded'] / df_world_sum['Num Try Puts Attempted']
df_world_sum['Delivery Failure Rate'] = 1.0 - df_world_sum['Fraction Messages Delivered']
df_world_sum['Fraction Messages Dropped'] = df_world_sum['Delivery Failure Rate']
df_world_sum['Fraction Try Pulls That Were Laden'] = df_world_sum['Num Try Pulls That Were Laden'] / df_world_sum['Num Try Pulls Attempted']
df_world_sum['Round Trip Touches Per Attempted Pull'] = df_world_sum['Num Round Trip Touches Outlet'] / df_world_sum['Num Try Pulls Attempted']
df_world_sum['Round Trip Touches Per Attempted Put'] = df_world_sum['Num Round Trip Touches Inlet'] / df_world_sum['Num Try Puts Attempted']
df_world_sum['Num Inflight Messages'] = 2.0 / df_world_sum['Round Trip Touches Per Attempted Put'] - 1
df_world_sum['Fraction Duct Flux Stepped Through'] = df_world_sum['Num Revisions Pulled'] / df_world_sum['Net Flux Through Duct']
df_world_sum['Fraction Duct Flux Jumped Over'] = 1.0 - df_world_sum['Fraction Duct Flux Stepped Through']
df_world_sum['Round Trip Touches Per Runtime Second'] = df_world_sum['Num Round Trip Touches Inlet'] / df_world_sum['Runtime Seconds Elapsed Inlet']
# These overwrite the summed per-row latency columns carried through the groupby.
df_world_sum['Latency Simsteps Inlet'] = (df_world_sum['Num Puts Attempted'] - 1) / df_world_sum['Num Round Trip Touches Inlet']
df_world_sum['Latency Simsteps Outlet'] = (df_world_sum['Num Pulls Attempted'] - 1) / df_world_sum['Num Round Trip Touches Outlet']
df_world_sum['Delivery Burstiness'] = df_world_sum['Num Pulls That Were Laden Immediately'] / df_world_sum[['Net Flux Through Duct', 'Num Pulls Attempted']].min(axis=1)
df_world_sum['Intermittancy'] = df_world_sum['Delivery Burstiness']
# Period = seconds per attempted operation; walltime latency = simsteps * period.
df_world_sum['Simstep Period Inlet (s)'] = df_world_sum['Inlet-Seconds Elapsed'] / df_world_sum['Num Puts Attempted']
df_world_sum['Simstep Period Outlet (s)'] = df_world_sum['Outlet-Seconds Elapsed'] / df_world_sum['Num Pulls Attempted']
df_world_sum['Latency Walltime Inlet (s)'] = df_world_sum['Latency Simsteps Inlet'] * df_world_sum['Simstep Period Inlet (s)']
df_world_sum['Latency Walltime Outlet (s)'] = df_world_sum['Latency Simsteps Outlet'] * df_world_sum['Simstep Period Outlet (s)']

In [None]:
# For every (process instance, snapshot) group of blurry rows, reduce each
# counter to its peak-to-peak range (np.ptp, i.e. max - min) across the
# rows of that group.
df_snapshot_diffs = df_blurry_snapshots.groupby(
    [
        'Process Instance UUID',
        'Snapshot',
        # the remaining keys add nothing to the grouping itself; they are
        # listed only so that they are carried through to the result as-is
        'Async Mode',
        'Num Nodes',
        'Allocated Tasks Per Node',
        'Num Processes',
        'Replicate',
        'proc',
        'Hostname',
        'Num Inlets',
        'Num Outlets',
    ],
    as_index=False,
).aggregate(dict.fromkeys(
    [
        'Num Puts Attempted',
        'Num Try Puts Attempted',
        'Num Blocking Puts',
        'Num Try Puts That Succeeded',
        'Num Puts That Succeeded Eventually',
        'Num Blocking Puts That Succeeded Immediately',
        'Num Puts That Succeeded Immediately',
        'Num Puts That Blocked',
        'Num Dropped Puts',
        'Num Reads Performed',
        'Num Reads That Were Fresh',
        'Num Reads That Were Stale',
        'Num Revisions Pulled',
        'Num Try Pulls Attempted',
        'Num Blocking Pulls',
        'Num Blocking Pulls That Blocked',
        'Num Revisions From Try Pulls',
        'Num Revisions From Blocking Pulls',
        'Num Pulls Attempted',
        'Num Pulls That Were Laden Eventually',
        'Num Blocking Pulls That Were Laden Immediately',
        'Num Blocking Pulls That Were Laden Eventually',
        'Num Pulls That Were Laden Immediately',
        'Num Try Pulls That Were Laden',
        'Num Try Pulls That Were Unladen',
        'Net Flux Through Duct',
        'Num Round Trip Touches Inlet',
        'Num Round Trip Touches Outlet',
        # why are these missing?
        # 'Row Initial Timepoint (ns) Inlet',
        # 'Row Initial Timepoint (ns) Outlet',
        'Row Final Timepoint (ns) Inlet',
        'Row Final Timepoint (ns) Outlet',
    ],
    np.ptp,
))

In [None]:
# Derive per-snapshot metrics from the peak-to-peak counter differences.
# Every input column here is already a within-snapshot delta.
df_snapshot_diffs['Fraction Messages Delivered'] = (
    df_snapshot_diffs['Num Try Puts That Succeeded']
    / df_snapshot_diffs['Num Try Puts Attempted']
)
# Fixed: this used to repeat the succeeded/attempted ratio, i.e. the delivery
# rate. The failure rate is its complement, matching df_world_sum.
df_snapshot_diffs['Delivery Failure Rate'] = (
    1.0 - df_snapshot_diffs['Fraction Messages Delivered']
)
df_snapshot_diffs['Fraction Messages Dropped'] = df_snapshot_diffs['Delivery Failure Rate']
df_snapshot_diffs['Fraction Try Pulls That Were Laden'] = (
    df_snapshot_diffs['Num Try Pulls That Were Laden']
    / df_snapshot_diffs['Num Try Pulls Attempted']
)

df_snapshot_diffs['Round Trip Touches Per Attempted Put'] = (
    df_snapshot_diffs['Num Round Trip Touches Inlet']
) / df_snapshot_diffs['Num Try Puts Attempted']

df_snapshot_diffs['Round Trip Touches Per Attempted Pull'] = (
    df_snapshot_diffs['Num Round Trip Touches Outlet']
) / df_snapshot_diffs['Num Try Pulls Attempted']

# NOTE(review): computed from the outlet side, whereas df_world_sum's
# per-second counterpart uses the inlet side -- confirm this is intended.
df_snapshot_diffs['Round Trip Touches Per Runtime Nanosecond'] = (
    df_snapshot_diffs['Num Round Trip Touches Outlet']
) / df_snapshot_diffs['Row Final Timepoint (ns) Outlet']

# No "- 1" correction here, unlike the end-state formulas: the counters are differences.
df_snapshot_diffs['Latency Simsteps Inlet'] = df_snapshot_diffs['Num Puts Attempted'] / df_snapshot_diffs['Num Round Trip Touches Inlet']
df_snapshot_diffs['Latency Simsteps Outlet'] = df_snapshot_diffs['Num Pulls Attempted'] / df_snapshot_diffs['Num Round Trip Touches Outlet']
df_snapshot_diffs['Delivery Burstiness'] = df_snapshot_diffs['Num Pulls That Were Laden Immediately'] / df_snapshot_diffs[['Net Flux Through Duct', 'Num Pulls Attempted']].min(axis=1)
df_snapshot_diffs['Intermittancy'] = df_snapshot_diffs['Delivery Burstiness']
# Elapsed nanoseconds within the snapshot, scaled by the number of inlets / outlets.
df_snapshot_diffs['Inlet-Nanoseconds Elapsed'] = df_snapshot_diffs['Num Inlets'] * df_snapshot_diffs['Row Final Timepoint (ns) Inlet']
df_snapshot_diffs['Outlet-Nanoseconds Elapsed'] = df_snapshot_diffs['Num Outlets'] * df_snapshot_diffs['Row Final Timepoint (ns) Outlet']
df_snapshot_diffs['Simsteps Elapsed Inlet'] = df_snapshot_diffs['Num Puts Attempted'] / df_snapshot_diffs['Num Inlets']
df_snapshot_diffs['Simsteps Elapsed Outlet'] = df_snapshot_diffs['Num Pulls Attempted'] / df_snapshot_diffs['Num Outlets']
# Period = nanoseconds per attempted operation; walltime latency = simsteps * period.
df_snapshot_diffs['Simstep Period Inlet (ns)'] = df_snapshot_diffs['Inlet-Nanoseconds Elapsed'] / df_snapshot_diffs['Num Puts Attempted']
df_snapshot_diffs['Simstep Period Outlet (ns)'] = df_snapshot_diffs['Outlet-Nanoseconds Elapsed'] / df_snapshot_diffs['Num Pulls Attempted']
df_snapshot_diffs['Latency Walltime Inlet (ns)'] = df_snapshot_diffs['Latency Simsteps Inlet'] * df_snapshot_diffs['Simstep Period Inlet (ns)']
df_snapshot_diffs['Latency Walltime Outlet (ns)'] = df_snapshot_diffs['Latency Simsteps Outlet'] * df_snapshot_diffs['Simstep Period Outlet (ns)']

# End-state Data Analysis

This data appears to be skewed by ragged network launch/completion.

In [None]:
def facet_boxplot(*, data, facet, x, y, showfliers=False):
    """Draw boxplots of `y` against `x` on a seaborn FacetGrid.

    All arguments are keyword-only.

    Args:
        data: table handed to `sns.FacetGrid`.
        facet: column name used for the grid's `col` facets.
        x: column name forwarded to `sns.boxplot` as the x variable.
        y: column name forwarded to `sns.boxplot` as the y variable.
        showfliers: forwarded to `sns.boxplot`; defaults to False.

    Returns:
        None; the grid is built for its plotting side effect only.
    """
    grid = sns.FacetGrid(data, col=facet, margin_titles=True)
    grid.map_dataframe(sns.boxplot, x, y, showfliers=showfliers)

## Latency Walltime

In [None]:
# End-state view: inlet walltime latency by process count, one panel per
# tasks-per-node allocation. The plot is drawn through teeplot's `tee`
# (presumably also saved to disk, tagged with the outattrs -- confirm).
tp.tee(
    facet_boxplot,
    data=df_world_sum,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Latency Walltime Inlet (s)',
    showfliers=True,
    teeplot_outattrs={
        'transform': 'endstate_sumedbyrep',
        **nbm.collate_outattr_metadata(),
    },
)

In [None]:
# End-state view: outlet walltime latency by process count, one panel per
# tasks-per-node allocation.
tp.tee(
    facet_boxplot,
    data=df_world_sum,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Latency Walltime Outlet (s)',
    showfliers=True,
    teeplot_outattrs={
        'transform': 'endstate_sumedbyrep',
        **nbm.collate_outattr_metadata(),
    },
)

## Latency Simsteps

In [None]:
# End-state view: inlet latency in simsteps by process count, one panel per
# tasks-per-node allocation.
tp.tee(
    facet_boxplot,
    data=df_world_sum,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Latency Simsteps Inlet',
    showfliers=True,
    teeplot_outattrs={
        'transform': 'endstate_sumedbyrep',
        **nbm.collate_outattr_metadata(),
    },
)

In [None]:
# End-state view: outlet latency in simsteps by process count, one panel per
# tasks-per-node allocation.
tp.tee(
    facet_boxplot,
    data=df_world_sum,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Latency Simsteps Outlet',
    showfliers=True,
    teeplot_outattrs={
        'transform': 'endstate_sumedbyrep',
        **nbm.collate_outattr_metadata(),
    },
)

## Delivery Failure Rate

In [None]:
# End-state view: delivery failure rate by process count, one panel per
# tasks-per-node allocation.
tp.tee(
    facet_boxplot,
    data=df_world_sum,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Delivery Failure Rate',
    showfliers=True,
    teeplot_outattrs={
        'transform': 'endstate_sumedbyrep',
        **nbm.collate_outattr_metadata(),
    },
)

## Delivery Burstiness

In [None]:
# End-state view: delivery burstiness by process count, one panel per
# tasks-per-node allocation.
tp.tee(
    facet_boxplot,
    data=df_world_sum,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Delivery Burstiness',
    showfliers=True,
    teeplot_outattrs={
        'transform': 'endstate_sumedbyrep',
        **nbm.collate_outattr_metadata(),
    },
)

## Simstep Period

In [None]:
# End-state view: inlet simstep period (seconds) by process count, one panel
# per tasks-per-node allocation.
tp.tee(
    facet_boxplot,
    data=df_world_sum,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Simstep Period Inlet (s)',
    showfliers=True,
    teeplot_outattrs={
        'transform': 'endstate_sumedbyrep',
        **nbm.collate_outattr_metadata(),
    },
)

In [None]:
# End-state view: outlet simstep period (seconds) by process count, one panel
# per tasks-per-node allocation.
tp.tee(
    facet_boxplot,
    data=df_world_sum,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Simstep Period Outlet (s)',
    showfliers=True,
    teeplot_outattrs={
        'transform': 'endstate_sumedbyrep',
        **nbm.collate_outattr_metadata(),
    },
)

# Live Snapshot Analysis

In [None]:
# NOTE(review): this repeats, unchanged, the `facet_boxplot` already defined
# under "End-state Data Analysis"; the later definition shadows the earlier one.
def facet_boxplot(*, data, facet, x, y, showfliers=False):
    """Draw boxplots of `y` against `x` on a seaborn FacetGrid.

    All arguments are keyword-only.

    Args:
        data: table handed to `sns.FacetGrid`.
        facet: column name used for the grid's `col` facets.
        x: column name forwarded to `sns.boxplot` as the x variable.
        y: column name forwarded to `sns.boxplot` as the y variable.
        showfliers: forwarded to `sns.boxplot`; defaults to False.

    Returns:
        None; the grid is built for its plotting side effect only.
    """
    panel_grid = sns.FacetGrid(data, col=facet, margin_titles=True)
    panel_grid.map_dataframe(sns.boxplot, x, y, showfliers=showfliers)

## Latency Walltime

In [None]:
# Snapshot view: inlet walltime latency (ns) by process count, one panel per
# tasks-per-node allocation; fliers hidden. The plot is drawn through
# teeplot's `tee` (presumably also saved to disk, tagged with the outattrs -- confirm).
tp.tee(
    facet_boxplot,
    data=df_snapshot_diffs,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Latency Walltime Inlet (ns)',
    showfliers=False,
    teeplot_outattrs={
        'transform': 'snapshot_diffs',
        **nbm.collate_outattr_metadata(),
    },
)

In [None]:
# Snapshot view: outlet walltime latency (ns) by process count, one panel per
# tasks-per-node allocation; fliers hidden.
tp.tee(
    facet_boxplot,
    data=df_snapshot_diffs,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Latency Walltime Outlet (ns)',
    showfliers=False,
    teeplot_outattrs={
        'transform': 'snapshot_diffs',
        **nbm.collate_outattr_metadata(),
    },
)

## Latency Simsteps

In [None]:
# Snapshot view: inlet latency in simsteps by process count, one panel per
# tasks-per-node allocation; fliers hidden.
tp.tee(
    facet_boxplot,
    data=df_snapshot_diffs,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Latency Simsteps Inlet',
    showfliers=False,
    teeplot_outattrs={
        'transform': 'snapshot_diffs',
        **nbm.collate_outattr_metadata(),
    },
)

In [None]:
# Snapshot view: outlet latency in simsteps by process count, one panel per
# tasks-per-node allocation; fliers hidden.
tp.tee(
    facet_boxplot,
    data=df_snapshot_diffs,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Latency Simsteps Outlet',
    showfliers=False,
    teeplot_outattrs={
        'transform': 'snapshot_diffs',
        **nbm.collate_outattr_metadata(),
    },
)

## Delivery Failure Rate

In [None]:
# Snapshot view: delivery failure rate by process count, one panel per
# tasks-per-node allocation; fliers hidden.
tp.tee(
    facet_boxplot,
    data=df_snapshot_diffs,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Delivery Failure Rate',
    showfliers=False,
    teeplot_outattrs={
        'transform': 'snapshot_diffs',
        **nbm.collate_outattr_metadata(),
    },
)

## Delivery Burstiness

In [None]:
# Snapshot view: delivery burstiness by process count, one panel per
# tasks-per-node allocation; fliers hidden.
tp.tee(
    facet_boxplot,
    data=df_snapshot_diffs,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Delivery Burstiness',
    showfliers=False,
    teeplot_outattrs={
        'transform': 'snapshot_diffs',
        **nbm.collate_outattr_metadata(),
    },
)

## Simstep Period

In [None]:
# Snapshot view: inlet simstep period (ns) by process count, one panel per
# tasks-per-node allocation; fliers hidden.
tp.tee(
    facet_boxplot,
    data=df_snapshot_diffs,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Simstep Period Inlet (ns)',
    showfliers=False,
    teeplot_outattrs={
        'transform': 'snapshot_diffs',
        **nbm.collate_outattr_metadata(),
    },
)

In [None]:
# Snapshot view: outlet simstep period (ns) by process count, one panel per
# tasks-per-node allocation; fliers hidden.
tp.tee(
    facet_boxplot,
    data=df_snapshot_diffs,
    facet='Allocated Tasks Per Node',
    x='Num Processes',
    y='Simstep Period Outlet (ns)',
    showfliers=False,
    teeplot_outattrs={
        'transform': 'snapshot_diffs',
        **nbm.collate_outattr_metadata(),
    },
)

In [None]:
# Raise the pandas display limits (500 rows / 500 columns, width 1000).
# These are global options and stay in effect after this cell.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Show the snapshot windows whose inlet latency exceeds 100 simsteps.
df_snapshot_diffs.loc[df_snapshot_diffs['Latency Simsteps Inlet'].gt(100)]