# ALI Performance Tests on Blake/Waterman

In [1]:
import datetime as dt
import glob
import numpy as np
import pandas as pd
import json
import multiprocessing
import sys

import plotly.graph_objects as go
from plotly.offline import iplot, init_notebook_mode

# Import scripts
sys.path.insert(0,'kcshan-perf-analysis')
from json2timeline import json2dataframe
from models import single_ts_chgpts
from basicstats import add_regime_stats
from utils import * 

In [2]:
# hide_code_button is not imported by name in this notebook, so it presumably
# comes from the wildcard `from utils import *` in the import cell. Judging by
# its name it adds a control for hiding the code cells — TODO confirm in utils.
hide_code_button()

In [3]:
# Enable offline plot export
# Must run before the iplot() call further down so figures render inline.
# NOTE(review): connected=True is understood to load plotly.js from the online CDN
# instead of embedding it in the notebook — confirm against the installed plotly version.
init_notebook_mode(connected=True)

## Specifications

In [4]:
# Load configuration file
with open('config.json') as jf:
    config = json.load(jf)
check_config(config)

# Promote every config entry to a notebook-level variable. Later cells read
# names such as json_regex, cases, names, timers, metadata, threshold and
# recency, which are not assigned anywhere else in this notebook.
# The assignment goes through globals() instead of exec() so that a config key
# is never executed as code; keys that cannot be variable names are rejected.
for key, val in config.items():
    if not key.isidentifier():
        raise ValueError('Invalid config key (not a Python identifier): %r' % key)
    globals()[key] = val

# Extract file names and collect data
files = glob.glob(json_regex)
df = json2dataframe(files, cases, names, timers, metadata)

# Log-transform the data before modeling; inv_xform undoes it for plotting
xform = np.log
inv_xform = np.exp

# # Filter data by date if desired (dt is already imported in the first cell)
# df = df[df['date'] < dt.datetime.strptime('20191231', '%Y%m%d')]

In [5]:
# Echo the configured test cases, timers and model threshold so the rendered
# notebook records what was analysed. Plain loops are used because the prints
# are side effects; a list comprehension would only build a list of None.
print('Test cases:')
for case in cases:
    print('  '+case)
print('Timers:')
for name in names:
    print('  '+name)
print("Model threshold: %f" % threshold)

Test cases:
  ant-2-20km_ml_ls_np384
  ant-2-20km_mu_ls_np384
  ant-2-20km_mu_dls_np384
  green-1-7km_fea_1ws_np384
  green-1-7km_ml_ls_1ws_np384
  green-1-7km_mu_ls_1ws_np384
  green-1-7km_mu_dls_1ws_np384
  green-1-7km_fea_mem_np384
  green-1-7km_ml_ls_mem_np384
  green-1-7km_mu_ls_mem_np384
  green-1-7km_mu_dls_mem_np384
Timers:
  Total Time
  Setup Time
  Total Fill Time
  Residual Fill
  Residual Fill Evaluate
  Residual Fill Export
  Jacobian Fill
  Jacobian Fill Evaluate
  Jacobian Fill Export
  NOX Total Preconditioner Construction
  NOX Total Linear Solve
Model threshold: 0.005000


## Performance Timelines

In [6]:
# Make NumPy raise on floating-point errors (divide, overflow, underflow, invalid)
# instead of warning, so a bad value stops the analysis rather than propagating
np.seterr(all='raise')

# Find changepoints and format data to work nicely with plots
seqs = {case: {} for case in cases}
most_recent = df['date'].max()
events = {}

print('Finding changepoints:')
# Managing the pool with `with` guarantees the worker processes are shut down
# once every case has been processed, including when a case raises. pool.map
# blocks until all results are in, so nothing is cut short on exit.
with multiprocessing.Pool(4) as pool:
    for case in cases:
        print(case)
        for name in names:
            # Per-timer series for this case: date, transformed time, metadata
            cols = ['date', name] + list(metadata)
            data = df.loc[df['case']==case, cols].dropna(subset=[name])
            data = data.reset_index(drop=True).rename(columns={name: 'time'})
            data['time'] = xform(data['time'])
            seqs[case][name] = data

        # Detect changepoints for all timers of this case in parallel.
        # NOTE(review): assumes single_ts_chgpts returns (timer name, changepoint
        # positions) pairs, as needed by dict() here and the .iloc lookup below —
        # confirm in models.
        pool_inputs = [(k, v, threshold) for k,v in seqs[case].items()]
        chgpts = dict(pool.map(single_ts_chgpts, pool_inputs))

        for name in names:
            # Calculate mean/std between changepoints
            seqs[case][name] = add_regime_stats(seqs[case][name], chgpts[name])

            # Build dictionary of changepoints: date -> case -> [timer names]
            for d in seqs[case][name]['date'].iloc[chgpts[name]]:
                events.setdefault(d, {}).setdefault(case, []).append(name)
clear_output()

# Sort and print results
events = {d: events[d] for d in sorted(events)}
print('Events in the most recent %d days:' % recency)
recent_events = print_events(events, most_recent, recency)

Events in the most recent 10 days:
03/02/2020:
    green-1-7km_mu_dls_1ws_np384: Jacobian Fill
03/03/2020:
    ant-2-20km_mu_dls_np384: Jacobian Fill


In [7]:
# Per-series styling, index-aligned: the raw timings ('time') are drawn as blue
# markers, the regime statistics ('mean', 'upper', 'lower') as red lines.
lines = ['time', 'mean', 'upper', 'lower']
colors = ['blue', 'red', 'red', 'red']
modes = ['markers', 'lines', 'lines', 'lines']
dashes = ['solid', 'solid', 'dot', 'dot']

# Hover text for the raw-timing markers. The indices address the customdata
# columns ['date'] + metadata; index 1 is unused because the Albany compiler
# entry is disabled below.
hover_text = (
    "Date: %{customdata[0]}<br>" +
#     "Albany compiler: %{customdata[1]}<br>" +
    "Albany commit: %{customdata[2]}<br>" +
    "Trilinos commit: %{customdata[3]}" +
    "<extra></extra>"
)

fig = go.Figure()
# Create series on plot: one trace per (line, case) pair, added line-major so
# the dropdown masks can address them by position. Every trace starts on the
# first timer; only the first test case is visible initially.
for line, color, mode, dash in zip(lines, colors, modes, dashes):
    for c in cases:
        seq = seqs[c][names[0]]
        if line == 'time':
            # Only the data points carry hover metadata. Passed as a NumPy
            # array, matching what the timer dropdown sends on restyle.
            hover_opts = dict(
                customdata=seq[['date']+list(metadata)].to_numpy(),
                hovertemplate=hover_text,
            )
        else:
            hover_opts = dict(hoverinfo='skip')
        fig.add_trace(go.Scatter(
            x=seq['date'],
            y=inv_xform(seq[line]),
            mode=mode,
            line=dict(color=color, dash=dash, width=1.5),
            name=line,
            visible=(c == cases[0]),
            **hover_opts
        ))

# Test cases that appear in at least one recent event
changed_cases = set()
for per_case in recent_events.values():
    changed_cases.update(per_case.keys())

# Test case dropdown: each button shows only the traces of one case. The mask
# has one flag per trace, in the same line-major order the traces were added.
case_options = []
for c in cases:
    mask = [x == c for _ in lines for x in cases]
    case_options.append(dict(
        args=['visible', mask],
        label='*'+c if c in changed_cases else c,
        method='restyle'
    ))

# Timer dropdown: each button swaps x/y (and the hover metadata of the 'time'
# traces) of every trace to the selected timer.
name_options = []
for n in names:
    xs, ys, hover = [], [], []
    for line in lines:
        for c in cases:
            seq = seqs[c][n]
            xs.append(seq['date'])
            ys.append(inv_xform(seq[line]))
            if line == 'time':
                hover.append(seq[['date']+list(metadata)].to_numpy())
            else:
                hover.append(None)
    name_options.append(dict(
        args=[{'x': xs, 'y': ys, 'customdata': hover}],
        label=n,
        method='restyle'
    ))

def _dropdown(buttons, x):
    """Return a downward-opening dropdown menu left-anchored at paper position x."""
    return go.layout.Updatemenu(
        buttons=list(buttons),
        direction="down",
        pad={"r": 10, "t": 10},
        showactive=True,
        x=x, xanchor="left",
        y=1.15, yanchor="top"
    )

# Add dropdowns to plot: test case selector first, timer selector to its right
fig.update_layout(
    updatemenus=[
        _dropdown(case_options, 0),
        _dropdown(name_options, 0.3),
    ],
    margin=dict(l=50, r=50, b=200, t=50),
    height=600,
    xaxis_title='Simulation Date',
    yaxis_title='Wall-clock Time (s)'
)

# Render the figure inline in the notebook
iplot(fig)

### Plot of wall-clock times for nightly runs
Changepoints are estimated using a generalized likelihood ratio method on each timer, and then merged over all timers for a given test case. 
* Blue markers: recorded wall-clock time
* Solid red line: average wall-clock time between changepoints
* Dotted red lines: average wall-clock time $\pm$ two standard deviations

#### Plot window controls

* Test case and timer can be selected from the drop-down menus (a leading `*` on a test case name means a changepoint was detected for it within the recent window)
* Hovering over data points shows various metadata
* Clicking on the legend will show/hide individual plot elements
* Click and drag to zoom in; double click to reset zoom

Pollak, Moshe; Siegmund, D. Sequential Detection of a Change in a Normal Mean when the Initial Value is Unknown. Ann. Statist. 19 (1991), no. 1, 394--416. doi:10.1214/aos/1176347990. https://projecteuclid.org/euclid.aos/1176347990

Siegmund, D.; Venkatraman, E. S. Using the Generalized Likelihood Ratio Statistic for Sequential Detection of a Change-Point. Ann. Statist. 23 (1995), no. 1, 255--271. doi:10.1214/aos/1176324466. https://projecteuclid.org/euclid.aos/1176324466

Hawkins, D. M., & Zamba, K. D. (2005). Statistical Process Control for Shifts in Mean or Variance using a Change Point Formulation. Technometrics, 47, 164-173.

Hawkins DM, Qiu P, Kang CW. The changepoint model for statistical process control. Journal of Quality Technology. 2003 Oct 1;35(4):355-366.