# ALI Performance Tests on Blake

In [1]:
# https://stackoverflow.com/questions/27934885/how-to-hide-code-from-cells-in-ipython-notebook-visualized-with-nbviewer
from IPython.display import HTML
# Markup for the code-visibility toggle.  The script keeps a `code_show` flag,
# hides or shows every `div.input` element through the page's `$` (jQuery)
# object, and flips the flag on each call.  `code_toggle` is also registered
# to run once when the document is ready, so code inputs start out hidden.
# The form's submit button calls `code_toggle()` again on every click.
_TOGGLE_MARKUP = '''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Show/hide code blocks"></form>'''

# Wrap the markup so the notebook renders it as HTML instead of plain text.
button = HTML(_TOGGLE_MARKUP)

In [2]:
# Bare expression as the last line of the cell: renders the show/hide toggle
# built in the previous cell as this cell's output.
button

In [3]:
import datetime as dt
import glob
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import os
import json

import plotly.graph_objects as go
from plotly.offline import iplot, init_notebook_mode

# Import scripts
from json2timeline import json2dataframe
from utils import *
from models import findChangePts

In [4]:
# Enable offline plot export: initialise plotly's notebook mode so the figure
# passed to iplot() further down is rendered inline in the cell output.
# NOTE(review): per the plotly.offline documentation, connected=True loads
# plotly.js from a CDN instead of embedding it in the notebook, so viewing
# the plots presumably needs network access -- confirm this is intended.
init_notebook_mode(connected=True)

## Specifications

In [18]:
# ---------------------------------------------------------------------------
# Plot configuration
# ---------------------------------------------------------------------------
# Process count handed to json2dataframe.
# NOTE(review): presumably selects the runs executed on this many processes;
# confirm against json2dataframe.
nproc = 384

# `names` are the column labels used for the timers in `df` (later cells index
# df[name]); `timers` are the corresponding raw timer labels.
# NOTE(review): the two lists look like they are paired by position -- keep
# them the same length and in the same order; verify against json2dataframe.
names = ['Total Time']
timers = ['Albany Total Time:']

# Per-run metadata columns carried along with every data point and shown in
# the hover text of the timeline plot.
metadata = ['Albany cxx compiler',
            'Albany git commit id',
            'Trilinos git commit id']

# Test cases to load and plot.
cases = ['ant-2-20km_ml_ls',
         'ant-2-20km_mu_ls',
         'ant-2-20km_mu_dls',
         'green-1-7km_fea_1ws',
         'green-1-7km_ml_ls_1ws',
         'green-1-7km_mu_ls_1ws',
         'green-1-7km_mu_dls_1ws',
         'green-1-7km_fea_mem',
         'green-1-7km_ml_ls_mem',
         'green-1-7km_mu_ls_mem',
         'green-1-7km_mu_dls_mem']

# Alternative, longer timer selection kept for reference.  To use it, replace
# the `names` / `timers` assignments above with these two tuples.
# names =  ('Total Time',
#           'Setup Time',
#           'Total Fill Time',
#           'Residual Fill',
#           'Residual Fill Evaluate',
#           'Residual Fill Export',
#           'Jacobian Fill',
#           'Jacobian Fill Evaluate',
#           'Jacobian Fill Export',
#           'NOX Total Preconditioner Construction',
#           'NOX Total Linear Solve')
# timers = ('Albany Total Time:',
#           'Albany: Setup Time:',
#           'Albany: Total Fill Time:',
#           'Albany Fill: Residual:',
#           'Albany Residual Fill: Evaluate:',
#           'Albany Residual Fill: Export:',
#           'Albany Fill: Jacobian:',
#           'Albany Jacobian Fill: Evaluate:',
#           'Albany Jacobian Fill: Export:',
#           'NOX Total Preconditioner Construction:',
#           'NOX Total Linear Solve:')

# Collect the result files from the working directory.  glob.glob() returns
# matches in arbitrary (filesystem-dependent) order, so sort them to make the
# input to json2dataframe -- and therefore the notebook -- reproducible.
files = sorted(glob.glob('ctest-*'))
df = json2dataframe(files, cases, nproc, names, timers, metadata)

# Optional: restrict the date range (`dt` is imported at the top of the notebook).
# df = df[df['date'] < dt.datetime.strptime('20191231', '%Y%m%d')]

In [19]:
# Detection threshold forwarded to findChangePts(threshold=...) when the
# changepoints are computed.
threshold = 1e-4

# Width, in days counted back from the most recent run, of the window used
# when reporting recent events (passed to printEvents).
recency = 10

In [20]:
# Echo the active configuration so the rendered notebook records which test
# cases, timers and model threshold produced the results below.
print('Test cases:')
for case_label in cases:
    print('  ' + case_label)

print('Timers:')
for timer_label in names:
    print('  ' + timer_label)

print("Model threshold: %f" % threshold)

Test cases:
  ant-2-20km_ml_ls
  ant-2-20km_mu_ls
  ant-2-20km_mu_dls
  green-1-7km_fea_1ws
  green-1-7km_ml_ls_1ws
  green-1-7km_mu_ls_1ws
  green-1-7km_mu_dls_1ws
  green-1-7km_fea_mem
  green-1-7km_ml_ls_mem
  green-1-7km_mu_ls_mem
  green-1-7km_mu_dls_mem
Timers:
  Total Time
Model threshold: 0.000100


## Performance Timelines

In [21]:
# Find changepoints and format data to work nicely with plots.
#
# Produces:
#   seqs[case][name] -- DataFrame with columns 'date', 'time' (the timer
#                       values), the metadata columns, and the regime
#                       statistics 'mean', 'upper' and 'lower'
#   events[date]     -- {case: name} for each date on which a changepoint
#                       was detected
#   recentEvents     -- whatever printEvents returns for the recency window
seqs = {case: {} for case in cases}
mostRecent = df['date'].max()
events = {}
for case in cases:
    for name in names:
        # Build the series ONCE.  The changepoint indices are positional
        # (they are used with .iloc below), so detection, event lookup and
        # the regime statistics must all see exactly the same rows.  Rows
        # lacking a date or a timer value are dropped; metadata may be
        # missing and is deliberately not part of the dropna subset.
        cols = ['date', name] + list(metadata)
        data = (df.loc[df['case'] == case, cols]
                  .dropna(subset=['date', name])
                  .reset_index(drop=True)
                  .rename(columns={name: 'time'}))
        times = data['time'].to_numpy()

        # Detect changepoints
        pts, _ = findChangePts(times, threshold=threshold, method='mean')

        # Record the changepoint dates so recent ones can be reported later.
        # NOTE(review): only one timer name is kept per (date, case); with
        # several entries in `names` a later timer overwrites an earlier one.
        # The shape is left as-is because printEvents consumes it.
        for d in data['date'].iloc[pts]:
            events.setdefault(d, {})[case] = name

        # Mean/std between changepoints, plus a +/- 2 std band for plotting.
        # The concat is column-wise and relies on the 0..n-1 index set by
        # reset_index above.
        mean, std = regimeTimeseries(times, pts)
        bands = {'mean': mean, 'upper': mean + 2*std, 'lower': mean - 2*std}
        seqs[case][name] = pd.concat((data, pd.DataFrame(bands)), axis=1)

# Sort events chronologically and print the recent ones
events = {d: events[d] for d in sorted(events)}
print('Events in the most recent %d days:' % recency)
recentEvents = printEvents(events, mostRecent, recency)

Events in the most recent 10 days:
02/01/2020:
    green-1-7km_mu_dls_1ws: Total Time


In [61]:
# Interactive timeline comparing two test cases for the first timer in `names`.
#
# Trace layout (both dropdowns depend on it): traces are added line-major.
# For every entry of `lines` there is one "case 1" trace (legend group 'g1')
# followed by one trace per entry of `cases` (legend group 'g2'), i.e.
# len(lines) * (1 + len(cases)) traces in total.
lines = ['time', 'mean', 'upper', 'lower']
colors0 = ['darkred', 'indianred', 'indianred', 'indianred']      # case 1
colors = ['midnightblue', 'cornflowerblue', 'cornflowerblue', 'cornflowerblue']  # case 2
modes = ['markers', 'lines', 'lines', 'lines']
dashes = ['solid', 'solid', 'dot', 'dot']

timer = names[0]                       # only the first timer is plotted
hover_cols = ['date'] + list(metadata)

# Case shown initially in the second (blue) series; fall back to the only
# case when just one is configured instead of raising IndexError.
default_case2 = cases[1] if len(cases) > 1 else cases[0]

# customdata columns follow `hover_cols`: [0] date, [1] compiler (not shown),
# [2] Albany commit, [3] Trilinos commit.
hover_template = (
    "Date: %{customdata[0]}<br>" +
    "Albany commit: %{customdata[2]}<br>" +
    "Trilinos commit: %{customdata[3]}" +
    "<extra></extra>"
)


def _hover_data(case):
    """Return the hover metadata of `case` as a 2-D numpy array.

    The same array form is used for the initial traces and for the dropdown
    restyle arguments, so both code paths hand plotly identical data.
    """
    return seqs[case][timer].loc[:, hover_cols].to_numpy()


def _make_trace(case, line, color, mode, dash, group, visible):
    """Build one Scatter trace for column `line` of `case`.

    The 'time' trace is named after the case and carries hover metadata;
    the statistic traces are named after the statistic and skip hovering.
    """
    if line == 'time':
        extra = dict(name=case,
                     customdata=_hover_data(case),
                     hovertemplate=hover_template)
    else:
        extra = dict(name=line, hoverinfo='skip')
    return go.Scatter(
        x=seqs[case][timer]['date'],
        y=seqs[case][timer][line],
        mode=mode,
        line=dict(color=color, dash=dash, width=1.5),
        visible=visible,
        legendgroup=group,
        **extra
    )


def _case1_restyle(c1):
    """Return the restyle payload that puts case `c1` into the 'g1' slots.

    The payload lists a value for every trace in trace order; only the first
    slot of each line group changes, the 'g2' traces are re-sent unchanged.
    'name' is included so the legend entry follows the selected case.
    """
    slots = [(line, c) for line in lines for c in [c1] + cases]
    return {
        'x': [seqs[c][timer]['date'] for _, c in slots],
        'y': [seqs[c][timer][line] for line, c in slots],
        'customdata': [_hover_data(c) if line == 'time' else None
                       for line, c in slots],
        'name': [c if line == 'time' else line for line, c in slots],
    }


fig = go.Figure()
# Create series on plot
for line, color0, color, mode, dash in zip(lines, colors0, colors, modes, dashes):
    # Slot 0 of each line group: the "case 1" series, always visible.
    fig.add_trace(_make_trace(cases[0], line, color0, mode, dash, 'g1', True))
    # Remaining slots: one trace per case, only the selected one is visible.
    for c in cases:
        fig.add_trace(_make_trace(c, line, color, mode, dash, 'g2',
                                  c == default_case2))

# Case 1 dropdown (swap the data shown by the first set of series)
case1Options = [dict(
        args=[_case1_restyle(c1)],
        label=c1,
        method='restyle'
    ) for c1 in cases]

# Case 2 dropdown (toggle visibility for other series)
# NOTE(review): the list has 1 + len(cases) entries, i.e. one line group,
# which is fewer than the number of traces.  This relies on plotly.js
# applying a short restyle array cyclically over the traces -- confirm.
case2Options = [dict(
        args=['visible', [True] + [c == c2 for c in cases]],
        label=c2,
        method='restyle'
    ) for c2 in cases]

# Add dropdowns to plot
fig.update_layout(
    updatemenus=[
        go.layout.Updatemenu(
            buttons=case1Options,
            direction="down",
            pad={"r": 0, "t": 0},
            showactive=True,
            x=0,    xanchor="left",
            y=1.15, yanchor="top"
        ),
        go.layout.Updatemenu(
            buttons=case2Options,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            # Highlight the case that is actually visible initially; the
            # default (button 0) would mislabel the second series.
            active=cases.index(default_case2),
            x=0.3, xanchor="left",
            y=1.15, yanchor="top"
        ),
    ],
    xaxis_title='Simulation Date',
    yaxis_title='Wall-clock Time (s)'
)

iplot(fig)

In [53]:
# Debug inspection: the customdata entry for the first trace in the first
# "case 1" dropdown option.
# NOTE(review): leftover debugging cell.  Its stored output is a DataFrame,
# whereas the plotting cell now applies .to_numpy() to this entry, so the
# output is stale; the execution count is also out of sequence.  Consider
# removing the cell.
case1Options[0]['args'][0]['customdata'][0]

Unnamed: 0,date,Albany cxx compiler,Albany git commit id,Trilinos git commit id
0,2019-10-17,,,
1,2019-10-18,,,
2,2019-10-20,,,
3,2019-10-22,,,
4,2019-10-23,,,
...,...,...,...,...
73,2020-01-30,Intel 18.0.1.20171018,ae8ded0,b71a183
74,2020-02-01,Intel 18.0.1.20171018,e1d29fb,3e1ac3b
75,2020-02-02,Intel 18.0.1.20171018,e1d29fb,c486903
76,2020-02-03,Intel 18.0.1.20171018,bc9fda0,200c15e


In [56]:
# Debug inspection: hover metadata (date + metadata columns) of one case as a
# numpy array.
# NOTE(review): `c` is not assigned in this cell; it is whatever an earlier
# loop left behind in the kernel, so the result depends on execution order
# and fails on a fresh kernel if run before the plotting cell.  The cell also
# dumps the whole array.  Likely a debugging leftover -- consider removing.
seqs[c][names[0]].loc[:, ['date']+list(metadata)].to_numpy()

array([[Timestamp('2019-12-27 00:00:00'), 'Intel 18.0.1.20171018',
        'cef1a10', '647ae4e'],
       [Timestamp('2019-12-28 00:00:00'), 'Intel 18.0.1.20171018',
        'cef1a10', '647ae4e'],
       [Timestamp('2019-12-29 00:00:00'), 'Intel 18.0.1.20171018',
        'cef1a10', '647ae4e'],
       [Timestamp('2019-12-31 00:00:00'), 'Intel 18.0.1.20171018',
        'cef1a10', '647ae4e'],
       [Timestamp('2020-01-01 00:00:00'), 'Intel 18.0.1.20171018',
        'cef1a10', '647ae4e'],
       [Timestamp('2020-01-02 00:00:00'), 'Intel 18.0.1.20171018',
        'cef1a10', '647ae4e'],
       [Timestamp('2020-01-07 00:00:00'), 'Intel 18.0.1.20171018',
        '1e23063', 'f774bcb'],
       [Timestamp('2020-01-08 00:00:00'), 'Intel 18.0.1.20171018',
        'befa306', '1c59af4'],
       [Timestamp('2020-01-09 00:00:00'), 'Intel 18.0.1.20171018',
        'e62f701', '019437c'],
       [Timestamp('2020-01-10 00:00:00'), 'Intel 18.0.1.20171018',
        'afd350d', 'ad4f535'],
       [Timestamp('2

### Plot of wall-clock times for nightly runs
Changepoints are estimated using a generalized likelihood ratio method on each timer, and then merged over all timers for a given test case. 
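* Markers: recorded wall-clock time (dark red for the first selected test case, dark blue for the second)
* Solid line: average wall-clock time between changepoints (red for the first case, blue for the second)
* Dotted lines: average wall-clock time $\pm$ two standard deviations (same colors as the solid line)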

#### Plot window controls

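* The two test cases to compare are selected from the drop-down menus (left menu: first case, drawn in red; right menu: second case, drawn in blue); recent changes for a test case are listed in the "Events in the most recent days" output above the plot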
* Hovering over data points shows various metadata
* Clicking on the legend will show/hide individual plot elements
* Click and drag to zoom in; double click to reset zoom

Pollak, Moshe; Siegmund, D. Sequential Detection of a Change in a Normal Mean when the Initial Value is Unknown. Ann. Statist. 19 (1991), no. 1, 394–416. doi:10.1214/aos/1176347990. https://projecteuclid.org/euclid.aos/1176347990

Siegmund, D.; Venkatraman, E. S. Using the Generalized Likelihood Ratio Statistic for Sequential Detection of a Change-Point. Ann. Statist. 23 (1995), no. 1, 255–271. doi:10.1214/aos/1176324466. https://projecteuclid.org/euclid.aos/1176324466

Hawkins, D. M., & Zamba, K. D. (2005). Statistical Process Control for Shifts in Mean or Variance using a Change Point Formulation. Technometrics, 47, 164-173.

Hawkins DM, Qiu P, Kang CW. The changepoint model for statistical process control. Journal of Quality Technology. 2003 Oct 1;35(4):355-366.