# ALI Performance Tests on Blake
Performance comparison dashboard. It must be run in a Jupyter notebook to interact with the plots.

In [1]:
import datetime as dt
import glob
import numpy as np
import pandas as pd
import json

import plotly.graph_objects as go

# Import scripts
from json2timeline import json2dataframe
from models import findChangePts
from basicstats import *
from utils import *

In [2]:
# NOTE(review): helper is not defined in this notebook (it arrives through one
# of the wildcard imports above); presumably it renders a toggle that hides
# the code cells -- confirm against its definition.
hide_code_button()

In [3]:
# Load configuration file
with open('config.json') as jf:
    config = json.load(jf)
check_config(config)

# Promote every configuration entry to a notebook-level variable; the cells
# below read `cases`, `nproc`, `names`, `timers`, `metadata`, `threshold` and
# `recency` directly. The namespace is assigned explicitly instead of building
# source text for exec(), so a malformed key in config.json cannot execute code.
for key, val in config.items():
    if not key.isidentifier():
        raise ValueError('config.json key %r is not a valid variable name' % key)
    globals()[key] = val

# Extract file names and collect data
files = glob.glob('json/ctest-*')
df = json2dataframe(files, cases, nproc, names, timers, metadata)

# # Filter data by date if desired
# import datetime as dt
# df = df[df['date'] < dt.datetime.strptime('20191231', '%Y%m%d')]

In [4]:
# Print some information about the loaded configuration.
# Plain loops are used so that no throwaway list of None values is built just
# to trigger the print() side effect.
print('Test cases:')
for case in cases:
    print('  '+case)
print('Timers:')
for name in names:
    print('  '+name)
print("Model threshold: %f" % threshold)

Test cases:
  ant-2-20km_ml_ls
  ant-2-20km_mu_ls
  ant-2-20km_mu_dls
  green-1-7km_fea_1ws
  green-1-7km_ml_ls_1ws
  green-1-7km_mu_ls_1ws
  green-1-7km_mu_dls_1ws
  green-1-7km_fea_mem
  green-1-7km_ml_ls_mem
  green-1-7km_mu_ls_mem
  green-1-7km_mu_dls_mem
Timers:
  Total Time
  Setup Time
  Total Fill Time
  Residual Fill
  Residual Fill Evaluate
  Residual Fill Export
  Jacobian Fill
  Jacobian Fill Evaluate
  Jacobian Fill Export
  NOX Total Preconditioner Construction
  NOX Total Linear Solve
Model threshold: 0.000100


In [5]:
# Detect changepoints for every (case, timer) pair and build plot-ready frames
seqs = {case: {} for case in cases}
mostRecent = df['date'].max()
events = {}
print('Finding changepoints')
for case in cases:
    print(case, end='')
    case_rows = df['case'] == case
    for name in names:
        print('.', end='')
        # Keep only this timer's samples and expose them under a common 'time' column
        cols = ['date', name] + list(metadata)
        series = (
            df.loc[case_rows, cols]
            .dropna(subset=[name])
            .reset_index(drop=True)
            .rename(columns={name: 'time'})
        )
        pts, _ = findChangePts(series['time'].to_numpy(), threshold=threshold)

        # Record which timers changed for which case on each changepoint date
        for when in series['date'].iloc[pts]:
            events.setdefault(when, {}).setdefault(case, []).append(name)

        # Per-regime mean and upper/lower bands between changepoints
        mean, upper, lower = regimeTimeseries(series['time'].to_numpy(), pts)
        bands = pd.DataFrame({'mean': mean, 'upper': upper, 'lower': lower})
        seqs[case][name] = pd.concat([series, bands], axis=1)
    print()
clear_output()

# Order events chronologically, then report the recent ones
events = {day: events[day] for day in sorted(events)}
print('Events in the most recent %d days:' % recency)
recentEvents = printEvents(events, mostRecent, recency)

Events in the most recent 10 days:
02/01/2020:
    green-1-7km_mu_ls_1ws: Residual Fill
    green-1-7km_mu_dls_1ws: Residual Fill
    green-1-7km_fea_mem: Residual Fill
    green-1-7km_mu_ls_mem: Residual Fill
    green-1-7km_mu_dls_mem: Residual Fill
02/02/2020:
    green-1-7km_mu_dls_mem: NOX Total Linear Solve


In [6]:
# Plot results
# Series drawn per case, with the marker/line style used for each one
lines = ['time', 'mean'] # 'upper', 'lower']
colors = ['darkred', 'midnightblue']
modes = ['markers', 'lines', 'lines', 'lines']
dashes = ['solid', 'solid', 'dot', 'dot']

# Hover text for the raw-timing markers; customdata columns are ['date'] + metadata
hover_text = (
    "Date: %{customdata[0]}<br>" +
#     "Albany compiler: %{customdata[1]}<br>" +
    "Albany commit: %{customdata[2]}<br>" +
    "Trilinos commit: %{customdata[3]}" +
    "<extra></extra>"
)

fig = go.FigureWidget()
# One trace per (series, case); the first two cases are shown initially
for line, mode, dash in zip(lines, modes, dashes):
    for c, color in zip(cases[:2], colors):
        first = c == cases[0]
        series = seqs[c][names[0]]
        # Settings shared by the raw-data and the derived traces
        trace_args = dict(
            x=series['date'],
            y=series[line],
            mode=mode,
            line=dict(color=color, dash=dash, width=1.5),
            legendgroup='g1' if first else 'g2',
        )
        if line == 'time':
            # Raw samples: labelled with the case name and carrying hover metadata
            trace_args.update(
                marker_symbol='square' if first else 'circle',
                name=c,
                customdata=series[['date']+list(metadata)],
                hovertemplate=hover_text,
            )
        else:
            # Derived series: labelled with the series name, no hover
            trace_args.update(name=line, hoverinfo='skip')
        fig.add_trace(go.Scatter(**trace_args))

fig = fig.update_layout(
    title='Nightly test performance',
    xaxis_title='Simulation Date',
    yaxis_title='Wall-clock Time (s)'
)


In [None]:
# Look at paired data
def paired_data(c1, c2, name):
    """Build the time series of timing differences (c1 minus c2) for one timer.

    The two cases' frames from ``seqs`` are inner-joined on 'date', so only
    dates present in both cases are kept. Metadata columns are taken from
    case ``c1``.

    Parameters
    ----------
    c1, c2 : keys of ``seqs`` (test case names)
    name : key of ``seqs[case]`` (timer name)

    Returns
    -------
    pandas.DataFrame with columns 'date', the ``metadata`` columns, 'time'
    (the difference), and 'mean'/'upper'/'lower' as returned by
    ``regimeTimeseries(..., std_error=True)`` for the changepoints found in
    the difference series.
    """
    meta_cols = ['date'] + list(metadata)
    df1, df2 = seqs[c1][name].set_index('date'), seqs[c2][name].set_index('date')
    joined = (
        df1.join(df2, lsuffix='_c1', rsuffix='_c2', how='inner')
        .reset_index()
        .rename(columns={'date': 'date_c1'})
    )
    timediff = joined['time_c1'] - joined['time_c2']

    # Take an explicit copy of the column subset: assigning a new column to a
    # bare selection raises SettingWithCopyWarning on pandas without
    # copy-on-write.
    df = joined[[col + '_c1' for col in meta_cols]].copy()
    df.columns = meta_cols
    df['time'] = timediff
    pts, _ = findChangePts(df['time'].to_numpy(), threshold=threshold)

    # Calculate mean and upper/lower bands between changepoints
    mean, upper, lower = regimeTimeseries(df['time'].to_numpy(), pts, std_error=True)
    temp = {'mean': mean, 'upper': upper, 'lower': lower}
    df = pd.concat((df, pd.DataFrame(temp)), axis=1)
    return df

# Difference series for the two default cases and the first timer
pair_df = paired_data(cases[0], cases[1], names[0])
lines_all = ['time', 'mean', 'upper', 'lower']

# Hover text for the difference markers; customdata columns are ['date'] + metadata
diff_hover = (
    "Date: %{customdata[0]}<br>" +
#     "Albany compiler: %{customdata[1]}<br>" +
    "Albany commit: %{customdata[2]}<br>" +
    "Trilinos commit: %{customdata[3]}" +
    "<extra></extra>"
)

diff_fig = go.FigureWidget()
color = 'blueviolet'  # every trace of the difference plot shares one colour
for line, mode, dash in zip(lines_all, modes, dashes):
    # Settings common to the raw differences and the derived series
    trace_args = dict(
        x=pair_df['date'],
        y=pair_df[line],
        mode=mode,
        line=dict(color=color, dash=dash, width=1.5),
    )
    if line == 'time':
        trace_args.update(
            marker_symbol='circle',
            name='Time Difference',
            customdata=pair_df[['date']+list(metadata)],
            hovertemplate=diff_hover,
        )
    else:
        trace_args.update(name=line, hoverinfo='skip')
    diff_fig.add_trace(go.Scatter(**trace_args))

# Horizontal reference line at zero difference, spanning the full plot width
zero_line = dict(
    type="line",
    x0=0, x1=1, xref="paper",
    y0=0, y1=0
)
diff_fig = diff_fig.update_layout(
    shapes=[zero_line],
    title='Analysis of performance difference (Case1 minus Case2)',
    xaxis_title='Simulation Date',
    yaxis_title='Difference (s)'
)

In [None]:
def latest_stats(case, name):
    """Summarise the samples recorded since the most recent changepoint.

    The latest regime is identified as the trailing run of rows whose 'mean'
    value equals the 'mean' of the last row of ``seqs[case][name]``.

    Parameters
    ----------
    case : key of ``seqs`` (test case name)
    name : key of ``seqs[case]`` (timer name)

    Returns
    -------
    (start_date, pts, mean, std) where
      start_date : 'date' of the first sample in the latest regime
      pts        : numpy array of the regime's 'time' values, newest first
      mean       : 'mean' value of the last row
      std        : half the gap between the last row's 'upper' and 'mean'
                   (assumes 'upper' is mean + 2*std, as the notebook's legend
                   text states -- confirm against regimeTimeseries)

    Raises
    ------
    ValueError if the series is empty.
    """
    df = seqs[case][name]
    n = df.shape[0]
    if n == 0:
        raise ValueError('no samples available for %s / %s' % (case, name))

    # Positional access, so the result does not depend on the index labels
    means = df['mean'].to_numpy()
    mean = means[-1]
    std = .5*(df['upper'].to_numpy()[-1] - mean)

    # Walk back from the newest sample while the regime mean is unchanged.
    # Tracking the start position directly keeps the start date correct when
    # the whole series is a single regime (no earlier changepoint to stop at).
    start = n - 1
    while start > 0 and means[start - 1] == mean:
        start -= 1

    pts = df['time'].to_numpy()[start:][::-1].copy()
    return df['date'].iloc[start], pts, mean, std

# Overlaid histograms of the samples since the latest changepoint,
# one per default test case (first timer)
hist = go.FigureWidget()
for c, color in zip(cases[:2], colors):
    recent_times = latest_stats(c, names[0])[1]
    hist.add_trace(
        go.Histogram(x=recent_times, name=c, marker_color=color, opacity=0.7)
    )

# Semi-transparent bars drawn on top of each other; horizontal legend above the plot
hist = hist.update_layout(
    barmode='overlay',
    title='Observations since latest changepoints',
    xaxis_title='Wall-clock Time (s)',
    yaxis_title='Count',
    legend_orientation='h',
    legend={'x': 0, 'y': 1.11, 'bgcolor': None}
)

In [None]:
from ipywidgets import Output
# Output widget with a thin black border, used as a text panel:
# update_textbox() clears it and prints the summary statistics and the
# t-test / permutation-test p-values into it.
textbox = Output(layout={'border': '1px solid black'})
def update_textbox(c1=cases[0], c2=cases[1], n=names[0]):
    """Refresh ``textbox`` with summary statistics and test p-values.

    For each of the two selected cases, prints the number of samples since its
    latest changepoint (with that regime's start date), the regime mean and the
    std as returned by ``latest_stats``. Then prints the p-values returned by
    ``ttest`` and ``permtest`` for the two samples, suffixed with one asterisk
    per significance level passed (0.05, 0.01, 0.001).

    Parameters
    ----------
    c1, c2 : test case names (keys of ``seqs``); default to the first two cases
    n : timer name; defaults to the first timer
    """
    def signif(pval):
        # One asterisk for each threshold the p-value falls below
        return '*'*(int(pval<0.05)+int(pval<0.01)+int(pval<0.001))

    with textbox:
        textbox.clear_output()
        d1, p1, m1, s1 = latest_stats(c1, n)
        d2, p2, m2, s2 = latest_stats(c2, n)

        # Summary statistics
        print('Data since latest changepoints')
        # Label each summary with the case actually selected (c1/c2), not with
        # the first two configured cases, so the headers follow the dropdowns.
        print(c1)
        print('  N   : %d (since %s)\n  mean: %.2f\n  std : %.2f' %
              (len(p1), dt.datetime.strftime(d1,'%b %d'), m1, s1))
        print(c2)
        print('  N   : %d (since %s)\n  mean: %.2f\n  std : %.2f' %
              (len(p2), dt.datetime.strftime(d2,'%b %d'), m2, s2))

        # Test results
        _, t_pval = ttest(p1, p2, with_pval=True)
        print('t-test p-value: %.4f%s' % (t_pval, signif(t_pval)))
        # NOTE(review): N is presumably the number of permutations -- confirm in permtest
        perm_pval = permtest(p1, p2, N=10000)
        print('perm test p-value: %.4f%s' % (perm_pval, signif(perm_pval)))
        
# Fill the textbox once using the default arguments (first two cases, first timer)
update_textbox()

In [None]:
# Function that will update all chart elements based on dropdowns
def update(Case1=cases[0], Case2=cases[1], Timer=names[0]):
    """Redraw every dashboard element for the selected cases and timer.

    Updates, in place, the traces of ``fig`` (timings per case), ``diff_fig``
    (Case1 minus Case2), ``hist`` (samples since the latest changepoints) and
    finally the statistics textbox.

    Parameters
    ----------
    Case1, Case2 : test case names (keys of ``seqs``)
    Timer : timer name (key of ``seqs[case]``)
    """
    c1, c2, n = Case1, Case2, Timer
    pair_df = paired_data(c1, c2, n)
    meta_cols = ['date']+list(metadata)
    with fig.batch_update():
        # Traces were added series-by-series with the two cases interleaved:
        # [time/c1, time/c2, mean/c1, mean/c2, ...]; walk them in that order.
        i = 0
        for line, _mode, _dash in zip(lines, modes, dashes):
            for c in (c1, c2):
                series = seqs[c][n]
                trace = fig.data[i]
                trace['x'] = series['date']
                trace['y'] = series[line]
                # The first two traces are the raw markers, named after their case
                trace['name'] = c if i < 2 else line
                # Hover metadata must come from the same case as the plotted
                # points (the Case2 trace previously received Case1's metadata).
                trace['customdata'] = series[meta_cols]
                i += 1
    with diff_fig.batch_update():
        for i, line in enumerate(lines_all):
            diff_fig.data[i]['x']=pair_df['date']
            diff_fig.data[i]['y']=pair_df[line]
            diff_fig.data[i]['customdata']=pair_df[meta_cols]
    with hist.batch_update():
        hist.data[0]['x']=latest_stats(c1,n)[1]
        hist.data[1]['x']=latest_stats(c2,n)[1]
    update_textbox(c1, c2,n)

In [None]:
# Create dashboard
from ipywidgets import interactive, HBox, VBox, HTML, Layout
# One dropdown per keyword argument of `update`; changing any of them calls `update`
widget = interactive(
    update,
    Case1=list(cases),
    Case2=list(cases),
    Timer=list(names),
)
# Show only the dropdowns: the last child of `widget` is dropped
# (per the ipywidgets docs it is the Output area of the interactive widget)
controls = HBox(
    children=widget.children[:-1],
    layout=Layout(flex_flow='row wrap'),
)
# Stack the controls, the three figures and the statistics textbox vertically
VBox(children=[controls, fig, diff_fig, hist, textbox])

### Plot of wall-clock times for nightly runs
Changepoints are estimated using a generalized likelihood ratio method on each timer. 
* Markers: recorded wall-clock time
* Solid line: average wall-clock time between changepoints
* Dotted lines: average wall-clock time $\pm$ two standard deviations

### Histogram of recent observations
Using data since the most recently detected changepoint, we plot histograms of the two cases being considered.

### T-test for difference in average wall-clock time
Data since the most recent changepoint is trimmed to remove outliers, and then we use a t-test to compare the average performance of the two test cases. One, two, and three asterisks indicate significance levels of 0.05, 0.01, and 0.001 respectively.

In [None]:
# Ad-hoc inspection: raw 'time' samples of the last timer for the second test case
seqs[cases[1]][names[-1]]['time'].to_numpy()