In [None]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [None]:
# Load IPython's autoreload extension; it provides the %autoreload magic.
%load_ext autoreload

In [None]:
# Autoreload mode 3: reload changed modules before each cell is executed and
# also pick up objects newly added to them, so edits to the star-imported
# analysis module become visible without restarting the kernel.
%autoreload 3

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from RooFitMP_analysis import *

In [None]:
# Job 1538069: split timing info from the job's .out file, plus the benchmark
# results from its JSON file (looked up by benchmark name further down).
df_split_timings_1538069 = build_comb_df_split_timing_info(
    '../rootbench/1538069.burrell.nikhef.nl.out'
)
dfs_1538069 = load_result_file(
    '../rootbench/1538069.burrell.nikhef.nl/RoofitMPworkspace_1549966927.json',
    match_y_axes=True,
)

In [None]:
# Results of the 'BM_RooFit_MP_GradMinimizer_workspace_file' benchmark.
df_total_timings_1538069 = dfs_1538069['BM_RooFit_MP_GradMinimizer_workspace_file']

# Same frame without the two timing-related columns, with rows containing
# missing values removed, indexed by benchmark number.
df_meta_1538069 = (
    df_total_timings_1538069
    .drop(columns=['real_time', 'real or ideal'])
    .dropna()
    .set_index('benchmark_number', drop=True)
)

In [None]:
# Baseline: results of the 'BM_RooFit_RooMinimizer_workspace_file' benchmark.
df_baseline_timings_1538069 = dfs_1538069['BM_RooFit_RooMinimizer_workspace_file']

# Mean and standard deviation of the baseline 'real_time', divided by 1000
# (presumably ms -> s, since both are later drawn on the 'time [s]' axis --
# TODO confirm the unit of 'real_time').
vanilla_t = df_baseline_timings_1538069['real_time'].mean() / 1000
vanilla_t_std = df_baseline_timings_1538069['real_time'].std() / 1000

In [None]:
# Combine the total (gbench) timings with the split timings for three timing
# types; an "ideal" variant is requested for the gradient part via add_ideal.
_df = combine_detailed_with_gbench_timings_by_name(
    df_total_timings_1538069,
    df_split_timings_1538069,
    {
        'update': 'update state',
        'gradient': 'gradient work',
        'terminate': 'terminate',
    },
    add_ideal=['gradient'],
)

# Time versus NumCPU: colour encodes the timing type, line style encodes
# the 'real or ideal' column.
_g = sns.relplot(
    data=_df,
    kind="line",
    x='NumCPU',
    y='time [s]',
    hue='timing_type',
    style="real or ideal",
    markers=True,
    err_style="bars",
    legend='full',
)

# Horizontal reference lines for the baseline: its mean, plus fainter lines
# one standard deviation below and above it.
linestyle = dict(color='black', lw=0.7)
_g.axes[0, 0].axhline(vanilla_t, **linestyle)
_g.axes[0, 0].axhline(vanilla_t - vanilla_t_std, **linestyle, alpha=0.5)
_g.axes[0, 0].axhline(vanilla_t + vanilla_t_std, **linestyle, alpha=0.5)

In [None]:
# Timing types to combine; compared with the previous plot this adds the
# partial-derivative timings.
_timing_types = {
    'update': 'update state',
    'gradient': 'gradient work',
    'terminate': 'terminate',
    'partial derivatives': 'partial derivative',
}

# Combine total and split timings; 'partial derivatives' is passed to
# exclude_from_rest, and an "ideal" variant is requested for the gradient part.
_df = combine_detailed_with_gbench_timings_by_name(
    df_total_timings_1538069,
    df_split_timings_1538069,
    timing_types=_timing_types,
    add_ideal=['gradient'],
    exclude_from_rest=['partial derivatives'],
)

# Time versus NumCPU: colour encodes the timing type, line style encodes
# the 'real or ideal' column.
_g = sns.relplot(
    data=_df,
    kind="line",
    x='NumCPU',
    y='time [s]',
    hue='timing_type',
    style="real or ideal",
    markers=True,
    err_style="bars",
    legend='full',
)

# Horizontal reference lines for the baseline: its mean, plus fainter lines
# one standard deviation below and above it.
linestyle = dict(color='black', lw=0.7)
_g.axes[0, 0].axhline(vanilla_t, **linestyle)
_g.axes[0, 0].axhline(vanilla_t - vanilla_t_std, **linestyle, alpha=0.5)
_g.axes[0, 0].axhline(vanilla_t + vanilla_t_std, **linestyle, alpha=0.5)

# Run with optConst = 1

In [None]:
# Job 1604382: split timing info from the job's .out file.
df_split_timings_1604382 = build_comb_df_split_timing_info(
    '../rootbench/1604382.burrell.nikhef.nl.out'
)

In [None]:
# Job 1604382: benchmark results from the job's JSON output file.
dfs_1604382 = load_result_file(
    '../rootbench/1604382.burrell.nikhef.nl/RoofitMPworkspaceNoOptConst_1551699016.json',
    match_y_axes=True,
)

In [None]:
# Results of the 'BM_RooFit_MP_GradMinimizer_workspace_file_noOptConst' benchmark.
df_total_timings_1604382 = dfs_1604382['BM_RooFit_MP_GradMinimizer_workspace_file_noOptConst']

# Same frame without the two timing-related columns, with rows containing
# missing values removed, indexed by benchmark number.
df_meta_1604382 = (
    df_total_timings_1604382
    .drop(columns=['real_time', 'real or ideal'])
    .dropna()
    .set_index('benchmark_number', drop=True)
)

# Baseline: results of the 'BM_RooFit_RooMinimizer_workspace_file_noOptConst' benchmark.
df_baseline_timings_1604382 = dfs_1604382['BM_RooFit_RooMinimizer_workspace_file_noOptConst']

# Mean and standard deviation of the baseline 'real_time', divided by 1000
# (presumably ms -> s -- TODO confirm). NOTE: this rebinds the vanilla_t /
# vanilla_t_std names used for the earlier job's plots.
vanilla_t = df_baseline_timings_1604382['real_time'].mean() / 1000
vanilla_t_std = df_baseline_timings_1604382['real_time'].std() / 1000

In [None]:
# Timing types to combine for this job, including the partial-derivative timings.
_timing_types = {
    'update': 'update state',
    'gradient': 'gradient work',
    'terminate': 'terminate',
    'partial derivatives': 'partial derivative',
}

# Combine total and split timings; 'partial derivatives' is passed to
# exclude_from_rest, and an "ideal" variant is requested for the gradient part.
_df = combine_detailed_with_gbench_timings_by_name(
    df_total_timings_1604382,
    df_split_timings_1604382,
    timing_types=_timing_types,
    add_ideal=['gradient'],
    exclude_from_rest=['partial derivatives'],
)

# Time versus NumCPU: colour encodes the timing type, line style encodes
# the 'real or ideal' column.
_g = sns.relplot(
    data=_df,
    kind="line",
    x='NumCPU',
    y='time [s]',
    hue='timing_type',
    style="real or ideal",
    markers=True,
    err_style="bars",
    legend='full',
)

# Horizontal reference lines for the baseline: its mean, plus fainter lines
# one standard deviation below and above it.
linestyle = dict(color='black', lw=0.7)
_g.axes[0, 0].axhline(vanilla_t, **linestyle)
_g.axes[0, 0].axhline(vanilla_t - vanilla_t_std, **linestyle, alpha=0.5)
_g.axes[0, 0].axhline(vanilla_t + vanilla_t_std, **linestyle, alpha=0.5)

# Large workspace run

In [None]:
# Job 1604381: split timing info from the job's .out file.
df_split_timings_1604381 = build_comb_df_split_timing_info(
    '../rootbench/1604381.burrell.nikhef.nl.out'
)

In [None]:
# Job 1604381: benchmark results from the job's JSON output file.
dfs_1604381 = load_result_file(
    '../rootbench/1604381.burrell.nikhef.nl/RoofitMPworkspace_1551694135.json',
    match_y_axes=True,
)

OK, that job failed after 4 runs, but let's look at the timings it did produce anyway to get a better feeling for them:

In [None]:
# Keep only the 'gradient work' rows of the split timings and draw their
# times as a bar chart, one bar per row.
_df = df_split_timings_1604381.loc[
    df_split_timings_1604381['timing_type'] == 'gradient work'
]
_x = np.arange(_df.shape[0])
plt.bar(_x, _df['time [s]'])

In [None]:
# Summary statistics of the times selected into _df in the previous cell.
_df.loc[:, 'time [s]'].describe()

# More timings

In the next benchmarks, we added a lot more timing output that needs to be incorporated into the analysis:

- Line search timings: single lines starting with `line_search: `
- update_real timings on queue process
- update_real timings on worker processes
- absolute time stamps (in nanoseconds since epoch) for:
    + start migrad (this line has changed!)
    + end migrad (same)
    + for each worker: lines that contain either two or three stamps:
        - time of ask for task and time of rejection
        - time of ask for task, time of start, time of end of task
    + possibly the update_real/update_state timings as well (TODO: check the benchmark output)
    
As an additional bookkeeping complication, we need to run the large workspaces separately for each NumCPU value. This is both to speed up the runs (they can then run on the cluster in parallel) and because we currently get crashes when running everything in one go: with 10 repeats per NumCPU the whole thing just stops after 4 repeats of the single-worker run, and with 1 repeat it crashes after the single-worker run (though it does write out the benchmark data to JSON, which is promising for running the tasks separately).