# Profiling FLASH

initialization, import all required modules

In [1]:
import datetime
import itertools
import logging
import os
import pathlib
import pprint
import re
import shutil
import subprocess
import sys
import typing as t

import git
import pandas as pd

In [2]:
logging.basicConfig(level=logging.WARNING)

_LOG = logging.getLogger('profile_flash')

In [3]:
try:
    _HERE = pathlib.Path(__file__).parent.resolve()
except NameError:
    _HERE = pathlib.Path(os.getcwd()).resolve()

_RESULTS_ROOT = pathlib.Path(_HERE, 'results')

_HERE, _RESULTS_ROOT

(PosixPath('/nfs2/mbysiek/Projects/docker-transpyle-flash'),
 PosixPath('/nfs2/mbysiek/Projects/docker-transpyle-flash/results'))

check if the envvars are properly set

In [4]:
os.environ['CPATH'], os.environ['LD_LIBRARY_PATH'], shutil.which('hpcrun')

('/nfs2/mbysiek/Software/Spack/opt/spack/linux-ubuntu14.04-x86_64/gcc-8.2.0/hpctoolkit-master-wxx6jn7xuoo7lzqhg7kwxx4xl2nrlooc/include:/nfs2/mbysiek/Software/Spack/opt/spack/linux-ubuntu14.04-x86_64/gcc-8.2.0/papi-5.6.0-5p5d3qi2imn6m3szyl47gj73dpixhxpf/include:/nfs2/mbysiek/Software/Spack/opt/spack/linux-ubuntu14.04-x86_64/gcc-8.2.0/gcc-8.2.0-35mxytud35rkqxpuwxalsxp5s3gqq23f/include:/nfs2/mbysiek/Software/Spack/opt/spack/linux-ubuntu14.04-x86_64/gcc-8.2.0/mpc-1.1.0-lfad2lpghnhcljoohbmyz4iznbldjxte/include:/nfs2/mbysiek/Software/Spack/opt/spack/linux-ubuntu14.04-x86_64/gcc-8.2.0/mpfr-4.0.1-baptjiihukqeehfh7a3ecoualvmw4jtu/include:/nfs2/mbysiek/Software/Spack/opt/spack/linux-ubuntu14.04-x86_64/gcc-8.2.0/isl-0.19-j2nkr2ed5lm3cngy5zamecra6vw6fyks/include:/nfs2/mbysiek/Software/Spack/opt/spack/linux-ubuntu14.04-x86_64/gcc-8.2.0/gmp-6.1.2-2ii23bc4vfcihjejzkzw5k53sr7e5sum/include:/nfs2/mbysiek/Software/Spack/opt/spack/linux-ubuntu14.04-x86_64/gcc-8.2.0/superlu-5.2.1-bz4ai7ydn6puimqffcglkl7ky5

import profiling framework implementation

In [5]:
import profiling_flash
from profiling_flash import profile_experiment, date_str, profile_path, profile_db_path

In [6]:
import hpctoolkit_dataframe
from hpctoolkit_dataframe import HPCtoolkitDataFrame

In [7]:
# logging.getLogger('hpctoolkit_dataframe').setLevel(logging.DEBUG)
df = HPCtoolkitDataFrame(path=pathlib.Path(
    '/homes/mbysiek/Projects/data/flash/hpctoolkit/hpctoolkit-flash4-database-4325594/experiment.xml'))

In [12]:
df.columns

Index(['CPUTIME (usec):Sum (I)', 'CPUTIME (usec):Sum (I) ratio of parent',
       'CPUTIME (usec):Sum (I) ratio of total', 'CPUTIME (usec):Sum (E)',
       'callpath', 'module path', 'module', 'file path', 'file', 'line',
       'procedure', 'id', 'type'],
      dtype='object')

In [13]:
df.hot_path()[[
    'file', 'procedure',
    'CPUTIME (usec):Sum (I)', 'CPUTIME (usec):Sum (I) ratio of total', 'CPUTIME (usec):Sum (I) ratio of parent']]

Unnamed: 0,file,procedure,CPUTIME (usec):Sum (I),CPUTIME (usec):Sum (I) ratio of total,CPUTIME (usec):Sum (I) ratio of parent
-1,,,85290300.0,1.0,1.0
2,~unknown-file~,main,83479900.0,0.978774,0.978774
446,~unknown-file~,driver_evolveflash_,77729500.0,0.911352,0.931116
447,,driver_evolveflash_,76348700.0,0.895163,0.982236
2077,~unknown-file~,hydro_,53308300.0,0.625022,0.698221
2625,~unknown-file~,hy_advance_,40351500.0,0.473108,0.756946
2626,,hy_advance_,32116600.0,0.376556,0.795921
2628,~unknown-file~,hy_computefluxes_,31634600.0,0.370905,0.984992
2921,~unknown-file~,hy_getriemannstate_,21057100.0,0.246887,0.665635
2932,,hy_getriemannstate_,20969200.0,0.245857,0.995826


# 1. Running the experiments

## 1.1. Choose experiment

pick one

In [14]:
# Experiment 0

branch_nicknames = {
    'transpyle_experiments': 'base'}
problems = [
    ('Sedov', '-auto -2d +Mode1')]
objdir = 'object'
samples = 1
events = None
mpi_processes = [0]

test_name_template = 'subset_{problem}_baseline'

In [7]:
# Experiment 1

branch_nicknames = {
    'transpyle_experiments': 'base',
    'transpyle_experiments_outlined': 'outlined',
    'transpyle_experiments_autoinlined': 'autoinlined'}
problems = [
    ('Sod', '-auto -2d +Mode1'),
    ('Sedov', '-auto -2d +Mode1')]
objdir = 'object'
samples = 50
events = None
mpi_processes = [0]

test_name_template = 'subset_{problem}_{nickname}'

In [23]:
# Experiment 2

branch_nicknames = {
    'transpyle_experiments': 'base'}
problems = [
    ('Sod', '-auto -2d +Mode1'),
    ('Sedov', '-auto -2d +Mode1')]
objdir = 'object'
samples = 50
events = {'PAPI_MEM_WCY': 100}
mpi_processes = [0]

test_name_template = 'subset_{problem}_papi'

In [26]:
# Experiment 3

branch_nicknames = {
    'transpyle_experiments': 'base'}
problems = [
    ('Sod', '-auto -2d +Mode1')]
objdir = 'object'
samples = 1
events = {'PAPI_TOT_CYC': 100000, 'WALLCLOCK': 1 / 10}
mpi_processes = [1, 2, 4]

test_name_template = 'subset_{problem}_{nickname}_scale_{mpi_proc}'

In [None]:
# Experiment 4

branch_nicknames = {
    'SNIa_spack': 'base'}
problems = [
    ('SNIa_DoubleDetonation', '-auto -3d +cartesian -nxb=16 -nyb=16 -nzb=16 -maxblocks=100 xnet=True xnetData=Data_SN160 xnetGPU=False +uhd +starkiller starkillerGPU=False +Mode1 -debug +newMpole +noio'),
    #('SNIa_DoubleDetonation', '-auto -3d +cartesian -nxb=16 -nyb=16 -nzb=16 -maxblocks=100 xnet=True xnetData=Data_SN160 xnetGPU=False +uhd +starkiller starkillerGPU=False +Mode1 -debug +newMpole'),
    #('SNIa_DoubleDetonation', '-auto -3d +cartesian -nxb=16 -nyb=16 -nzb=16 -maxblocks=100 xnet=True xnetData=Data_SN160 xnetGPU=False +uhd +starkiller starkillerGPU=False +Mode1 +newMpole +noio'),
    #('SNIa_DoubleDetonation', '-auto -3d +cartesian -nxb=16 -nyb=16 -nzb=16 -maxblocks=100 xnet=True xnetData=Data_SN160 xnetGPU=False +uhd +starkiller starkillerGPU=False +Mode1 +newMpole')
    ]
objdir = 'sn1a-doubledet'
samples = 10
events = {'PAPI_TOT_CYC': 100000, 'WALLCLOCK': 1 / 10}
mpi_processes = [0, 1, 2, 4]

test_name_template = 'subset_{problem}_{nickname}_{mpi_proc}'

## 1.2. Run the experiment

In [9]:
_NOW = datetime.datetime.now()

In [10]:
_NOW

datetime.datetime(2018, 9, 18, 17, 22, 32, 628728)

In [12]:
profiling_flash._NOW = _NOW

In [12]:
%%time

for (branch, nickname), (problem, options), mpi_proc in itertools.product(
        branch_nicknames.items(), problems, mpi_processes):
    test_name = test_name_template.format(problem=problem, nickname=nickname, mpi_proc=mpi_proc)
    profile_experiment('flash-subset', '{} {}'.format(problem, options), branch, objdir, samples,
                       events=events, mpi_proc=mpi_proc, # rebuild=False,
                       test_name=test_name)



CPU times: user 151 ms, sys: 47.3 ms, total: 199 ms
Wall time: 3min 44s


In [None]:
clean_flash(pathlib.Path(_HERE, 'flash-subset', 'FLASH4.4', objdir),
            test_name_template.format(problem=problem, nickname=nickname, mpi_proc=mpi_proc))

## 1.3. Add experiment here to be able to review it later

In [13]:
test_date = _NOW

In [14]:
# Experiment 0

test_date = datetime.datetime(2018, 9, 18, 14, 44, 19, 473557)

In [21]:
# Experiment 1

test_date = datetime.datetime(2018, 9, 5, 12, 5, 19, 934255)

In [24]:
# Experiment 2

test_date = datetime.datetime(2018, 9, 11, 10, 2, 31, 884551)

In [27]:
# Experiment 3

test_date = datetime.datetime(2018, 9, 11, 11, 37, 45, 878931)

In [None]:
# Experiment 4

test_date = datetime.datetime(2018, 9, 12, 17, 17, 24, 213713)

## 1.4. Build other tools

In [13]:
profiling_flash.make_sfocu(pathlib.Path(_HERE, 'flash-subset', 'FLASH4.4'))



# 2. Review results

In [15]:
%%time

timings = {}
hot_paths = {}

for (branch, nickname), (problem, options), mpi_proc in itertools.product(
        branch_nicknames.items(), problems, mpi_processes):
    test_name = test_name_template.format(problem=problem, nickname=nickname, mpi_proc=mpi_proc)
    _profile_path = profile_db_path(test_date, test_name=test_name).joinpath('experiment.xml')
    df = HPCtoolkitDataFrame(path=_profile_path, max_depth=None)
    _key = tuple([
        value for _, value in [('problem', problem), ('nickname', nickname), ('mpi_proc', mpi_proc)]
        if '{{{}}}'.format(_) in test_name_template])
    timings[_key] = df
    hot_paths[_key] = df.hot_path()

CPU times: user 2.13 s, sys: 0 ns, total: 2.13 s
Wall time: 2.18 s


## 2.1. Past experiments

Experiment 0: sanity check

In [17]:
hot_paths['Sedov',].compact[-10:]

Unnamed: 0,module,file,line,procedure,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
787,flash4,hy_advance.F90,75,<inline>,13031300.0,0.616925,0.969735
793,flash4,hy_computeFluxes.F90,78,hy_computefluxes_,12999400.0,0.615414,0.997552
850,flash4,hy_getRiemannState.F90,68,hy_getriemannstate_,9935250.0,0.470352,0.764285
862,flash4,hy_getRiemannState.F90,476,hy_getriemannstate_,9911310.0,0.469219,0.99759
864,flash4,hy_getRiemannState.F90,477,hy_getriemannstate_,9903350.0,0.468842,0.999197
898,flash4,hy_dataReconstOneStep.F90,82,hy_datareconstonestep_,9179180.0,0.434559,0.926876
1016,flash4,hy_dataReconstOneStep.F90,485,hy_datareconstonestep_,8183700.0,0.387431,0.89155
1023,flash4,hy_DataReconstructNormalDir_PPM.F90,73,hy_datareconstructnormaldir_ppm_,8032430.0,0.380269,0.981516
1079,flash4,hy_upwindTransverseFlux.F90,43,hy_upwindtransverseflux_,1302000.0,0.061639,0.162093
1088,flash4,hy_upwindTransverseFlux.F90,132,hy_upwindtransverseflux_,1135180.0,0.053741,0.871874


Experiment 1: computational bottleneck

In [51]:
hot_paths['Sedov', 'base'].compact[-3:]

Unnamed: 0,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
"('<program root>.6', 'main.8', 'driver_evolveflash_.1004', '<loop 460.1436>', 'hydro_.4266', 'hy_advance_.5235', '<loop 6622.5443>', 'hy_computefluxes_.5445', 'hy_getriemannstate_.6548', '<loop 6882.6621>', '<loop 6884.6623>', 'hy_datareconstonestep_.6701', '<loop 6698.7398>', 'hy_datareconstructnormaldir_ppm_.7400')",7615040.0,0.389404,0.981026
"('<program root>.6', 'main.8', 'driver_evolveflash_.1004', '<loop 460.1436>', 'hydro_.4266', 'hy_advance_.5235', '<loop 6622.5443>', 'hy_computefluxes_.5445', 'hy_getriemannstate_.6548', '<loop 6882.6621>', '<loop 6884.6623>', 'hy_datareconstonestep_.6701', '<loop 6698.7398>', 'hy_datareconstructnormaldir_ppm_.7400', 'hy_upwindtransverseflux_.7411')",1165760.0,0.059613,0.153087
"('<program root>.6', 'main.8', 'driver_evolveflash_.1004', '<loop 460.1436>', 'hydro_.4266', 'hy_advance_.5235', '<loop 6622.5443>', 'hy_computefluxes_.5445', 'hy_getriemannstate_.6548', '<loop 6882.6621>', '<loop 6884.6623>', 'hy_datareconstonestep_.6701', '<loop 6698.7398>', 'hy_datareconstructnormaldir_ppm_.7400', 'hy_upwindtransverseflux_.7411', '<loop 7190.7415>')",1013498.0,0.051826,0.869388


In [52]:
hot_paths['Sedov', 'outlined'].compact[-2:]

Unnamed: 0,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
"('<program root>.6', 'main.8', 'driver_evolveflash_.859', '<loop 460.1273>', 'hydro_.4166', 'hy_advance_.5116', '<loop 6628.5311>', 'hy_computefluxes_.5313', 'hy_getriemannstate_.6438', '<loop 6888.6519>', '<loop 6890.6521>', 'hy_datareconstonestep_.6602', '<loop 6704.7269>', 'hy_datareconstructnormaldir_ppm_.7271')",7950080.0,0.394674,0.982663
"('<program root>.6', 'main.8', 'driver_evolveflash_.859', '<loop 460.1273>', 'hydro_.4166', 'hy_advance_.5116', '<loop 6628.5311>', 'hy_computefluxes_.5313', 'hy_getriemannstate_.6438', '<loop 6888.6519>', '<loop 6890.6521>', 'hy_datareconstonestep_.6602', '<loop 6704.7269>', 'hy_datareconstructnormaldir_ppm_.7271', 'hy_upwindtransverseflux_.7282')",1521750.0,0.075546,0.191413


In [53]:
hot_paths['Sedov', 'autoinlined'].compact[-2:]

Unnamed: 0,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
"('<program root>.6', 'main.8', 'driver_evolveflash_.1006', '<loop 460.1422>', 'hydro_.4494', 'hy_advance_.5477', '<loop 6624.5674>', 'hy_computefluxes_.5676', 'hy_getriemannstate_.6771', '<loop 6884.6828>', '<loop 6886.6830>', 'hy_datareconstonestep_.6911', '<loop 6700.7566>', 'hy_datareconstructnormaldir_ppm_.7568')",7436440.0,0.380932,0.981087
"('<program root>.6', 'main.8', 'driver_evolveflash_.1006', '<loop 460.1422>', 'hydro_.4494', 'hy_advance_.5477', '<loop 6624.5674>', 'hy_computefluxes_.5676', 'hy_getriemannstate_.6771', '<loop 6884.6828>', '<loop 6886.6830>', 'hy_datareconstonestep_.6911', '<loop 6700.7566>', 'hy_datareconstructnormaldir_ppm_.7568', 'hy_upwindtransverseflux_.7579')",1105940.0,0.056652,0.148719


In [54]:
times = {
    _: timing.at_paths(suffix=(re.compile('hy_upwindtransverseflux_\..*'),)).compact
    for _, timing in timings.items()}

In [69]:
comparisons = {}
for problem in problems:
    problem_times = {name: val for name, val in times.items() if name[1] == problem}
    comparisons[problem] = pd.DataFrame(
        data=[val.values.tolist()[0] for _, val in problem_times.items()], index=list(problem_times.keys()),
        columns=next(iter(problem_times.values())).columns.values)

    base = comparisons[problem].at[(problem, 'base'), 'CPUTIME (usec):Mean (I)']
    comparisons[problem].insert(
        0, 'speedup vs base',
        [base / row['CPUTIME (usec):Mean (I)'] for _, row in comparisons[problem].iterrows()])

In [70]:
comparisons['Sedov']

Unnamed: 0,speedup vs base,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
"(base, Sedov)",1.0,1165760.0,0.059613,0.153087
"(outlined, Sedov)",0.766065,1521750.0,0.075546,0.191413
"(autoinlined, Sedov)",1.05409,1105940.0,0.056652,0.148719


In [71]:
comparisons['Sod']

Unnamed: 0,speedup vs base,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
"(base, Sod)",1.0,945516.0,0.081821,0.293096
"(outlined, Sod)",0.783505,1206778.0,0.097547,0.333384
"(autoinlined, Sod)",1.005937,939936.0,0.081339,0.286377


# scaling

experiment 3

In [None]:
logging.getLogger('hpctoolkit_dataframe').setLevel(logging.INFO)

In [41]:
_cols = [
    'file', 'line', 'procedure',
    'PAPI_TOT_CYC:Mean (I)', 'CPUTIME (usec):Mean (I)', 'CPUTIME (usec):Mean:num-src (I)']

In [42]:
hot_paths['Sod', 'base', 1][_cols][-3:]

Unnamed: 0,file,line,procedure,PAPI_TOT_CYC:Mean (I),CPUTIME (usec):Mean (I),CPUTIME (usec):Mean:num-src (I)
8280,<unknown file> [flash4],0,hy_datareconstructnormaldir_mh_,12640300000.0,3948620.0,1.0
8472,<unknown file> [flash4],0,hy_upwindtransverseflux_,3791800000.0,1048990.0,1.0
8528,<unknown file> [flash4],0,,3282500000.0,857432.0,1.0


In [43]:
hot_paths['Sod', 'base', 2][_cols][-3:]

Unnamed: 0,file,line,procedure,PAPI_TOT_CYC:Mean (I),CPUTIME (usec):Mean (I),CPUTIME (usec):Mean:num-src (I)
13621,<unknown file> [flash4],0,hy_datareconstructnormaldir_mh_,6310650000.0,2151080.0,2.0
13919,<unknown file> [flash4],0,hy_upwindtransverseflux_,1889900000.0,592360.0,2.0
13980,<unknown file> [flash4],0,,1626650000.0,516650.0,2.0


In [44]:
hot_paths['Sod', 'base', 4][_cols][-1:]

Unnamed: 0,file,line,procedure,PAPI_TOT_CYC:Mean (I),CPUTIME (usec):Mean (I),CPUTIME (usec):Mean:num-src (I)
18041,<unknown file> [flash4],0,hy_datareconstructnormaldir_mh_,3152425000.0,1095135.0,4.0


In [46]:
# total_cycles = {}
column = 'PAPI_TOT_CYC:Mean (I)'
#column = 'CPUTIME (usec):Mean (I)'
for (branch, nickname), problem in itertools.product(branch_nicknames.items(), problems):
    # print(_timing['PAPI_TOT_CYC:Mean (I)'] * mpi_proc, mpi_proc)
    #total_cycles[nickname, problem, mpi_proc] = _timing['PAPI_TOT_CYC:Mean (I)']
    print(branch, nickname)
    base_mpi_proc = mpi_processes[0]
    print('scaling efficiency vs', base_mpi_proc, 'MPI rank(s)')
    base_value = timings[nickname, problem, base_mpi_proc].loc['()'][column] * base_mpi_proc
    #print(base_value)
    for mpi_proc in mpi_processes[1:]:
        value = timings[nickname, problem, mpi_proc].loc['()'][column] * mpi_proc
        #print(value)
        loss = (value - base_value) / value
        eff = 1.0 - loss
        print('at', mpi_proc, 'MPI ranks:', eff, 'i.e. loss:', loss)

transpyle_experiments base
scaling efficiency vs 1 MPI rank(s)
at 2 MPI ranks: 0.8985329456301092 i.e. loss: 0.10146705436989083
at 4 MPI ranks: 0.5713035894931454 i.e. loss: 0.4286964105068546


In [18]:
metrics_by_id = {4: 'PAPI_TOT_CYC:Sum (I)',
 5: 'PAPI_TOT_CYC:Mean (I)',
 6: 'PAPI_TOT_CYC:Mean:num-src (I)',
 7: 'PAPI_TOT_CYC:StdDev (I)',
 8: 'PAPI_TOT_CYC:StdDev:accum2 (I)',
 9: 'PAPI_TOT_CYC:StdDev:num-src (I)',
 10: 'PAPI_TOT_CYC:CfVar (I)',
 11: 'PAPI_TOT_CYC:CfVar:accum2 (I)',
 12: 'PAPI_TOT_CYC:CfVar:num-src (I)',
 13: 'PAPI_TOT_CYC:Min (I)',
 14: 'PAPI_TOT_CYC:Max (I)',
 15: 'PAPI_TOT_CYC:Sum (E)',
 16: 'PAPI_TOT_CYC:Mean (E)',
 17: 'PAPI_TOT_CYC:Mean:num-src (E)',
 18: 'PAPI_TOT_CYC:StdDev (E)',
 19: 'PAPI_TOT_CYC:StdDev:accum2 (E)',
 20: 'PAPI_TOT_CYC:StdDev:num-src (E)',
 21: 'PAPI_TOT_CYC:CfVar (E)',
 22: 'PAPI_TOT_CYC:CfVar:accum2 (E)',
 23: 'PAPI_TOT_CYC:CfVar:num-src (E)',
 24: 'PAPI_TOT_CYC:Min (E)',
 25: 'PAPI_TOT_CYC:Max (E)',
 26: 'CPUTIME (usec):Sum (I)',
 27: 'CPUTIME (usec):Mean (I)',
 28: 'CPUTIME (usec):Mean:num-src (I)',
 29: 'CPUTIME (usec):StdDev (I)',
 30: 'CPUTIME (usec):StdDev:accum2 (I)',
 31: 'CPUTIME (usec):StdDev:num-src (I)',
 32: 'CPUTIME (usec):CfVar (I)',
 33: 'CPUTIME (usec):CfVar:accum2 (I)',
 34: 'CPUTIME (usec):CfVar:num-src (I)',
 35: 'CPUTIME (usec):Min (I)',
 36: 'CPUTIME (usec):Max (I)',
 37: 'CPUTIME (usec):Sum (E)',
 38: 'CPUTIME (usec):Mean (E)',
 39: 'CPUTIME (usec):Mean:num-src (E)',
 40: 'CPUTIME (usec):StdDev (E)',
 41: 'CPUTIME (usec):StdDev:accum2 (E)',
 42: 'CPUTIME (usec):StdDev:num-src (E)',
 43: 'CPUTIME (usec):CfVar (E)',
 44: 'CPUTIME (usec):CfVar:accum2 (E)',
 45: 'CPUTIME (usec):CfVar:num-src (E)',
 46: 'CPUTIME (usec):Min (E)',
 47: 'CPUTIME (usec):Max (E)'}

In [19]:
data = {'PAPI_TOT_CYC:Sum (I)': 40943900000.0, 'PAPI_TOT_CYC:Mean (I)': 40943900000.0, 'PAPI_TOT_CYC:Mean:num-src (I)': 1.0, 'PAPI_TOT_CYC:StdDev (I)': 40943900000.0, 'PAPI_TOT_CYC:StdDev:accum2 (I)': 1.6764e+21, 'PAPI_TOT_CYC:StdDev:num-src (I)': 1.0, 'PAPI_TOT_CYC:CfVar (I)': 40943900000.0, 'PAPI_TOT_CYC:CfVar:accum2 (I)': 1.6764e+21, 'PAPI_TOT_CYC:CfVar:num-src (I)': 1.0, 'PAPI_TOT_CYC:Min (I)': 40943900000.0, 'PAPI_TOT_CYC:Max (I)': 40943900000.0, 'PAPI_TOT_CYC:Mean:num-src (E)': 1.0, 'PAPI_TOT_CYC:StdDev:num-src (E)': 1.0, 'PAPI_TOT_CYC:CfVar:num-src (E)': 1.0, 'PAPI_TOT_CYC:Min (E)': 2.22507e-308, 'CPUTIME (usec):Sum (I)': 14399400.0, 'CPUTIME (usec):Mean (I)': 14399400.0, 'CPUTIME (usec):Mean:num-src (I)': 1.0, 'CPUTIME (usec):StdDev (I)': 14399400.0, 'CPUTIME (usec):StdDev:accum2 (I)': 207342000000000.0, 'CPUTIME (usec):StdDev:num-src (I)': 1.0, 'CPUTIME (usec):CfVar (I)': 14399400.0, 'CPUTIME (usec):CfVar:accum2 (I)': 207342000000000.0, 'CPUTIME (usec):CfVar:num-src (I)': 1.0, 'CPUTIME (usec):Min (I)': 14399400.0, 'CPUTIME (usec):Max (I)': 14399400.0, 'CPUTIME (usec):Mean:num-src (E)': 1.0, 'CPUTIME (usec):StdDev:num-src (E)': 1.0, 'CPUTIME (usec):CfVar:num-src (E)': 1.0, 'CPUTIME (usec):Min (E)': 2.22507e-308, 'location': ()}

In [31]:
data

{'PAPI_TOT_CYC:Sum (I)': 40943900000.0,
 'PAPI_TOT_CYC:Mean (I)': 40943900000.0,
 'PAPI_TOT_CYC:Mean:num-src (I)': 1.0,
 'PAPI_TOT_CYC:StdDev (I)': 40943900000.0,
 'PAPI_TOT_CYC:StdDev:accum2 (I)': 1.6764e+21,
 'PAPI_TOT_CYC:StdDev:num-src (I)': 1.0,
 'PAPI_TOT_CYC:CfVar (I)': 40943900000.0,
 'PAPI_TOT_CYC:CfVar:accum2 (I)': 1.6764e+21,
 'PAPI_TOT_CYC:CfVar:num-src (I)': 1.0,
 'PAPI_TOT_CYC:Min (I)': 40943900000.0,
 'PAPI_TOT_CYC:Max (I)': 40943900000.0,
 'PAPI_TOT_CYC:Mean:num-src (E)': 1.0,
 'PAPI_TOT_CYC:StdDev:num-src (E)': 1.0,
 'PAPI_TOT_CYC:CfVar:num-src (E)': 1.0,
 'PAPI_TOT_CYC:Min (E)': 2.22507e-308,
 'CPUTIME (usec):Sum (I)': 14399400.0,
 'CPUTIME (usec):Mean (I)': 14399400.0,
 'CPUTIME (usec):Mean:num-src (I)': 1.0,
 'CPUTIME (usec):StdDev (I)': 14399400.0,
 'CPUTIME (usec):StdDev:accum2 (I)': 207342000000000.0,
 'CPUTIME (usec):StdDev:num-src (I)': 1.0,
 'CPUTIME (usec):CfVar (I)': 14399400.0,
 'CPUTIME (usec):CfVar:accum2 (I)': 207342000000000.0,
 'CPUTIME (usec):CfVar:nu

In [23]:
from math import sqrt

In [22]:
sqrt(
    (data.get(metrics_by_id[8]) / data.get(metrics_by_id[9]))
    - pow(data.get(metrics_by_id[7]) / data.get(metrics_by_id[9]), 2))

ValueError: math domain error

In [29]:
_ = (data.get(metrics_by_id[8]) / data.get(metrics_by_id[9])) - pow(data.get(metrics_by_id[7]) / data.get(metrics_by_id[9]), 2)

In [30]:
sqrt(_)

ValueError: math domain error

## SNIa scaling

experiment 4

In [11]:
%%time

timings = {}
hot_paths = {}

for (branch, nickname), (problem, options), mpi_proc in itertools.product(
        branch_nicknames.items(), problems, mpi_processes):
    test_name = test_name_template.format(problem, nickname, mpi_proc)
    _profile_path = profile_db_path(test_date, test_name=test_name).joinpath('experiment.xml')
    df = HPCtoolkitDataFrame(path=_profile_path)
    timings[nickname, problem, mpi_proc] = df
    hot_paths[nickname, problem, mpi_proc] = df.hot_path()

CPU times: user 6.45 s, sys: 111 ms, total: 6.56 s
Wall time: 6.71 s


In [13]:
hot_paths['base', 'SNIa_DoubleDetonation', 0][:].compact

Unnamed: 0,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
(),14374800.0,1.0,1.0
"('<program root>.10',)",14358800.0,0.998887,0.998887
"('<program root>.10', 'main.12')",14358800.0,0.998887,1.0
"('<program root>.10', 'main.12', 'driver_evolveflash_.698')",14076400.0,0.979241,0.980333
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>')",14020400.0,0.975346,0.996022
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210')",12120700.0,0.843191,0.864505
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836')",9637450.0,0.670441,0.795123
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>')",9246560.0,0.643248,0.959441
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134')",9218620.0,0.641304,0.996978
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137')",6027370.0,0.419301,0.653826


In [18]:
timings['base', 'SNIa_DoubleDetonation', 0] \
    .at_paths(prefix=('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>',
                      'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134')) \
    .at_depth(9).compact.sort_values('CPUTIME (usec):Mean (I) ratio of parent', ascending=False)[:10]

Unnamed: 0,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137')",6027370.0,0.419301,0.653826
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getfaceflux_.5395')",2421240.0,0.168436,0.262647
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_unsplitupdate_.6637')",219425.0,0.015265,0.023802
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'eos_wrapped_.6873')",119660.0,0.008324,0.01298
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', '<loop 6622.9755>')",59879.0,0.004166,0.006495
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', '<loop 6614.9728>')",47861.0,0.00333,0.005192
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_energyfix_.6843')",23951.0,0.001666,0.002598
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', '<loop 6630.9778>')",23935.0,0.001665,0.002596
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'timers_stopstring_.6787')",23932.0,0.001665,0.002596
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'timers_stopstring_.9614')",23930.0,0.001665,0.002596


In [19]:
timings['base', 'SNIa_DoubleDetonation', 0] \
    .at_paths(prefix=('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>',
                      'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134',
                      'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>',
                      'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388')) \
    .at_depth(15).compact.sort_values('CPUTIME (usec):Mean (I) ratio of parent', ascending=False)[:10]

Unnamed: 0,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', 'hy_upwindtransverseflux_.8603')",1156540.0,0.080456,0.286506
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', 'hy_tvdslope_.8917')",913727.0,0.063565,0.226355
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', '<statement 6043>')",610170.0,0.042447,0.151156
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', '<loop 6054.9229>')",474773.0,0.033028,0.117614
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', 'hy_eigenvector_.8524')",442741.0,0.0308,0.109679
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', 'hy_eigenparameters_.8494')",135532.0,0.009428,0.033575
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', '<loop 6050.9217>')",51858.0,0.003608,0.012847
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', '<loop 6046.9190>')",47839.0,0.003328,0.011851
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', '<loop 6096.9340>')",43899.0,0.003054,0.010875
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>', 'hy_computefluxes_.5134', 'hy_getriemannstate_.7137', '<loop 6842.7262>', '<loop 6844.7264>', 'hy_datareconstonestep_.7446', '<loop 6658.8342>', 'hy_datareconstructnormaldir_mh_.8388', 'hy_eigenvalue_.8516')",27912.0,0.001942,0.006915


In [22]:
timings['base', 'SNIa_DoubleDetonation', 0] \
    .at_paths(prefix=('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>',
                      'hydro_.3210', 'hy_advance_.4836')) \
    .at_depth(7).compact

Unnamed: 0,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', 'grid_putfluxdata_.4838')",123758.0,0.008609,0.012841
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', 'timers_startstring_.4938')",3991.0,0.000278,0.000414
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', 'grid_conservefluxes_.4946')",79471.0,0.005528,0.008246
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', 'timers_stopstring_.5130')",,,
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6582.5132>')",9246560.0,0.643248,0.959441
"('<program root>.10', 'main.12', 'driver_evolveflash_.698', '<loop 458.748>', 'hydro_.3210', 'hy_advance_.4836', '<loop 6584.9815>')",183670.0,0.012777,0.019058
