In [1]:
import datetime

import line_profiler

from profiling_flash import profile_db_path
import hpctoolkit_dataframe
from hpctoolkit_dataframe import HPCtoolkitDataFrame

### Profiling the data analysis itself

In [2]:
%%time


# Line-by-line profiler covering the functions of the hpctoolkit_dataframe
# module; the same `profiler` object is re-enabled by the following cells so
# that all timings accumulate into one report.
profiler = line_profiler.LineProfiler()
profiler.add_module(hpctoolkit_dataframe)

profiler.enable()
try:
    # The timestamp and test name are passed to profile_db_path to pick one
    # stored profiling run -- presumably the run's start time; TODO confirm
    # against profiling_flash.
    _profile_path = profile_db_path(
        datetime.datetime(2018, 9, 18, 14, 44, 19, 473557),
        test_name='subset_Sedov_baseline',
    ).joinpath('experiment.xml')
    df = HPCtoolkitDataFrame(path=_profile_path, max_depth=None)
finally:
    # Always stop recording, even when loading fails, so that a failed run
    # does not leave the profiler enabled for unrelated cells.
    profiler.disable()

CPU times: user 2.87 s, sys: 21.4 ms, total: 2.89 s
Wall time: 2.88 s


In [3]:
%%time

profiler.enable()
try:
    # Compute the hot path of the loaded profile while recording line timings.
    # NOTE(review): 0.01 is presumably a cut-off on a cost ratio -- confirm
    # against HPCtoolkitDataFrame.hot_path.
    hot_path = df.hot_path(threshold=0.01)
finally:
    # Stop recording even if hot_path raises.
    profiler.disable()

CPU times: user 1.68 s, sys: 4.26 ms, total: 1.68 s
Wall time: 1.68 s


In [4]:
%%time

profiler.enable()
try:
    # Bare attribute access on purpose: the value is discarded, the access is
    # made only so that whatever work `compact` performs is recorded by the
    # profiler (presumably `compact` is a property -- TODO confirm).
    df.compact
finally:
    # Stop recording even if the access raises.
    profiler.disable()

CPU times: user 1.32 ms, sys: 91 µs, total: 1.41 ms
Wall time: 1.22 ms


In [5]:
# Report the per-line timings accumulated while the profiler was enabled in
# the three cells above (load, hot_path, compact).
profiler.print_stats()

Timer unit: 1e-06 s

Total time: 0.068866 s
File: /nfs2/mbysiek/Projects/docker-transpyle-flash/hpctoolkit_dataframe.py
Function: _read_xml at line 49

Line #      Hits         Time  Per Hit   % Time  Line Contents
    49                                           def _read_xml(path: pathlib.Path) -> ET.ElementTree:
    50         1       1210.0   1210.0      1.8      with path.open() as xml_file:
    51         1      67655.0  67655.0     98.2          xml_data = ET.parse(xml_file)
    52         1          1.0      1.0      0.0      return xml_data

Total time: 3.8e-05 s
File: /nfs2/mbysiek/Projects/docker-transpyle-flash/hpctoolkit_dataframe.py
Function: _metrics_formula_sub_predicate at line 55

Line #      Hits         Time  Per Hit   % Time  Line Contents
    55                                           def _metrics_formula_sub_predicate(match: t.Match) -> str:
    56        36         38.0      1.1    100.0      return 'data.get(self._metrics_by_id[{}])'.format(match.group()[1:])

In [6]:
# Column dtypes of the loaded frame.
# NOTE(review): the StdDev and CfVar columns come out as complex128 in the
# recorded output while all other metrics are float64 -- looks unintended
# (possibly a root of a negative value); confirm in hpctoolkit_dataframe.
df.dtypes

CPUTIME (usec):Sum (I)                        float64
CPUTIME (usec):Mean (I)                       float64
CPUTIME (usec):Mean (I) ratio of parent       float64
CPUTIME (usec):Mean (I) ratio of total        float64
CPUTIME (usec):Mean:num-src (I)               float64
CPUTIME (usec):StdDev (I)                  complex128
CPUTIME (usec):StdDev:accum2 (I)              float64
CPUTIME (usec):StdDev:num-src (I)             float64
CPUTIME (usec):CfVar (I)                   complex128
CPUTIME (usec):CfVar:accum2 (I)               float64
CPUTIME (usec):CfVar:num-src (I)              float64
CPUTIME (usec):Min (I)                        float64
CPUTIME (usec):Max (I)                        float64
CPUTIME (usec):Sum (E)                        float64
CPUTIME (usec):Mean (E)                       float64
CPUTIME (usec):Mean:num-src (E)               float64
CPUTIME (usec):StdDev (E)                  complex128
CPUTIME (usec):StdDev:accum2 (E)              float64
CPUTIME (usec):StdDev:num-sr

In [7]:
# Display the compact view of the hot path computed earlier.
hot_path.compact

Unnamed: 0,CPUTIME (usec):Mean (I),CPUTIME (usec):Mean (I) ratio of total,CPUTIME (usec):Mean (I) ratio of parent,module,file,line,procedure,type
-1,21146300.0,1.0,1.0,,,0,,root
6,21122400.0,0.99887,0.99887,libmonitor.so.0.0.0,<unknown file> [libmonitor.so.0.0.0],0,<program root>,procedure frame
8,21122400.0,0.99887,1.0,flash4,Flash.F90,43,main,procedure frame
9,21122400.0,0.99887,1.0,flash4,Flash.F90,43,,procedure
10,21122400.0,0.99887,1.0,flash4,Flash.F90,47,flash,procedure
154,20625700.0,0.975381,0.976485,flash4,Driver_evolveFlash.F90,38,driver_evolveflash_,procedure frame
170,20525700.0,0.970652,0.995152,flash4,Driver_evolveFlash.F90,164,driver_evolveflash_,loop
194,17406600.0,0.823151,0.848039,flash4,Hydro.F90,55,hydro_,procedure frame
707,12961200.0,0.61293,0.744614,flash4,hy_advance.F90,207,hy_advance_,procedure frame
708,12961200.0,0.61293,1.0,flash4,hy_advance.F90,75,<inline>,procedure


### Profiling data access

#### 1

`DataFrame.loc[[str(...)]]` vs `DataFrame[DataFrame.col == ...]` vs `DataFrame[DataFrame['col'] == ...]`

assuming that `DataFrame` is indexed by the stringified values of column `col`

In [7]:
%%timeit

# Variant A: label lookup through a one-element list; str(()) is the string '()'.
df.loc[[str(())]]

579 µs ± 34.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [8]:
# Check which type the list-of-labels lookup returns.
type(df.loc[[str(())]])

hpctoolkit_dataframe.HPCtoolkitDataFrame

In [9]:
%%timeit

# Variant B: boolean mask built with attribute-style column access, comparing
# each `location` value against the empty tuple.
# NOTE(review): relies on pandas treating the tuple `()` as a single value in
# the comparison -- re-check after a pandas upgrade.
df[df.location == ()]

544 µs ± 36.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [10]:
# Check which type the boolean-mask selection returns.
type(df[df.location == ()])

hpctoolkit_dataframe.HPCtoolkitDataFrame

In [11]:
%%timeit

# Variant C: same boolean mask, but with item-style column access.
df[df['location'] == ()]

529 µs ± 36.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


#### 2

`Series.at[...]` vs `Series.get(...)` vs `Series[...]`

In [12]:
# Scalar label (no inner list): the type check in the next cell shows that
# this yields the matching row as a Series rather than a frame.
filtered = df.loc['()']

In [13]:
# Check what kind of object the scalar-label lookup produced.
type(filtered)

pandas.core.series.Series

In [14]:
%%timeit

# Baseline: cost of the row lookup itself, for comparison with the element
# accesses timed in the following cells.
df.loc['()']

135 µs ± 1.68 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [15]:
%%timeit

# Variant A: label-based scalar accessor.
filtered.at['location']

7.71 µs ± 56.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [16]:
# Check the type of the value returned by `.at`.
type(filtered.at['location'])

tuple

In [17]:
%%timeit

# Variant B: dict-style `get`, which returns a default (None) instead of
# raising when the label is missing.
filtered.get('location')

9.82 µs ± 256 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [18]:
# Check the type of the value returned by `.get`.
type(filtered.get('location'))

tuple

In [19]:
%%timeit

# Variant C: plain item access.
filtered['location']

9.4 µs ± 23.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [20]:
# Check the type of the value returned by item access.
type(filtered['location'])

tuple

#### 3

`DataFrame.at['index', 'col']` vs `DataFrame.get('col').item()` vs `DataFrame['col'].item()`

assuming that `DataFrame` contains only one row (indexed as `'index'`)

In [21]:
# One-element list selector: the type check in the next cell shows that the
# result stays a frame (a single-row HPCtoolkitDataFrame).
# NOTE(review): this rebinds `filtered`, which held a Series in section 2;
# re-running the section 2 timing cells after this one would operate on a
# different kind of object.
filtered = df.loc[['()']]

In [22]:
# Check what kind of object the list-of-labels lookup produced.
type(filtered)

hpctoolkit_dataframe.HPCtoolkitDataFrame

In [23]:
%%timeit

# Baseline: cost of the single-row frame lookup itself, for comparison with
# the element accesses timed in the following cells.
df.loc[['()']]

522 µs ± 5.37 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [24]:
%%timeit

# Variant A: scalar accessor addressed by (row label, column label).
filtered.at['()', 'location']

3.69 µs ± 55.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [25]:
# Check the type of the value returned by `.at`.
type(filtered.at['()', 'location'])

tuple

In [26]:
%%timeit

# Variant B: fetch the column with `get`, then unwrap its only element;
# `.item()` is valid only because the frame has exactly one row.
filtered.get('location').item()

2.61 µs ± 104 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [27]:
# Check the type of the value returned by `get(...).item()`.
type(filtered.get('location').item())

tuple

In [28]:
%%timeit

# Variant C: fetch the column by item access, then unwrap its only element;
# `.item()` is valid only because the frame has exactly one row.
filtered['location'].item()

2.43 µs ± 34.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [29]:
# Check the type of the value returned by `[...].item()`.
type(filtered['location'].item())

tuple