Input: Octo-Tiger HPX output, including performance counter values

Output: CSV
```csv
iteration,locality,subgrids,amr_bound,subgrid_leaves,idle_rate_0,idle_rate_1,...,idle_rate_19
```

In [None]:
import os, glob
import sys, re
import pandas as pd

In [None]:
# Collect the run output files from the current directory.  glob.glob() returns
# its matches in arbitrary (filesystem-dependent) order, so sort them to make
# run_files[0] refer to the same file on every run and platform.
run_files = sorted(glob.glob('*.txt'))

In [None]:
# Use the first run file as the sample for the exploratory cells below.
# Fail with a descriptive error instead of a bare IndexError when the
# directory contains no run files at all.
if not run_files:
    raise FileNotFoundError("no '*.txt' run files found in the current directory")
r0f = os.path.join(os.curdir, run_files[0])
print(r0f)

Regex patterns for finding performance counter lines and for parsing counter names

In [None]:
# Matches one complete performance-counter output line:
#   /object{instance}/counter,<field>,<field>,<field>,<field>[,<field>]
# i.e. the full counter name followed by four or five further comma-separated
# fields.  Raw strings avoid the invalid escape sequences '\{', '\}' and '\d'
# (a SyntaxWarning on recent Python versions), and every negated character
# class also excludes '\n' so that a match can never run across several lines
# of the input.
pfx_counter_line_pattern = re.compile(
    r'^/[^{\n]+\{[^}\n]+\}/([^,\n]+,){4,5}[^,\n]+$', re.MULTILINE)

# Splits a counter name of the form
#   /object{locality#N/instance}/counter[@params]
# into named groups.  The instance is captured either as 'instance1' together
# with 'thread_id' (for 'pool#<pool>/<thread-kind>#<id>' instances) or as
# 'instance2' (for anything else); the group that did not take part is None.
general_counter_form_pattern = re.compile(
    r'/(?P<object>[^{]+)'
    r'\{locality#(?P<locality>\d+)/'
    r'(?:(?:(?P<instance1>pool#[^/]+/[^#]+)#(?P<thread_id>\d+))|(?P<instance2>[^}]+))'
    r'\}/(?P<counter>[^@]+)'
    r'(?:@(?P<params>.+))?')

In [None]:
def test_general_counter_form_pattern(subject):
    """Parse *subject* with ``general_counter_form_pattern``.

    Asserts that the pattern matches at the start of *subject* and returns
    the dictionary of all named groups of that match.
    """
    parsed_name = general_counter_form_pattern.match(subject)
    assert parsed_name is not None
    named_groups = parsed_name.groupdict()
    return named_groups

# Worker-thread instance: pool/thread kind and thread id are captured
# separately, and 'instance2' stays None.
assert test_general_counter_form_pattern(
    '/threads{locality#0/pool#default/worker-thread#0}/count/cumulative'
) == {
    'object': 'threads',
    'locality': '0',
    'instance1': 'pool#default/worker-thread',
    'thread_id': '0',
    'instance2': None,
    'counter': 'count/cumulative',
    'params': None,
}

# Any other instance lands in 'instance2'.  When a whole output line is passed,
# the 'counter' group also swallows the trailing comma-separated fields.
assert test_general_counter_form_pattern(
    '/threads{locality#61/total/total}/count/cumulative,44,154828.184221,[s],2.28135e+09'
) == {
    'object': 'threads',
    'locality': '61',
    'instance1': None,
    'thread_id': None,
    'instance2': 'total/total',
    'counter': 'count/cumulative,44,154828.184221,[s],2.28135e+09',
    'params': None,
}

In [None]:
def test_pfx_counter_line_pattern():
    """Run ``pfx_counter_line_pattern`` over a fixed sample of counter output.

    Returns the text of every match found in the sample, in order of
    appearance.
    """
    sample_output = '''
/octotiger{locality#0/total}/subgrid_leaves,2,3602.438779,[s],32428
/octotiger{locality#1/total}/subgrid_leaves,2,3602.428582,[s],34118
/octotiger{locality#2/total}/subgrid_leaves,2,3602.430246,[s],33918
/octotiger{locality#3/total}/subgrid_leaves,2,3602.431576,[s],34443
/octotiger{locality#4/total}/subgrid_leaves,2,3602.436175,[s],33173
/threads{locality#59/total/total}/count/cumulative,44,154828.198473,[s],2.3065e+09
/threads{locality#60/total/total}/count/cumulative,44,154828.221342,[s],2.24724e+09
/threads{locality#61/total/total}/count/cumulative,44,154828.184221,[s],2.28135e+09
/threads{locality#62/total/total}/count/cumulative,44,154828.221351,[s],2.20491e+09
/threads{locality#63/total/total}/count/cumulative,44,154828.216028,[s],2.05324e+09
/threads{locality#0/pool#default/worker-thread#0}/count/cumulative,44,154828.257432,[s],1.14378e+08
/threads{locality#0/pool#default/worker-thread#1}/count/cumulative,44,154828.260203,[s],1.05219e+08
/threads{locality#0/pool#default/worker-thread#2}/count/cumulative,44,154828.262954,[s],1.04616e+08
/threads{locality#0/pool#default/worker-thread#3}/count/cumulative,44,154828.262962,[s],1.04786e+08
    '''
    matched_lines = [
        found.group(0)
        for found in pfx_counter_line_pattern.finditer(sample_output)
    ]
    return matched_lines

# Every counter line of the sample must be returned as its own match, in order
# of appearance and without the surrounding whitespace of the sample string.
assert test_pfx_counter_line_pattern() == [
    '/octotiger{locality#0/total}/subgrid_leaves,2,3602.438779,[s],32428',
    '/octotiger{locality#1/total}/subgrid_leaves,2,3602.428582,[s],34118',
    '/octotiger{locality#2/total}/subgrid_leaves,2,3602.430246,[s],33918',
    '/octotiger{locality#3/total}/subgrid_leaves,2,3602.431576,[s],34443',
    '/octotiger{locality#4/total}/subgrid_leaves,2,3602.436175,[s],33173',
    '/threads{locality#59/total/total}/count/cumulative,44,154828.198473,[s],2.3065e+09',
    '/threads{locality#60/total/total}/count/cumulative,44,154828.221342,[s],2.24724e+09',
    '/threads{locality#61/total/total}/count/cumulative,44,154828.184221,[s],2.28135e+09',
    '/threads{locality#62/total/total}/count/cumulative,44,154828.221351,[s],2.20491e+09',
    '/threads{locality#63/total/total}/count/cumulative,44,154828.216028,[s],2.05324e+09',
    '/threads{locality#0/pool#default/worker-thread#0}/count/cumulative,44,154828.257432,[s],1.14378e+08',
    '/threads{locality#0/pool#default/worker-thread#1}/count/cumulative,44,154828.260203,[s],1.05219e+08',
    '/threads{locality#0/pool#default/worker-thread#2}/count/cumulative,44,154828.262954,[s],1.04616e+08',
    '/threads{locality#0/pool#default/worker-thread#3}/count/cumulative,44,154828.262962,[s],1.04786e+08'
]


Read one file for testing

In [None]:
# Load the complete text of the sample run file into memory.
with open(r0f) as r0h:
    r0c = r0h.read()

In [None]:
%%time
# Turn every counter line of the sample file into one flat record.
#
# A record consists of the decomposed counter name (object, locality,
# instance, counter, thread id, parameters) followed by the six
# comma-separated fields of the raw line (full counter name, iteration,
# timestamp, timestamp unit, value, unit).
all_counters = []
for pfx_counter_line in pfx_counter_line_pattern.finditer(r0c):
    raw_line = pfx_counter_line.group(0)
    line_split = raw_line.split(',')
    if len(line_split) == 5:
        # Lines without a trailing unit field get the unit '1'.
        # Use append(): `line_split += ('1')` would extend the list with the
        # *characters* of the string, because ('1') is not a tuple.
        line_split.append('1')
    assert len(line_split) == 6

    raw_general_name = line_split[0]
    split_general_form = general_counter_form_pattern.match(raw_general_name)
    assert split_general_form is not None
    countername_groups = split_general_form.groupdict()

    # The pattern captures the instance in one of two alternative groups;
    # whichever took part in the match is the instance of this counter.
    instance = countername_groups['instance1']
    if instance is None:
        instance = countername_groups['instance2']

    all_counters.append((
        countername_groups['object'],
        countername_groups['locality'],
        instance,
        countername_groups['counter'],
        countername_groups['thread_id'],
        countername_groups['params'],
    ) + tuple(line_split))

df = pd.DataFrame(all_counters, columns=[
    'objectname', 'locality', 'instance', 'countername', 'thread_id',
    'parameters', 'general_form', 'iteration', 'timestamp',
    'timestamp_unit', 'value', 'unit'])
# An empty frame means that no counter line was recognised in the file.
assert len(df) != 0

In [None]:
# The parser yields strings only; convert the numeric columns.
df['locality'] = pd.to_numeric(df['locality'], downcast='unsigned')
df['iteration'] = pd.to_numeric(df['iteration'], downcast='unsigned')
df['timestamp'] = pd.to_numeric(df['timestamp'])
df['value'] = pd.to_numeric(df['value'])
df['thread_id'] = pd.to_numeric(df['thread_id'], downcast='unsigned')

# Scale all idle-rate readings by 0.01
# (presumably converting from units of 0.01% to percent; confirm).
df.loc[df['countername'] == 'idle-rate', 'value'] *= 0.01

In [None]:
# Keep only the rows of interest: discard every AGAS counter as well as the
# cumulative thread counts (count/cumulative-phases and count/cumulative).
df = df.loc[
    (df.objectname != 'agas')
    & ~df.countername.isin(['count/cumulative-phases', 'count/cumulative'])
]

In [None]:
# Remove columns that carry no information.

# No counter is expected to have parameters, so that column must be
# completely empty before it is dropped together with the timestamp unit.
assert df.parameters.isnull().all()
df = df.drop(columns=['parameters', 'timestamp_unit'])

# units can only be [0.01%], 1, [s], and [ns]
def check_all_data_units():
    """Assert that every row of the global ``df`` has a known unit.

    The accepted units are '1', '[0.01%]', '[s]' and '[ns]'; an
    AssertionError is raised if any row carries a different unit.
    """
    known_units = ['1', '[0.01%]', '[s]', '[ns]']
    unexpected = df.loc[~df.unit.isin(known_units)]
    assert len(unexpected) == 0
check_all_data_units()

# With the units validated, discard the remaining bookkeeping columns.
df = df.drop(columns=['unit', 'timestamp', 'general_form'])

In [None]:
# Display the cleaned, long-format counter table.
df

In [None]:
# One row per (iteration, locality) and one column per counter name.
octo_pivot = df.pivot_table(
    values='value',
    index=['iteration', 'locality'],
    columns=['countername'],
    dropna=False,
)
# The idle-rate column is removed from this table.
octo_pivot = octo_pivot.drop(columns='idle-rate')
octo_pivot

In [None]:
# Per-worker-thread idle rates: one row per (iteration, locality) and one
# column per thread id.
#
# Only 'idle-rate' rows that belong to a thread are pivoted, so no other
# per-thread counter can be averaged into these columns.  The columns are
# labelled idle_rate_<thread id>, matching the CSV header documented at the top
# of this notebook, instead of the bare float thread ids (0.0, 1.0, ...) that
# the pivot produces.
idle_rate_pivot = (
    df.loc[(df.countername == 'idle-rate') & df.thread_id.notnull()]
    .pivot_table(index=['iteration', 'locality'], columns=['thread_id'], values='value')
    .rename(columns=lambda thread_id: 'idle_rate_{}'.format(int(thread_id)))
)
idle_rate_pivot

In [None]:
# Put the per-counter table and the per-thread idle rates side by side;
# both are indexed by (iteration, locality).
result = pd.concat((octo_pivot, idle_rate_pivot), axis='columns')
result

In [None]:
# Write the combined table, including its (iteration, locality) index, to a
# CSV file in the current directory.  NOTE(review): the output name is fixed
# and does not depend on which run file was read; confirm this is intended.
result.to_csv('rcb12.term.csv')