Input: Octo-Tiger HPX output, including Performance counter values

Output: CSV
```csv
iteration,locality,subgrids,amr_bound,subgrid_leaves,idle_rate_0,idle_rate_1,...,idle_rate_19
```

In [1]:
import os, glob
import sys, re
import pandas as pd

In [2]:
run_files = glob.glob('*.txt')

In [3]:
r0f = os.path.join(os.curdir, run_files[0])
print(r0f)

./64_3.txt


Counter line search and counter name parsing regex patterns

In [4]:
pfx_counter_line_pattern = re.compile('^/[^{]+\{[^}]+\}/([^,]+,){4,5}[^,\n]+$', re.MULTILINE)
general_counter_form_pattern = re.compile('/(?P<object>[^{]+)\{locality#(?P<locality>\d+)/(?:(?:(?P<instance1>pool#[^/]+/[^#]+)#(?P<thread_id>\d+))|(?P<instance2>[^}]+))\}/(?P<counter>[^@]+)(?:@(?P<params>.+))?')

In [5]:
def test_general_counter_form_pattern(subject):
    m = general_counter_form_pattern.match(subject)
    assert m is not None
    return m.groupdict()

assert test_general_counter_form_pattern('/threads{locality#0/pool#default/worker-thread#0}/count/cumulative') == {'object': 'threads', 'locality': '0', 'instance1': 'pool#default/worker-thread', 'thread_id': '0', 'instance2': None, 'counter': 'count/cumulative', 'params': None}
assert test_general_counter_form_pattern('/threads{locality#61/total/total}/count/cumulative,44,154828.184221,[s],2.28135e+09') == {'object': 'threads', 'locality': '61', 'instance1': None, 'thread_id': None, 'instance2': 'total/total', 'counter': 'count/cumulative,44,154828.184221,[s],2.28135e+09', 'params': None}

In [6]:
def test_pfx_counter_line_pattern():
    subject = '''
/octotiger{locality#0/total}/subgrid_leaves,2,3602.438779,[s],32428
/octotiger{locality#1/total}/subgrid_leaves,2,3602.428582,[s],34118
/octotiger{locality#2/total}/subgrid_leaves,2,3602.430246,[s],33918
/octotiger{locality#3/total}/subgrid_leaves,2,3602.431576,[s],34443
/octotiger{locality#4/total}/subgrid_leaves,2,3602.436175,[s],33173
/threads{locality#59/total/total}/count/cumulative,44,154828.198473,[s],2.3065e+09
/threads{locality#60/total/total}/count/cumulative,44,154828.221342,[s],2.24724e+09
/threads{locality#61/total/total}/count/cumulative,44,154828.184221,[s],2.28135e+09
/threads{locality#62/total/total}/count/cumulative,44,154828.221351,[s],2.20491e+09
/threads{locality#63/total/total}/count/cumulative,44,154828.216028,[s],2.05324e+09
/threads{locality#0/pool#default/worker-thread#0}/count/cumulative,44,154828.257432,[s],1.14378e+08
/threads{locality#0/pool#default/worker-thread#1}/count/cumulative,44,154828.260203,[s],1.05219e+08
/threads{locality#0/pool#default/worker-thread#2}/count/cumulative,44,154828.262954,[s],1.04616e+08
/threads{locality#0/pool#default/worker-thread#3}/count/cumulative,44,154828.262962,[s],1.04786e+08
    '''
    return [i.group() for i in pfx_counter_line_pattern.finditer(subject)]

assert test_pfx_counter_line_pattern() == [
    '/octotiger{locality#0/total}/subgrid_leaves,2,3602.438779,[s],32428',
    '/octotiger{locality#1/total}/subgrid_leaves,2,3602.428582,[s],34118',
    '/octotiger{locality#2/total}/subgrid_leaves,2,3602.430246,[s],33918',
    '/octotiger{locality#3/total}/subgrid_leaves,2,3602.431576,[s],34443',
    '/octotiger{locality#4/total}/subgrid_leaves,2,3602.436175,[s],33173',
    '/threads{locality#59/total/total}/count/cumulative,44,154828.198473,[s],2.3065e+09',
    '/threads{locality#60/total/total}/count/cumulative,44,154828.221342,[s],2.24724e+09',
    '/threads{locality#61/total/total}/count/cumulative,44,154828.184221,[s],2.28135e+09',
    '/threads{locality#62/total/total}/count/cumulative,44,154828.221351,[s],2.20491e+09',
    '/threads{locality#63/total/total}/count/cumulative,44,154828.216028,[s],2.05324e+09',
    '/threads{locality#0/pool#default/worker-thread#0}/count/cumulative,44,154828.257432,[s],1.14378e+08',
    '/threads{locality#0/pool#default/worker-thread#1}/count/cumulative,44,154828.260203,[s],1.05219e+08',
    '/threads{locality#0/pool#default/worker-thread#2}/count/cumulative,44,154828.262954,[s],1.04616e+08',
    '/threads{locality#0/pool#default/worker-thread#3}/count/cumulative,44,154828.262962,[s],1.04786e+08'
]


Read one file for testing

In [7]:
with open(r0f, 'r') as r0h:
    r0c = r0h.read()

In [8]:
%%time
all_counters = []
for pfx_counter_line in pfx_counter_line_pattern.finditer(r0c):
    raw_line = pfx_counter_line.group(0)
    line_split = raw_line.split(',')
    if len(line_split) == 5:
        # unit for count values
        line_split += ('1')
    assert len(line_split) == 6
    raw_general_name = line_split[0]
    split_general_form = general_counter_form_pattern.match(raw_general_name)
    assert split_general_form is not None
    countername_groups = split_general_form.groupdict()
    if countername_groups['instance1'] is not None:
        countername_groups['instance'] = countername_groups['instance1']
    else:
        countername_groups['instance'] = countername_groups['instance2']
    
    del countername_groups['instance1']
    del countername_groups['instance2']

    all_counters += [(
        countername_groups["object"],
        countername_groups["locality"],
        countername_groups["instance"],
        countername_groups["counter"],
        countername_groups["thread_id"],
        countername_groups["params"],
    ) + tuple(line_split)]

df = pd.DataFrame(all_counters, columns=[
    'objectname', 'locality', 'instance', 'countername', 'thread_id',
    'parameters', 'general_form', 'iteration', 'timestamp',
    'timestamp_unit', 'value', 'unit'])
assert len(df) != 0

CPU times: user 2.14 s, sys: 114 ms, total: 2.26 s
Wall time: 2.28 s


In [9]:
df.locality = pd.to_numeric(df.locality, downcast='unsigned')
df.iteration = pd.to_numeric(df.iteration, downcast='unsigned')
df.timestamp = pd.to_numeric(df.timestamp)
df.value = pd.to_numeric(df.value)
df.thread_id = pd.to_numeric(df.thread_id, downcast='unsigned')
df.loc[df.countername == 'idle-rate', 'value'] *= 0.01

In [10]:
# drop AGAS results
df = df.loc[df.objectname != 'agas']
# drop threads...pool#default/worker-thread...count/cumulative-phases	
df = df.loc[df.countername != 'count/cumulative-phases']	
df = df.loc[df.countername != 'count/cumulative']

In [11]:
# remove useless columns

# no parameters are expected
assert len(df.loc[~df.parameters.isnull()]) == 0
del df['parameters']

del df['timestamp_unit']

# units can only be [0.01%], 1, [s], and [ns]
def check_all_data_units():
    x = df.loc[(df.unit != '1')]
    x = x.loc[x.unit != '[0.01%]']
    x = x.loc[x.unit != '[s]']
    x = x.loc[x.unit != '[ns]']
    assert len(x) == 0
check_all_data_units()

del df['unit']

del df['timestamp']
del df['general_form']

In [12]:
#df.set_index(['iteration', 'locality', 'objectname', 'instance', 'countername'], inplace=True)

In [32]:
df

Unnamed: 0,objectname,locality,instance,countername,thread_id,iteration,value
5596,octotiger,0,total,subgrids,,1,0.00
5597,octotiger,1,total,subgrids,,1,0.00
5598,octotiger,2,total,subgrids,,1,0.00
5599,octotiger,3,total,subgrids,,1,0.00
5600,octotiger,4,total,subgrids,,1,0.00
...,...,...,...,...,...,...,...
313803,threads,63,pool#default/worker-thread,idle-rate,15.0,44,99.99
313804,threads,63,pool#default/worker-thread,idle-rate,16.0,44,99.99
313805,threads,63,pool#default/worker-thread,idle-rate,17.0,44,99.99
313806,threads,63,pool#default/worker-thread,idle-rate,18.0,44,99.99


In [45]:
y = df.pivot_table(index=['iteration', 'locality'], columns=['countername'], values='value', dropna=False)
del y['idle-rate']
y

Unnamed: 0_level_0,countername,amr_bounds,subgrid_leaves,subgrids
iteration,locality,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0,0.0,0.0,0.0
1,1,0.0,0.0,0.0
1,2,0.0,0.0,0.0
1,3,0.0,0.0,0.0
1,4,0.0,0.0,0.0
...,...,...,...,...
44,59,23920.0,6028.0,15481.0
44,60,63612.0,15037.0,25087.0
44,61,11652.0,60117.0,18646.0
44,62,30880.0,18753.0,21916.0


In [46]:
x = df.pivot_table(index=['iteration', 'locality'], columns=['thread_id'], values='value')
x

Unnamed: 0_level_0,thread_id,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0
iteration,locality,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,97.43,97.91,97.97,98.00,98.04,97.95,98.00,97.95,97.82,97.89,96.85,99.13,99.07,99.09,99.17,99.05,99.13,99.06,99.13,99.08
1,1,99.95,99.97,99.95,99.97,99.98,99.91,99.97,99.97,99.91,99.81,99.82,99.97,99.99,99.98,99.98,99.98,99.96,99.97,99.99,99.97
1,2,99.94,99.97,99.94,99.90,99.97,99.97,99.97,99.97,99.97,99.90,99.98,99.97,99.98,99.97,99.97,99.98,99.96,99.98,99.98,99.97
1,3,99.94,99.94,99.95,99.97,99.96,99.97,99.97,99.90,99.97,99.98,99.98,99.98,99.98,99.98,99.98,99.97,99.97,99.97,99.98,99.98
1,4,99.96,99.80,99.97,99.97,99.97,99.90,99.98,99.98,99.94,99.97,99.97,99.97,99.98,99.97,99.97,99.97,99.98,99.98,99.98,99.97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44,59,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99
44,60,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99
44,61,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99
44,62,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99


In [48]:
z = pd.concat([y, x], axis=1, sort=False)

In [49]:
z.to_csv('rcb12.term.csv')

In [14]:
df2 = df.copy()

# drop data without column id
df2.dropna(subset=['thread_id'], inplace=True)

df2.set_index(['iteration', 'locality', 'objectname', 'instance', 'countername'], inplace=True)
df2 = df2.pivot(columns='thread_id')
print(df2.index.names)

#df2
df3 = df2.reset_index()
df3

['iteration', 'locality', 'objectname', 'instance', 'countername']


Unnamed: 0_level_0,iteration,locality,objectname,instance,countername,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value
thread_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,0.0,1.0,2.0,3.0,4.0,...,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0
0,1,0,threads,pool#default/worker-thread,idle-rate,97.43,97.91,97.97,98.00,98.04,...,96.85,99.13,99.07,99.09,99.17,99.05,99.13,99.06,99.13,99.08
1,1,1,threads,pool#default/worker-thread,idle-rate,99.95,99.97,99.95,99.97,99.98,...,99.82,99.97,99.99,99.98,99.98,99.98,99.96,99.97,99.99,99.97
2,1,2,threads,pool#default/worker-thread,idle-rate,99.94,99.97,99.94,99.90,99.97,...,99.98,99.97,99.98,99.97,99.97,99.98,99.96,99.98,99.98,99.97
3,1,3,threads,pool#default/worker-thread,idle-rate,99.94,99.94,99.95,99.97,99.96,...,99.98,99.98,99.98,99.98,99.98,99.97,99.97,99.97,99.98,99.98
4,1,4,threads,pool#default/worker-thread,idle-rate,99.96,99.80,99.97,99.97,99.97,...,99.97,99.97,99.98,99.97,99.97,99.97,99.98,99.98,99.98,99.97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2811,44,59,threads,pool#default/worker-thread,idle-rate,99.99,99.99,99.99,99.99,99.99,...,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99
2812,44,60,threads,pool#default/worker-thread,idle-rate,99.99,99.99,99.99,99.99,99.99,...,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99
2813,44,61,threads,pool#default/worker-thread,idle-rate,99.99,99.99,99.99,99.99,99.99,...,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99
2814,44,62,threads,pool#default/worker-thread,idle-rate,99.99,99.99,99.99,99.99,99.99,...,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99,99.99


In [32]:
df3 = df.copy()
df3 = df3.loc[df3.thread_id.isna()]
del df3['thread_id']
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,value
iteration,locality,objectname,instance,countername,Unnamed: 5_level_1
1,0,agas,total,count/route,1.0
1,1,agas,total,count/route,0.0
1,2,agas,total,count/route,0.0
1,3,agas,total,count/route,0.0
1,4,agas,total,count/route,0.0
...,...,...,...,...,...
44,59,threads,total/total,idle-rate,9999.0
44,60,threads,total/total,idle-rate,9999.0
44,61,threads,total/total,idle-rate,9999.0
44,62,threads,total/total,idle-rate,9999.0


In [33]:
df3.reset_index(level='objectname', inplace=True)

In [37]:
df3.value

iteration  locality  instance     countername
1          0         total        count/route       1.0
           1         total        count/route       0.0
           2         total        count/route       0.0
           3         total        count/route       0.0
           4         total        count/route       0.0
                                                  ...  
44         59        total/total  idle-rate      9999.0
           60        total/total  idle-rate      9999.0
           61        total/total  idle-rate      9999.0
           62        total/total  idle-rate      9999.0
           63        total/total  idle-rate      9999.0
Name: value, Length: 144848, dtype: float64