In [1]:
import os, glob
import sys, re

In [2]:
import pandas as pd

In [3]:
# Collect the run logs in the working directory. glob returns matches in
# arbitrary, filesystem-dependent order, so sort them to make "the first run
# file" the same on every machine and every re-run.
run_files = sorted(glob.glob('*.txt'))

In [4]:
# Fail with an explicit message when no run files were found, instead of the
# opaque IndexError that run_files[0] would raise on an empty list.
if not run_files:
    raise FileNotFoundError(
        "no '*.txt' run files found in {}".format(os.path.abspath(os.curdir)))
# Path of the first run file, expressed relative to the current directory.
r0f = os.path.join(os.curdir, run_files[0])

In [5]:
# Both patterns are written as raw strings so that regex escapes such as \{ and
# \d reach the re module untouched; in a plain string literal they are invalid
# escape sequences and trigger a warning on current Python versions.

# One counter line: '/object{instance}/' followed by 4 or 5 comma-terminated
# fields and a final field, anchored to line boundaries (MULTILINE).
pfx_counter_line_pattern = re.compile(r'^/[^{]+\{[^}]+\}/([^,]+,){4,5}[^,\n]+$', re.MULTILINE)

# Decomposes a counter name of the form
#   /object{locality#N/instance}/counter[@params]
# The instance is captured either as instance1 + thread_id (the
# 'pool#<pool>/<name>#<id>' shape) or, for anything else, as instance2.
general_counter_form_pattern = re.compile(
    r'/(?P<object>[^{]+)'
    r'\{locality#(?P<locality>\d+)/'
    r'(?:'
    r'(?:(?P<instance1>pool#[^/]+/[^#]+)#(?P<thread_id>\d+))'
    r'|(?P<instance2>[^}]+)'
    r')\}'
    r'/(?P<counter>[^@]+)'
    r'(?:@(?P<params>.+))?'
)

In [6]:
# Read the whole first run file into memory as text so the counter-line
# regex can scan it in a single pass.
with open(r0f) as r0h:  # text read mode is open()'s default
    r0c = r0h.read()

In [7]:
# test
def test_pfx_counter_line_pattern():
    """Check that pfx_counter_line_pattern finds every sample counter line.

    The sample holds one counter line per non-blank line. The pattern must
    return exactly those lines, in order; each match is also printed.

    Raises:
        AssertionError: if any sample line is missed, altered or duplicated.
    """
    subject = '''
/octotiger{locality#0/total}/subgrid_leaves,2,3602.438779,[s],32428
/octotiger{locality#1/total}/subgrid_leaves,2,3602.428582,[s],34118
/octotiger{locality#2/total}/subgrid_leaves,2,3602.430246,[s],33918
/octotiger{locality#3/total}/subgrid_leaves,2,3602.431576,[s],34443
/octotiger{locality#4/total}/subgrid_leaves,2,3602.436175,[s],33173
/threads{locality#59/total/total}/count/cumulative,44,154828.198473,[s],2.3065e+09
/threads{locality#60/total/total}/count/cumulative,44,154828.221342,[s],2.24724e+09
/threads{locality#61/total/total}/count/cumulative,44,154828.184221,[s],2.28135e+09
/threads{locality#62/total/total}/count/cumulative,44,154828.221351,[s],2.20491e+09
/threads{locality#63/total/total}/count/cumulative,44,154828.216028,[s],2.05324e+09
/threads{locality#0/pool#default/worker-thread#0}/count/cumulative,44,154828.257432,[s],1.14378e+08
/threads{locality#0/pool#default/worker-thread#1}/count/cumulative,44,154828.260203,[s],1.05219e+08
/threads{locality#0/pool#default/worker-thread#2}/count/cumulative,44,154828.262954,[s],1.04616e+08
/threads{locality#0/pool#default/worker-thread#3}/count/cumulative,44,154828.262962,[s],1.04786e+08
    '''
    # Every non-blank line of the sample is a counter line the pattern must find.
    expected = [line for line in subject.splitlines() if line.strip()]
    found = [match.group() for match in pfx_counter_line_pattern.finditer(subject)]
    # finditer never yields None and may yield nothing at all, so compare the
    # matches with the sample itself rather than checking each one for None.
    assert found == expected, (
        'expected {} counter lines, matched {}'.format(len(expected), len(found)))
    for line in found:
        print(line)
test_pfx_counter_line_pattern()

/octotiger{locality#0/total}/subgrid_leaves,2,3602.438779,[s],32428
/octotiger{locality#1/total}/subgrid_leaves,2,3602.428582,[s],34118
/octotiger{locality#2/total}/subgrid_leaves,2,3602.430246,[s],33918
/octotiger{locality#3/total}/subgrid_leaves,2,3602.431576,[s],34443
/octotiger{locality#4/total}/subgrid_leaves,2,3602.436175,[s],33173
/threads{locality#59/total/total}/count/cumulative,44,154828.198473,[s],2.3065e+09
/threads{locality#60/total/total}/count/cumulative,44,154828.221342,[s],2.24724e+09
/threads{locality#61/total/total}/count/cumulative,44,154828.184221,[s],2.28135e+09
/threads{locality#62/total/total}/count/cumulative,44,154828.221351,[s],2.20491e+09
/threads{locality#63/total/total}/count/cumulative,44,154828.216028,[s],2.05324e+09
/threads{locality#0/pool#default/worker-thread#0}/count/cumulative,44,154828.257432,[s],1.14378e+08
/threads{locality#0/pool#default/worker-thread#1}/count/cumulative,44,154828.260203,[s],1.05219e+08
/threads{locality#0/pool#default/worker-t

In [8]:
def test_general_counter_form_pattern():
    """Smoke-test general_counter_form_pattern on two sample inputs.

    Each sample must match from its start. The named groups (as a dict) and
    the positional groups (as a tuple) are printed for visual inspection.
    The second sample is a full CSV line; the pattern has no end anchor, so
    the trailing fields end up inside the 'counter' group.
    """
    samples = (
        '/threads{locality#0/pool#default/worker-thread#0}/count/cumulative',
        '/threads{locality#61/total/total}/count/cumulative,44,154828.184221,[s],2.28135e+09',
    )
    for sample in samples:
        matched = general_counter_form_pattern.match(sample)
        assert matched is not None
        print(matched.groupdict())
        print(matched.groups())
test_general_counter_form_pattern()

{'object': 'threads', 'locality': '0', 'instance1': 'pool#default/worker-thread', 'thread_id': '0', 'instance2': None, 'counter': 'count/cumulative', 'params': None}
('threads', '0', 'pool#default/worker-thread', '0', None, 'count/cumulative', None)
{'object': 'threads', 'locality': '61', 'instance1': None, 'thread_id': None, 'instance2': 'total/total', 'counter': 'count/cumulative,44,154828.184221,[s],2.28135e+09', 'params': None}
('threads', '61', None, None, 'total/total', 'count/cumulative,44,154828.184221,[s],2.28135e+09', None)


In [9]:
%%time
# Column order of the parsed table: the pieces decoded from the counter name,
# followed by the six comma-separated fields of the raw line.
counter_columns = [
    'objectname', 'locality', 'instance', 'countername', 'thread_id',
    'parameters', 'general_form', 'iteration', 'timestamp',
    'timestamp_unit', 'value', 'unit']


def parse_counter_line(raw_line):
    """Turn one raw counter line into a flat record.

    Parameters
    ----------
    raw_line : str
        A line as matched by ``pfx_counter_line_pattern``: the counter name
        followed by comma-separated fields (5 or 6 fields in total).

    Returns
    -------
    tuple
        Twelve strings (or ``None`` for absent name parts), ordered like
        ``counter_columns``.

    Raises
    ------
    ValueError
        If the line does not have 5 or 6 fields, or its first field is not a
        counter name that ``general_counter_form_pattern`` can decompose.
    """
    fields = raw_line.split(',')
    if len(fields) == 5:
        # Lines without a trailing unit field get the unit '1'.
        fields.append('1')
    if len(fields) != 6:
        raise ValueError('unexpected number of fields in counter line: {!r}'.format(raw_line))

    name_match = general_counter_form_pattern.match(fields[0])
    if name_match is None:
        raise ValueError('unrecognised counter name: {!r}'.format(fields[0]))
    name_parts = name_match.groupdict()

    # The pattern captures the instance under one of two alternative groups;
    # fold them into a single value.
    instance = name_parts['instance1']
    if instance is None:
        instance = name_parts['instance2']

    return (
        name_parts['object'],
        name_parts['locality'],
        instance,
        name_parts['counter'],
        name_parts['thread_id'],
        name_parts['params'],
    ) + tuple(fields)


# One record per counter line found in the run file.
all_counters = [
    parse_counter_line(line_match.group(0))
    for line_match in pfx_counter_line_pattern.finditer(r0c)
]

df = pd.DataFrame(all_counters, columns=counter_columns)

CPU times: user 2.15 s, sys: 128 ms, total: 2.27 s
Wall time: 2.31 s


In [10]:
# Every field comes out of the parser as a string; convert the numeric ones.
for numeric_column in ('locality', 'iteration', 'timestamp', 'value'):
    df[numeric_column] = pd.to_numeric(df[numeric_column])

# Replace the '[0.01%]' unit label with the float 0.01. After this the unit
# column holds both strings (e.g. '1') and the float 0.01.
df.loc[df['unit'] == '[0.01%]', 'unit'] = .01

In [11]:
# test
def check_all_data_units():
    """Print the rows of ``df`` whose unit is not one of the known units.

    The known units are the string '1', the float 0.01, '[s]' and '[ns]'.
    Rows with any other unit are printed as a DataFrame; nothing is returned.
    """
    unit = df.unit
    known = (unit == '1') | (unit == 0.01) | (unit == '[s]') | (unit == '[ns]')
    print(df.loc[~known])

In [12]:
# Display the parsed counter table; the rendered output elides the middle rows.
df

Unnamed: 0,objectname,locality,instance,countername,thread_id,parameters,general_form,iteration,timestamp,timestamp_unit,value,unit
0,agas,0,total,count/route,,,/agas{locality#0/total}/count/route,1,6.103603,[s],1.0,1
1,agas,1,total,count/route,,,/agas{locality#1/total}/count/route,1,6.064401,[s],0.0,1
2,agas,2,total,count/route,,,/agas{locality#2/total}/count/route,1,6.073655,[s],0.0,1
3,agas,3,total,count/route,,,/agas{locality#3/total}/count/route,1,6.050666,[s],0.0,1
4,agas,4,total,count/route,,,/agas{locality#4/total}/count/route,1,6.046651,[s],0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
313803,threads,63,pool#default/worker-thread,idle-rate,15,,/threads{locality#63/pool#default/worker-threa...,44,154828.338373,[s],9999.0,0.01
313804,threads,63,pool#default/worker-thread,idle-rate,16,,/threads{locality#63/pool#default/worker-threa...,44,154828.328259,[s],9999.0,0.01
313805,threads,63,pool#default/worker-thread,idle-rate,17,,/threads{locality#63/pool#default/worker-threa...,44,154828.330569,[s],9999.0,0.01
313806,threads,63,pool#default/worker-thread,idle-rate,18,,/threads{locality#63/pool#default/worker-threa...,44,154828.336126,[s],9999.0,0.01
