Input: Octo-Tiger HPX output, including performance counter values

Output: CSV
```csv
iteration,locality,subgrids,amr_bound,subgrid_leaves,idle_rate_0,idle_rate_1,...,idle_rate_19
```

In [1]:
import sys
import os

# Put the absolute path of the current working directory first on the module
# search path (presumably so the local `rcb12_term` package can be imported).
sys.path.insert(0, os.path.abspath('.'))

In [2]:
import re
import pandas as pd
import numpy as np
import rcb12_term.helpers as h

In [3]:
# Pick the first '*.txt.xz' file reported by the helper as the log to convert
# and load its contents into `hpx_out`.
files = h.list_txt_files_in_cur_dir('*.txt.xz')
if len(files) == 0:
    # Fail with a descriptive error instead of a bare IndexError on files[0].
    raise FileNotFoundError("no files matching '*.txt.xz' were found")
target = files[0]
print('subject:', target)
hpx_out = h.read_file(target)

subject: 64_4.txt.xz


In [4]:
# Matches one whole line that starts with '/' and consists of a first field
# followed by four or five further non-empty, comma-separated fields.
# MULTILINE makes ^ and $ anchor at every line of the searched text.
dec_val_regex = re.compile(r'^(/[^,\n]+)(,[^,\n]+){4,5}$', flags=re.MULTILINE)

In [5]:
def generator(hpx_out):
    """Yield each substring of *hpx_out* matched by ``dec_val_regex``.

    Every yielded string is the full match with a trailing newline appended.
    """
    yield from (
        match.group(0) + '\n'
        for match in dec_val_regex.finditer(hpx_out)
    )

In [6]:
class generator_reader(object):
    """Minimal read-only, file-like wrapper around ``generator(hpx_out)``.

    The object exposes ``read`` and the iterator protocol so it can be
    handed to consumers that expect a text stream. Each call hands out
    exactly one line produced by the underlying generator.
    """

    def __init__(self, hpx_out):
        self.hpx_out = hpx_out
        self.gen = generator(self.hpx_out)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line; raise ``StopIteration`` when exhausted.

        Without this method ``__iter__`` returned a non-iterator, so
        ``iter(obj)`` and ``for line in obj`` raised ``TypeError``.
        """
        line = self.read()
        if not line:
            raise StopIteration
        return line

    def read(self, n=0):
        """Return the next line, or ``''`` once the generator is exhausted.

        *n* is accepted so the signature looks like a file's ``read`` but is
        ignored: one line is returned per call regardless of its value.

        Raises:
            RuntimeError: if the line contains the text ``'CUDA'``.
        """
        try:
            line = next(self.gen)
        except StopIteration:
            # An empty string is the end-of-file signal for file-like readers.
            return ''
        if 'CUDA' in line:
            raise RuntimeError('Cant happen')
        return line

In [7]:
# Column names of a counter record, in file order, mapped to the dtype each
# column is parsed as. The same mapping supplies both `names` and `dtype`.
counter_columns = {
    'full_counter_name': 'str',
    'iteration': 'uint64',
    'timestamp': 'float64',
    'timestamp_unit': 'str',
    'value': 'float64',
    'value_unit': 'str',
}
df = pd.read_csv(
    generator_reader(hpx_out),
    names=list(counter_columns),
    dtype=counter_columns,
)

In [8]:
# At least one counter record must have been parsed.
assert len(df) > 0

In [9]:
# None of these columns may contain a missing value
# (value_unit is not checked here).
for required in ('iteration', 'timestamp', 'timestamp_unit', 'value'):
    assert not df[required].isna().any()

In [10]:
# Split the full counter name, shaped like
#   /<objectname>{locality#<N>/<instancename>}/<countername>
# into four separate columns. The pattern is a raw string so that `\d` reaches
# the regex engine as written instead of being an invalid string escape.
df[['objectname', 'locality', 'instancename', 'countername']] = df.full_counter_name.str.extract(
    r'/(.+){locality#(\d+)/(.+)}/(.+)', expand=True
)

In [11]:
# Every counter name must have matched the pattern, i.e. none of the
# extracted parts may be missing.
for part in ('objectname', 'locality', 'instancename', 'countername'):
    assert not df[part].isna().any()

In [12]:
# Extract the worker-thread number from the instance name; rows whose instance
# is not a worker thread get NaN. errors='ignore' leaves the column as it is
# when the uint64 conversion fails. The pattern is a raw string so that `\d`
# is not an invalid string escape.
df['thread_id'] = df.instancename.str.extract(
    r'pool#default/worker-thread#(\d+)', expand=True
).astype('uint64', errors='ignore')
# The locality, by contrast, must convert cleanly for every row.
df.locality = df.locality.astype('uint64', errors='raise')

In [13]:
# Display the dtype of the parsed columns (iteration is listed twice).
tuple(
    df[column].dtype
    for column in (
        'iteration',
        'locality',
        'timestamp',
        'timestamp_unit',
        'value',
        'value_unit',
        'iteration',
        'thread_id',
    )
)

(dtype('uint64'),
 dtype('uint64'),
 dtype('float64'),
 dtype('O'),
 dtype('float64'),
 dtype('O'),
 dtype('uint64'),
 dtype('O'))

In [14]:
# Scale values whose unit is '[0.01%]' by 0.01; the unit label itself is
# left unchanged.
in_hundredths_of_percent = df.value_unit == '[0.01%]'
df.loc[in_hundredths_of_percent, 'value'] *= 0.01

In [15]:
# Convert nanosecond values to seconds and relabel their unit accordingly.
in_nanoseconds = df.value_unit == '[ns]'
df.loc[in_nanoseconds, 'value'] *= 1.0e-9
df.loc[in_nanoseconds, 'value_unit'] = '[s]'

In [16]:
# Sanity check: the steps so far must have left `df` a DataFrame.
assert isinstance(df, pd.DataFrame)

In [17]:
# Re-type the numeric columns; iteration and thread_id are additionally
# downcast to the smallest unsigned integer type where possible.
for column, downcast in (
    ('iteration', 'unsigned'),
    ('timestamp', None),
    ('value', None),
    ('thread_id', 'unsigned'),
):
    df[column] = pd.to_numeric(df[column], downcast=downcast)

In [18]:
# The log must contain at least one counter whose object name is 'octotiger'.
assert (df.objectname == 'octotiger').any()

In [19]:
def drop_irrelevant_counters(df):
    """Remove, in place, the rows of *df* that are not needed downstream.

    Dropped are all rows whose ``objectname`` is ``'agas'`` and all rows whose
    ``countername`` is ``'count/cumulative-phases'`` or ``'count/cumulative'``.
    Nothing is returned.
    """
    unwanted = (
        (df.objectname == 'agas')
        | df.countername.isin(['count/cumulative-phases', 'count/cumulative'])
    )
    df.drop(df.index[unwanted], inplace=True)
# Filter the working frame; the function modifies `df` in place.
drop_irrelevant_counters(df)

In [20]:
# Every value unit must be one of '[s]', '[ns]', '[0.01%]' or missing (NaN;
# presumably a counter reported without a unit - TODO confirm).
# pd.isna is used because np.isnan raises TypeError for string input, which
# would hide the offending unit behind an unrelated error.
allowed_units = {'[s]', '[ns]', '[0.01%]'}
for unit in df.value_unit.unique():
    assert unit in allowed_units or pd.isna(unit), f'unexpected value unit: {unit!r}'

In [21]:
# Remove the unit and timestamp columns from the working frame.
df.drop(columns=['timestamp_unit', 'value_unit', 'timestamp'], inplace=True)

In [22]:
def process_df(df):
    """Reshape the long counter table into one row per (iteration, locality).

    Two pivots of ``value`` are built on the same row keys and joined side
    by side:

    * one column per ``countername`` (the ``'idle-rate'`` column is removed),
    * one column per ``thread_id``.

    Returns the combined frame with ``iteration`` and ``locality`` turned
    back into ordinary columns.
    """
    # Per-counter values; dropna=False is passed through to pivot_table.
    octotiger_counters = df.pivot_table(
        index=['iteration', 'locality'],
        columns=['countername'],
        values='value',
        dropna=False,
    ).drop(columns='idle-rate')

    # Per-thread values, one column for each thread_id.
    idle_rates = df.pivot_table(
        index=['iteration', 'locality'],
        columns=['thread_id'],
        values='value',
    )

    combined = pd.concat([octotiger_counters, idle_rates], axis=1)
    return combined.reset_index()

In [23]:
# Replace the long-format frame with the reshaped result of process_df.
df = process_df(df)

In [24]:
def get_csv_output_path(original_path):
    """Return *original_path* with up to two extensions replaced by ``.csv``.

    For example ``'64_4.txt.xz'`` becomes ``'64_4.csv'``.
    """
    stem = original_path
    # Strip one extension per pass: first the outer one, then the inner one.
    for _ in range(2):
        stem, _extension = os.path.splitext(stem)
    return stem + '.csv'
# Output file name: the input name with its extensions replaced by '.csv'.
of = get_csv_output_path(target)

In [25]:
# Write the table without the row index; floats are formatted with '%g'.
# NOTE(review): '%g' keeps at most six significant digits - confirm that this
# precision is sufficient for all counters.
df.to_csv(of, float_format='%g', index=False)