In [1]:
import os
import re
import sys
import pandas as pd
from collections import defaultdict

Hotfix: set path to use packages in current project

In [15]:
project_root = os.path.join(os.getcwd(), "trace_parser")
data_dir = os.path.join(project_root, "data/scorep")
sys.path.append(os.path.join(project_root, "src"))

In [3]:
from scorep_trace_parser import IOOP, TraceParser
from scorep_trace_parser.custom_types import IOMod, IOPradigm

In [4]:
trace_dir = "./NPB-mpi-scorep"
traces = os.listdir(trace_dir)
traces = [os.path.join(trace_dir, trace, "traces.otf2") for trace in traces]

In [5]:
traces

['./NPB-mpi-scorep/bt.C.16.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.A.1.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.C.1.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.B.9.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.B.16.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.A.25.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.B.25.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.C.4.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.C.9.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.A.16.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.B.1.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.A.9.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.A.4.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.B.4.mpi_io_full/traces.otf2',
 './NPB-mpi-scorep/bt.C.25.mpi_io_full/traces.otf2']

In [6]:
pradigm_mod_combs = [
    (IOPradigm.POSIX, IOMod.READ),
    (IOPradigm.POSIX, IOMod.WRITE),
    (IOPradigm.MPIIO, IOMod.READ),
    (IOPradigm.MPIIO, IOMod.WRITE),
]

In [8]:
agg_data = defaultdict(dict)
for trace in traces:
    trace_parser = TraceParser(trace)
    timer_res = trace_parser.time_resolution
    elapsed_time = trace_parser.aggregate_op_stat(lambda op: op.duration / timer_res)
    data_size  = trace_parser.aggregate_op_stat(lambda op: op.bytes_request)
    bandwidth = trace_parser.aggregate_op_stat(lambda op: op.byte_rate * timer_res)

    # Use name part bt.X.Y of trace as key
    k = re.search(r"(bt\.[A-Z]\.\d{1,2})(?:\.mpi_io_full)?", trace).group(1)
    paradigm_mod_combs = elapsed_time.keys()
    for mod_typ in paradigm_mod_combs:
        agg_data[mod_typ].update({
            f"{k}_time": pd.Series(elapsed_time[mod_typ]),
            f"{k}_size": pd.Series(data_size[mod_typ]),
            f"{k}_bandwidth": pd.Series(bandwidth[mod_typ])
        })

In [11]:
csv_file = {
    (IOMod.READ, IOPradigm.MPIIO): "MPI_READ",
    (IOMod.WRITE, IOPradigm.MPIIO): "MPI_WRITE",
    (IOMod.READ, IOPradigm.POSIX): "POSIX_READ",
    (IOMod.WRITE, IOPradigm.POSIX): "POSIX_WRITE"
}

In [16]:
for k, v in agg_data.items():
    csv_out = os.path.join(data_dir, f"{csv_file[k]}.csv")
    pd.DataFrame(v).to_csv(csv_out)