In [None]:
import os
import glob
import re #regex as re
import numpy as np
import pandas as pd

In [None]:
# List the log directories/files available next to the notebook.
# glob returns matches in arbitrary (filesystem) order, so sort them to make
# the displayed list reproducible across machines and runs.
gl = sorted(glob.glob("logs_*"))
gl

In [None]:
# Columns that identify a run; also used as the groupby keys further down.
keys = ["tag", "solver", "nprocs", "nthreads", "mesh_ndofs", "ncomponents"]

# Labels searched for at the START of a log line (see load_file).
measures_teuchos = [
    "Assemble Time",
    "BEM Reinitialisation Time",
    "Direct Integral Time",
    "Epetra_CrsMatrix::Multiply(TransA,X,Y)",
    "LAC Solve Time",
    "FMA_preconditioner Time",
    "Local Evaluation Time",
    "Multipole Generation Time",
    "Multipole Integral Time",
    "Multipole MatrVec Products Time",
    "Solve Time",
]

# Labels searched for ANYWHERE in a log line (see load_file).
measures_perf = [
    "task-clock",
    "cycles",
    "instructions",
    "branches",
    "branch-misses",
    "L1-dcache-loads",
    "L1-dcache-load-misses",
    "LLC-loads",
    "LLC-load-misses",
    "time elapsed",
    "user",
    "sys",
]

# Full, ordered list of DataFrame columns: identifiers first, then measures.
cols = [*keys, *measures_teuchos, *measures_perf]

In [None]:
def parse_filename(fname):
    """Derive run parameters from a log file name.

    Returns a list ``[solver, nprocs, nthreads, ncomponents]``:

    * ``solver`` is ``"direct"`` if that word occurs in ``fname``, else ``"fma"``;
    * ``nprocs`` is the integer following ``_np`` (1 when absent);
    * ``nthreads`` is the integer following ``_nt`` (1 when absent);
    * ``ncomponents`` is 2 if ``"complex"`` occurs in ``fname``, else 1.
    """
    solver = "direct" if "direct" in fname else "fma"

    # nprocs and nthreads share the same "<marker><digits>" shape.
    counts = []
    for pattern in (r"_np(\d+)", r"_nt(\d+)"):
        hit = re.search(pattern, fname)
        counts.append(int(hit.group(1)) if hit else 1)

    ncomponents = 2 if "complex" in fname else 1

    return [solver, *counts, ncomponents]

def load_file(path, blob, tag, solver, nprocs, nthreads, ncomponents,
              teuchos_measures=None, perf_measures=None):
    """Parse one log file and append every complete record to ``blob``.

    A record starts being collected at a line beginning with ``"Timer Name"``
    and is complete once every name in ``teuchos_measures`` and
    ``perf_measures`` has been seen once. A record still incomplete at the
    end of the file is discarded.

    Parameters
    ----------
    path : str
        Log file to read.
    blob : dict[str, list]
        Column store, mutated in place. It must contain a list for every key
        a record can hold (identifiers plus all measure names).
    tag, solver, nprocs, nthreads, ncomponents
        Identifying values copied verbatim into every record.
    teuchos_measures : iterable of str, optional
        Names matched at the start of a line. Defaults to the notebook-level
        ``measures_teuchos``.
    perf_measures : iterable of str, optional
        Names matched anywhere in a line. Defaults to the notebook-level
        ``measures_perf``.

    Raises
    ------
    ValueError, IndexError
        If a matching line does not carry a number where one is expected.
    KeyError
        If ``blob`` has no column for a key of the record.
    """
    # Fall back to the notebook-level lists only when the caller gives none.
    if teuchos_measures is None:
        teuchos_measures = measures_teuchos
    if perf_measures is None:
        perf_measures = measures_perf
    # De-duplicate but keep the declared order, so matching is deterministic
    # (iterating a set made the winner depend on hash order).
    teuchos_measures = list(dict.fromkeys(teuchos_measures))
    perf_measures = list(dict.fromkeys(perf_measures))

    record = {
        "tag": tag,
        "solver": solver,
        "nprocs": nprocs,
        "nthreads": nthreads,
        "ncomponents": ncomponents,
    }

    # The call count is discarded; the time sits two tokens from the end of
    # the line, or six tokens from the end when nprocs > 1.
    # NOTE(review): presumably the mean-over-processes column of the
    # multi-process timer table -- confirm against an actual log.
    time_index = -6 if nprocs > 1 else -2

    def _most_specific(candidates):
        # When several names match one line, prefer the longest, e.g.
        # "branch-misses" over the "branches" in "... of all branches".
        return max(candidates, key=len) if candidates else None

    reading = False
    teuchos_left = list(teuchos_measures)
    perf_left = list(perf_measures)

    with open(path, "r") as f:
        for line in f:
            # Strip only the newline: slicing off the last character would
            # eat real data on a final line that has no trailing newline.
            tokens = [t for t in line.rstrip("\n").split(" ") if t]

            if line.startswith("   Number of degrees of freedom: "):
                record["mesh_ndofs"] = int(tokens[-1])
                continue
            if line.startswith("Timer Name"):
                # NOTE(review): the names still missing from a previous,
                # unfinished record are not reset here -- confirm whether a
                # new header should restart the record.
                reading = True
                continue
            if not reading:
                continue

            name = _most_specific([s for s in teuchos_left if line.startswith(s)])
            if name is not None:
                record[name] = float(tokens[time_index])
                teuchos_left.remove(name)
            else:
                name = _most_specific([s for s in perf_left if s in line])
                if name is not None:
                    # The value is the first token, with thousands separators.
                    raw = tokens[0].replace(",", "")
                    if name == "task-clock":
                        # presumably msec -> s; TODO confirm the unit
                        record[name] = float(raw) / 1000
                    elif "." in raw:
                        record[name] = float(raw)
                    else:
                        record[name] = int(raw)
                    perf_left.remove(name)

            if not teuchos_left and not perf_left:
                # Record complete: append one value to EVERY column so that
                # all lists in blob keep the same length.
                for key, value in record.items():
                    blob[key].append(value)
                for key in blob:
                    if key not in record:
                        # e.g. mesh_ndofs when the log lacks that line
                        blob[key].append(float("nan"))

                reading = False
                teuchos_left = list(teuchos_measures)
                perf_left = list(perf_measures)

In [None]:
# Empty column store (one list per column) that load_file fills in place;
# once filled it becomes the DataFrame.
blob = {c: [] for c in cols}

LOG_PREFIX = "logs_"

# glob order is arbitrary: sort so that rows are appended reproducibly.
gl = sorted(glob.glob(LOG_PREFIX + "*"))
# The revision tag is whatever follows the prefix. Keep directories only:
# a stray file such as "logs_old.tar.gz" would make os.listdir fail.
revisions = [r[len(LOG_PREFIX):] for r in gl if os.path.isdir(r)]
for r in revisions:
    dname = os.path.join(os.getcwd(), LOG_PREFIX + r)
    for fname in sorted(os.listdir(dname)):
        path = os.path.join(dname, fname)
        # Skip sub-directories (e.g. .ipynb_checkpoints): open() would fail.
        if not os.path.isfile(path):
            continue
        load_file(path, blob, r, *parse_filename(fname))

df = pd.DataFrame.from_dict(blob)

In [None]:
# Mean of every measure for the "fma" solver, one row per key combination,
# ordered by process count, then component count, then tag.
fma_runs = df.loc[df["solver"] == "fma"]
fma_runs.groupby(keys).mean().sort_values(["nprocs", "ncomponents", "tag"])

In [None]:
# NOTE(review): this repeats the query of the previous cell (solver == "fma");
# possibly another solver was intended here -- confirm.
(
    df.loc[df["solver"].eq("fma")]
    .groupby(keys)
    .mean()
    .sort_values(["nprocs", "ncomponents", "tag"])
)