In [1]:
import pandas as pd
import glob
from datetime import datetime
import matplotlib.pyplot as plt

In [12]:
df = pd.DataFrame(columns=["kind", "run", "log", "lines", "time", "name", "path"])

for filename in glob.iglob('**', recursive=True):
    # skip exceptions, keep .txts with logs
    if "log" not in filename or filename[-4:] != ".txt" or "exception" in filename:
        continue
    tmp = filename.split('/')
    if not len(tmp) - 1:
        continue
    with open(filename, 'r') as fp:
        lines = len(fp.readlines())
        fp.close()
    kind, name = tmp[0], tmp[-1]

    # TODO: something with GXL logs
    if kind == "gxld":
        continue

    # parse datetime from file name
    try:
        time = datetime.strptime(filename[-21:-4], '%Y%m%d_%H-%M-%S') # everything but GXLD logs
    except Exception:
        try:
            time = datetime.strptime(filename[-18:-4], '%m-%d-%y_%H-%M') # specifically for GXLD logs
        except Exception as e:
            raise(e)
    
    # get 'run' and 'log' numbers
    runlog = name.split('_')[2]
    run, log = [int(x) for x in runlog.split('run')[-1].split('log')]

    row = {"kind": kind, "run":run, "log": log, "lines": lines, "time": time, "name": name, "path": filename}
    if df.name.str.contains(name).any():
        continue
    else:
        df.loc[len(df), :] = row
df.sort_values(by=['kind', 'run', 'log']).reset_index(drop=True, inplace=True)

In [14]:
def pass_no_pass(fp):
    with open(fp, 'r') as f:
        for line in f:
            if "PASSED" in line:
                return True
            elif "FAILED" in line:
                return False
        return "FUCK"
df.loc[: ,"passed"] = df.path.apply(pass_no_pass)

In [15]:
def parse_fp_for_int_diff(fp):
    prev = None
    diff = 0
    vals = []

    with open(fp, 'r') as f:
        for line in f:
            if prev == "[SECTION][INT]":
                prev = line[:14]
                delimiters = [" ", "\n"]
                for delimiter in delimiters:
                    line = " ".join(line.split(delimiter))
                split = list(map(int, line.split()))
                vals.append(split)
            else:
                prev = line[:14]
        f.close()
    for i in range(len(vals[0])):
        if vals[0][i] != vals[1][i]:
            diff += 1
    return diff
df.loc[:, 'int_diff'] = df.path.apply(parse_fp_for_int_diff)

In [16]:
def parse_fp_for_hex_diff(fp):
    prev = None
    diff = 0
    vals = []

    with open(fp, 'r') as f:
        for line in f:
            if prev == "[SECTION][HEX]":
                prev = line[:14]
                delimiters = [" ", "\n"]
                for delimiter in delimiters:
                    line = " ".join(line.split(delimiter))
                split = line.split()
                vals.append(split)
            else:
                prev = line[:14]
        f.close()
    for i in range(len(vals[0])):
        if vals[0][i] != vals[1][i]:
            diff += 1
    return diff

df.loc[:, 'hex_diff'] = df.path.apply(parse_fp_for_hex_diff)

In [17]:
df.head()

Unnamed: 0,kind,run,log,lines,time,name,path,passed,int_diff,hex_diff
0,mmb_static,2,2,30,2023-11-13 06:12:13,log_staticMMBtest_run2log2_20231113_06-12-13.txt,mmb_static/log_staticMMBtest_run2log2_20231113...,False,32768,32768
1,mmb_static,4,5,31,2023-11-13 00:46:20,log_staticMMBtest_run4log5_20231113_00-46-20.txt,mmb_static/_logs/log_staticMMBtest_run4log5_20...,False,32768,32768
2,mmb_static,4,6,31,2023-11-13 00:50:29,log_staticMMBtest_run4log6_20231113_00-50-29.txt,mmb_static/_logs/log_staticMMBtest_run4log6_20...,False,32768,32768
3,mmb_static,9,9,15,2023-11-13 00:56:52,log_staticMMBtest_run9log9_20231113_00-56-52.txt,mmb_static/_logs/log_staticMMBtest_run9log9_20...,True,32766,32766
4,mmb_static,4,4,30,2023-11-13 00:43:04,log_staticMMBtest_run4log4_20231113_00-43-04.txt,mmb_static/_logs/log_staticMMBtest_run4log4_20...,False,32768,32768
