In [4]:
import json
import re
from datetime import datetime


# Read the whole fuzzer log into memory as a single string.
# The encoding is pinned so decoding does not depend on the platform's locale
# default. NOTE(review): assumes the log file is UTF-8 — confirm against the
# tool that writes it.
with open(
    "output/libpcap/fuzzer_2024-07-14_06-33-13_rCURRENT.log", encoding="utf-8"
) as f:
    logs = f.read()

_header = re.compile(
    r"^\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}) \+\d{2}:\d{2}\] (\w+) \[(.+?):(\d+)] (.+?)$"
)
def _parse_header(line: "str | list[str]") -> "None | dict | tuple[dict, list[str]]":
    header = line
    if isinstance(line, list):
        header, *_next = line
    found = _header.findall(header)
    if len(found) == 0:
        return None
    (date, type_, file, lineno, msg), *_ = found
    parsed = {
        "date": date, # datetime.strptime(date, "%Y-%m-%d %H:%M:%S.%f"),
        "type": type_,
        "file": file,
        "lineno": int(lineno),
        "msg": msg.strip(),
    }
    if isinstance(line, str):
        return parsed
    try:
        _next_header = next(
            i for i, l in enumerate(_next) if len(_header.findall(l)) > 0
        )
    except:
        _next_header = len(_next)
    parsed["msg"] = (msg + "\n" + "\n".join(_next[:_next_header])).strip()
    return parsed, _next[_next_header:]

def parse_log(logs: "str | list[str]") -> "list[dict]":
    """Split a raw log into one record dict per header line.

    Args:
        logs: The full log text, or the same text already split into lines.

    Returns:
        A list of record dicts as produced by ``_parse_header`` for list
        input: each record's ``msg`` includes the lines that follow its header
        up to the next header. Lines that appear before the first header line
        belong to no record and are skipped; input without any header line
        (including the empty string) yields ``[]``.
    """
    lines = logs.split("\n") if isinstance(logs, str) else list(logs)

    # Positions of all header lines. Locating them once lets each record be
    # sliced out directly instead of re-copying the remaining lines per record.
    starts = [i for i, line in enumerate(lines) if _parse_header(line) is not None]

    parsed = []
    for start, stop in zip(starts, starts[1:] + [len(lines)]):
        # The slice begins at a header and contains no other header, so the
        # "rest" returned by _parse_header is always empty here.
        record, _rest = _parse_header(lines[start:stop])
        parsed.append(record)
    return parsed

# Replace the raw text with its parsed records (a list of dicts), then persist
# them as JSON.
logs = parse_log(logs)
# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding
# is pinned to UTF-8 rather than left to the platform's locale default.
with open("output/libpcap/log.json", "w", encoding="utf-8") as f:
    json.dump(logs, f, indent=2, ensure_ascii=False)

In [35]:
def _check(_logs, _rules):
    _found = [
        any(len(rule.findall(log["msg"])) > 0 for rule in _rules)
        for log in _logs
    ]
    assert all(_found), [log for log, flag in zip(_logs, _found) if not flag]

# DEBUG records: every message must match one of the known shapes below.
# The trailing "# N" comments are hand-recorded counts — presumably matches
# seen in some earlier run. NOTE(review): confirm they are still current.
_debug = [log for log in logs if log["type"] == "DEBUG"]  # 1897
_debug_rule = [
    re.compile(r"^Parsed \d+ typed gadgets$"),  # 1
    # Literal periods are escaped so they do not match arbitrary characters.
    re.compile(r"^Parsed \d+ function gadgets\.$"),  # 1
    re.compile(r"^starting new connection: .+?$"),  # 133
    re.compile(r"^LLM Generate time: \d+s$"),  # 241
    re.compile(r"^LLM generated \d+ programs\. Sanitize those programs!$"),  # 241
    re.compile(r"^This round's sanitization Time Cost:"),  # 240
    re.compile(r"^Global sanitization time cost:"),  # 240
    re.compile(r"^\[round\] total:"),  # 240
    re.compile(r"^\[global\] total:"),  # 240
    re.compile(r"^\[Asan\] df:"),  # 240
    re.compile(r"^energies: \["),  # 80
    re.compile(r"^Update prompt queue according the incoming code coverage"),
]
# Raises AssertionError listing any DEBUG record that no rule covers.
_check(_debug, _debug_rule)

# INFO records: every message must match one of the known shapes below.
# The trailing "# N" comments are hand-recorded counts — presumably matches
# seen in some earlier run. NOTE(review): confirm they are still current.
_info = [log for log in logs if log["type"] == "INFO"]  # 642
_info_rule = [
    re.compile(r"^selected combination:"),  # 1
    re.compile(r"^Total OPENAI corpora cost:"),  # 241
    # Literal periods are escaped so they do not match arbitrary characters.
    re.compile(r"^Fuzzer stuck in the current prompt, choose a new one\."),  # 80
    re.compile(r"^Number of current unique seeds: "),  # 80
    re.compile(r"^random assemble new prompt combination with their energies\.$"),  # 80
    re.compile(r"^set combination: "),  # 80
    re.compile(r"^\[Mutate Loop\]: loop: \d+"),  # 80
]
# Raises AssertionError listing any INFO record that no rule covers.
_check(_info, _info_rule)

# WARN records and the patterns known so far. The "# N" comments are
# hand-recorded counts — presumably from the backup run named below, not from
# the log loaded in this notebook. NOTE(review): confirm before relying on them.
_warn = [log for log in logs if log["type"] == "WARN"]  # 511
_warn_rule = [
    # Rules written for output/libpcap.backup240707.
    # Literal periods (file name, host name) are escaped so they do not match
    # arbitrary characters.
    # 344
    re.compile(r"^Meet unkown Asan err:\nthread 'main' panicked at src/bin/harness\.rs:347:69:\ncalled `Result::unwrap\(\)` on an `Err` value: No such file or directory \(os error 2\)"),
    # 87
    # re.compile(r"^Meet unkown Asan err:\nINFO: Running with entropic power schedule"),
    # Deliberately unanchored: this text appears inside the message, not at its start.
    re.compile(r"AddressSanitizer: global-buffer-overflow on address"),
    # 75
    re.compile(r"^Unable to get the definition of the type: .+?$"),
    # 2
    re.compile(r"^Meet unkown Asan err:\nthread 'main' panicked at src/bin/harness\.rs:347:69:\ncalled `Result::unwrap\(\)` on an `Err` value: Cannot found suitable location to insert vardecl"),
    # 1
    re.compile(r"^http error: error sending request for url \(https://api\.openai\.com/v1/chat/completions\): operation timed out"),
    # 2
    re.compile(r"^Meet unkown Asan err:\nthread \'main\' panicked at src/bin/harness\.rs:347:69:\ncalled `Result::unwrap\(\)` on an `Err` value: fail to extract from \""),
]
# The strict check stays disabled; the expression below reports coverage
# instead of raising: (number of WARN records, per-rule count of records that
# match, records that match no rule).
# _check(_warn, _warn_rule)
len(_warn), [
    sum(rule.search(log["msg"]) is not None for log in _warn)
    for rule in _warn_rule
], [
    log
    for log in _warn
    if not any(rule.search(log["msg"]) for rule in _warn_rule)
]

(6,
 [0, 4, 1, 0, 0, 0],
 [{'date': '2024-07-14 06:48:21.315553',
   'type': 'WARN',
   'file': 'src/execution/logger.rs',
   'lineno': 213,
   'msg': 'Meet unkown Asan err:\nthread \'main\' panicked at src/execution/sanitize.rs:285:13:\n"/home/fuzz/PromptFuzz/output/libpcap/work/id_000068/merge_control_file" does not exist!\nstack backtrace:\n   0:     0x556f6ef98785 - std::backtrace_rs::backtrace::libunwind::trace::h1a07e5dba0da0cd2\n                               at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/../../backtrace/src/backtrace/libunwind.rs:105:5\n   1:     0x556f6ef98785 - std::backtrace_rs::backtrace::trace_unsynchronized::h61b9b8394328c0bc\n                               at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5\n   2:     0x556f6ef98785 - std::sys_common::backtrace::_print_fmt::h1c5e18b460934cff\n                               at /rustc/129f3b9964af4d4a709d1383930ade12dfe7c081/libra