In [2]:
import csv
import re
from pathlib import Path

# Directory whose regular files are scanned as build logs (non-recursive glob).
LOG_DIR = Path("/home/user/Projects/hackathon/ml_preset/download_logs/latest/error")         
# CSV report produced by this script: one row per log file with the columns
# "file_path" and "errors_text".
OUTPUT_CSV = Path("errors_summary.csv") 

# Regex sources used to recognise "error" lines in build logs.
# Each entry is compiled with re.I and applied with search() to one log line at
# a time; a line is kept when ANY pattern matches, so the order of the entries
# does not affect the result.
#
# NOTE(review): several entries are subsumed by broader ones and could be
# dropped without changing which lines match:
#   * "ld(\.gold)?: cannot find -l(\S+)" is covered by "ld(\.gold)?: cannot find";
#   * "libtool: Version mismatch error" is covered by "Version mismatch error";
#   * the final catch-all "\berror:" (case-insensitive) covers many of the
#     tool-specific "...error:" patterns.
# NOTE(review): "-Werror" matches any line that merely contains that flag
# (e.g. an echoed compiler command line), and "requires.*version" is similarly
# broad — confirm these are intended before relying on the output as a list of
# real failures.
ERROR_REGEXES = [
    # ── rpmbuild / packaging ───────────────────────────────────
    r"^RPM build errors:",
    r"error: File not found:",
    r"Installed \(but unpackaged\) file\(s\) found",
    r"dangling symlink",
    r"scriptlet failed, exit status \d+",
    r"Failed (build )?dependencies:",
    r"patch.*FAILED",
    r"Bad exit status.*\(%\w+\)",
    r"Command exited with non-zero status",
    # ── build tools / generators ───────────────────────────────
    r"make\[?\d*]?: \*\*\* .*Error \d+",
    r"configure: error:",
    r"\bCMake Error\b",
    r"ninja: error:",
    r"Meson .*ERROR",
    # ── compilers / linkers ────────────────────────────────────
    r"ld(\.gold)?:.*undefined reference",
    r"ld(\.gold)?: cannot find",
    r"gcc.*: (fatal )?error:",
    r"clang.*: (fatal )?error:",
    r"Assembler messages:|Error:.*assembler",
    r"gfortran.*Error:",
    r"-Werror",
    r"fatal error: .*: No such file or directory",
    r"ld(\.gold)?: cannot find -l(\S+)",
    r"Version mismatch error",
    # ── Java / JVM / Gradle / Maven ────────────────────────────
    r"error: (cannot find|cannot access) symbol",
    r"\[ERROR\] Failed to execute goal",
    r"FAILURE: Build failed with an exception",
    # ── .NET ---------------------------------------------------
    r"error CS\d{4}:",
    # ── Qt / qmake --------------------------------------------
    r"Project ERROR:",
    # ── Rust / Cargo ------------------------------------------
    r"error: could not compile",
    # ── Go -----------------------------------------------------
    r"go (build|test|install): .*cannot find package",
    # ── Node / npm / yarn / node-gyp ---------------------------
    r"npm ERR!",
    r"error Command failed with exit code",
    r"gyp ERR! configure error",
    # ── Python / Perl / PHP / Ruby / Swift --------------------
    r"^\s*Traceback ",
    r"ModuleNotFoundError:",
    r"Can't locate .* in \@INC",
    r"\[ErrorException\]|Composer.*Error",
    r"Error installing .*:",
    r"swift.*error:",
    # ── test runners ------------------------------------------
    r"=+ \d+ failed, \d+ passed",
    r"\d+ tests? failed out of",
    r"Test Suites: .*failed",
    r"FAIL\s+.*\[build failed\]",
    r"Tests run: .*?\sFailures: [1-9]",
    # ── runtime / crashes -------------------------------------
    r"Segmentation fault",
    r"(Aborted|Abort trap|SIGABRT)",
    r"panic: .*",
    # ── patch / git -------------------------------------------
    r"git am --abort|does not exist in index",
    # ── pkg-config / dependencies -----------------------------
    r"Package .* was not found in the pkg-config search path",
    r"No matching distribution found for [\w\-\.]+",
    r"Could not find a version that satisfies the requirement [^ ]+",
    r"go .*cannot find package \"[^\"]+\"",
    r"(incompatible|mismatch|unsupported).*version",
    r"requires.*version",
    r"error: possibly undefined macro",
    r"libtool: Version mismatch error",
    # ── generic catch-all -------------------------------------
    r"\berror:",
]
# Compiled, case-insensitive form of every ERROR_REGEXES entry.
# A log line is treated as an error line when any of these patterns is found
# anywhere in it (they are used with .search(), not .match()).
ERROR_PATTERNS = [re.compile(pat, re.I) for pat in ERROR_REGEXES]
# Start of rpmbuild's trailing "RPM build errors:" summary. Compiled once here
# rather than handing the raw pattern to re.match() for every log line.
_RPM_TAIL_START = re.compile(r"^RPM build errors:", re.I)

# One (absolute log path, newline-joined error lines) tuple per log file.
rows = []

# Path.glob() yields entries in arbitrary order; sorting keeps the CSV row
# order stable between runs and machines.
for log_path in sorted(LOG_DIR.glob("*")):
    if not log_path.is_file():
        continue

    collected, in_rpm_tail = [], False
    # Decode explicitly as UTF-8 (matching the encoding of the CSV written
    # below) instead of the locale default; undecodable bytes are replaced so
    # a single bad byte cannot abort the scan. The file is streamed line by
    # line, so large logs are never held in memory in full.
    with log_path.open(encoding="utf-8", errors="replace") as fh:
        for line in fh:
            # From the "RPM build errors:" line onwards every line is kept
            # verbatim, whether or not it matches an error pattern.
            if not in_rpm_tail and _RPM_TAIL_START.match(line):
                in_rpm_tail = True
            if in_rpm_tail or any(rx.search(line) for rx in ERROR_PATTERNS):
                collected.append(line.rstrip())

    # Files without any matching line still get a row, with empty errors_text.
    rows.append((str(log_path.resolve()), "\n".join(collected)))

# newline="" lets the csv module control line endings, which is required for
# the multi-line errors_text cells to be quoted and written correctly.
with OUTPUT_CSV.open("w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["file_path", "errors_text"])
    writer.writerows(rows)

print(f"Обработано {len(rows)} лог-файлов → {OUTPUT_CSV}")

Обработано 314 лог-файлов → errors_summary.csv
