In [None]:
import json
from dataclasses import dataclass
from enum import Enum
from typing import List, Dict

class Impact(Enum):
    """Impact rating of a finding; each member's value equals its name."""

    # Declared in ascending order: LOW, then MEDIUM, then HIGH.
    LOW, MEDIUM, HIGH = "LOW", "MEDIUM", "HIGH"

class Likelihood(Enum):
    """Likelihood rating of a finding; each member's value equals its name."""

    # Declared in ascending order: LOW, then MEDIUM, then HIGH.
    LOW, MEDIUM, HIGH = "LOW", "MEDIUM", "HIGH"

class Severity(Enum):
    """Severity level of a finding; each member's value equals its name."""

    # Declared from least to most severe.
    INFO, WARNING, ERROR = "INFO", "WARNING", "ERROR"

@dataclass
class IssueStruct:
    """One static-analysis finding in the notebook's common shape.

    Attributes:
        file_path: path of the flagged file as recorded by the parser.
        impact: impact rating of the finding.
        likelihood: likelihood rating of the finding.
        severity: severity level of the finding.
        cwes: CWE identifiers attached to the finding, e.g. ``"CWE-798"``.
    """
    file_path: str
    impact: Impact
    likelihood: Likelihood
    severity: Severity
    cwes: List[str]

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict view of this finding.

        Enum fields are replaced by their ``.value``. ``cwes`` is emitted as
        a flat list of strings.
        """
        # ``cwes`` is declared as a list, but tolerate a bare string (treated
        # as one identifier) or None so the output is always a flat list
        # rather than a list wrapped in another list.
        if self.cwes is None:
            cwes = []
        elif isinstance(self.cwes, str):
            cwes = [self.cwes]
        else:
            cwes = list(self.cwes)

        return {
            "file_path": self.file_path,
            "impact": self.impact.value,
            "likelihood": self.likelihood.value,
            "severity": self.severity.value,
            "cwes": cwes,
        }

    def print(self, indent: int = 4):
        """Print this finding to stdout as JSON indented by ``indent`` spaces."""
        print(json.dumps(self.to_dict(), indent=indent))

def print_directory_map(directory_map, indent: int = 4):
    """Print a directory -> issues map as a single JSON document on stdout.

    Args:
        directory_map: mapping of directory key to an iterable of issue
            objects. Each issue must expose ``file_path``, ``cwes`` and the
            attributes ``impact``, ``likelihood`` and ``severity``, each of
            which has a ``.value``.
        indent: indentation width passed to ``json.dumps``.

    The output has the form
    ``{directory: {"detected_files_meta_data": [issue, ...]}}``.
    """

    def issue_record(issue):
        """Convert one issue into a JSON-serializable dict."""
        cwes = issue.cwes
        # Emit a flat list: a bare string counts as one identifier, and an
        # existing list is copied as-is instead of being wrapped in a second
        # list.
        if cwes is None:
            flat_cwes = []
        elif isinstance(cwes, str):
            flat_cwes = [cwes]
        else:
            flat_cwes = list(cwes)
        return {
            "file_path": issue.file_path,
            "impact": issue.impact.value,
            "likelihood": issue.likelihood.value,
            "severity": issue.severity.value,
            "cwes": flat_cwes,
        }

    output_dict = {
        directory: {
            "detected_files_meta_data": [issue_record(issue) for issue in issues]
        }
        for directory, issues in directory_map.items()
    }
    print(json.dumps(output_dict, indent=indent))

In [None]:
# Initialize map: directory (string) -> list of IssueStruct
directory_map: Dict[str, List[IssueStruct]] = {}

# Load the Coverity JSON export
with open('/content/coverity.json', 'r') as infile:
    data = json.load(infile)

for issue in data.get('issues', []):
    # "checkerProperties" may be missing or null; normalise it to a dict once
    # instead of repeating the lookup for every field below.
    checker_props = issue.get('checkerProperties') or {}

    file_path_raw = issue.get('strippedMainEventFilePathname') or ''
    # Remove "data/" prefix
    if file_path_raw.startswith('data/'):
        file_path_clean = file_path_raw[len('data/'):]
    else:
        file_path_clean = file_path_raw

    # Determine map key (substring up to "/files"); the whole path is used
    # when "/files" does not occur in it.
    files_index = file_path_clean.find('/files')
    map_key = file_path_clean[:files_index] if files_index != -1 else file_path_clean

    # Map impact: any value that is not LOW/MEDIUM/HIGH falls back to LOW
    impact_str = str(checker_props.get('impact') or 'LOW').upper()
    impact_enum = Impact.__members__.get(impact_str, Impact.LOW)

    # Map likelihood from the occurrence count: 1 -> LOW, below 5 -> MEDIUM,
    # otherwise HIGH
    occ_count = issue.get('occurrenceCountForMK', 1)
    if occ_count == 1:
        likelihood_enum = Likelihood.LOW
    elif occ_count < 5:
        likelihood_enum = Likelihood.MEDIUM
    else:
        likelihood_enum = Likelihood.HIGH

    # Map severity: security findings are errors, everything else a warning
    issue_kinds = checker_props.get('issueKinds') or []
    if 'SECURITY' in issue_kinds:
        severity_enum = Severity.ERROR
    else:
        severity_enum = Severity.WARNING

    # Map cwes: IssueStruct.cwes is a list of strings, so wrap the single
    # category in a list. A missing category yields an empty list; the
    # previous `"CWE-" + []` default raised TypeError in that case.
    cwe_category = checker_props.get('cweCategory')
    cwes_list = [f"CWE-{cwe_category}"] if cwe_category else []

    issue_struct = IssueStruct(
        file_path=file_path_clean,
        impact=impact_enum,
        likelihood=likelihood_enum,
        severity=severity_enum,
        cwes=cwes_list
    )

    # Add to map, creating the directory's list on first use
    directory_map.setdefault(map_key, []).append(issue_struct)

In [None]:
# Optional: one summary line per directory with its issue count
for key in directory_map:
    issues = directory_map[key]
    print(f"{key}: {len(issues)} issues")

In [None]:
# Dump the current directory_map as indented JSON on stdout. With the cells
# run top to bottom, this is the map built from coverity.json.
print_directory_map(directory_map)

{
    "nautobot/54abe23": {
        "detected_files_meta_data": [
            {
                "file_path": "nautobot/54abe23/files/test_customfields.py",
                "impact": "MEDIUM",
                "likelihood": "LOW",
                "cwes": [
                    "CWE-398"
                ]
            },
            {
                "file_path": "nautobot/54abe23/files/test_customfields.py",
                "impact": "MEDIUM",
                "likelihood": "LOW",
                "cwes": [
                    "CWE-398"
                ]
            },
            {
                "file_path": "nautobot/54abe23/files/relationships.py",
                "impact": "MEDIUM",
                "likelihood": "LOW",
                "cwes": [
                    "CWE-476"
                ]
            }
        ]
    },
    "frappe/497ea86": {
        "detected_files_meta_data": [
            {
                "file_path": "frappe/497ea86/files/document.py",
                "impact":

In [None]:
import re
from collections import defaultdict

def parse_horusec_file(filename):
    """Parse a Horusec text report into a directory -> findings map.

    The file is split into entries on runs of 80 ``=`` characters. An entry
    without a ``File: /github/workspace/data/...`` line is skipped. For every
    other entry:

    * ``Confidence:`` sets both impact and likelihood. A missing or
      unrecognised value maps to MEDIUM.
    * ``Severity:`` CRITICAL/HIGH maps to ERROR and MEDIUM to WARNING. Anything
      else, or a missing value, maps to INFO.
    * CWE identifiers are collected from the ``Details:`` block, which runs
      up to the first blank line.

    Args:
        filename: path of the Horusec log file to read.

    Returns:
        A ``defaultdict(list)`` whose keys are the file path with its
        ``/files/...`` tail removed and whose values are lists of
        ``IssueStruct``.
    """
    directory_map = defaultdict(list)

    with open(filename, 'r') as f:
        content = f.read()

    for entry in content.split('=' * 80):
        file_path_match = re.search(r'File: /github/workspace/data/(.*)', entry)
        if not file_path_match:
            continue  # skip incomplete entries

        # The capture group already excludes "/github/workspace/data/"
        file_path = file_path_match.group(1).strip()

        # derive directory key: everything up to "/files"
        dir_key = re.sub(r'/files/.*$', '', file_path)

        # Confidence drives both impact and likelihood; normalise it once
        confidence_match = re.search(r'Confidence: (.*)', entry)
        conf = confidence_match.group(1).strip().upper() if confidence_match else "MEDIUM"
        impact = Impact.__members__.get(conf, Impact.MEDIUM)
        likelihood = Likelihood.__members__.get(conf, Likelihood.MEDIUM)

        # Severity mapping: CRITICAL/HIGH -> ERROR, MEDIUM -> WARNING, else INFO
        severity_match = re.search(r'Severity: (.*)', entry)
        sev = severity_match.group(1).strip().upper() if severity_match else "INFO"
        if sev in ("CRITICAL", "HIGH"):
            severity_enum = Severity.ERROR
        elif sev == "MEDIUM":
            severity_enum = Severity.WARNING
        else:
            severity_enum = Severity.INFO

        # --- Extract Details block: from "Details:" up to the first blank line ---
        details_lines = []
        details_start = entry.find('Details:')
        if details_start != -1:
            for line in entry[details_start:].splitlines():
                if not line.strip():  # stop at empty line
                    break
                details_lines.append(line)
        details_text = ' '.join(details_lines)

        # Extract all CWE IDs from details_text. dict.fromkeys removes
        # duplicates while keeping first-seen order, so the result is the same
        # on every run (a set's iteration order varies with string hashing).
        cwe_ids = re.findall(r'CWE-(\d+)', details_text)
        cwe_list = list(dict.fromkeys(f"CWE-{cwe}" for cwe in cwe_ids))

        directory_map[dir_key].append(
            IssueStruct(
                file_path=file_path,
                impact=impact,
                likelihood=likelihood,
                severity=severity_enum,
                cwes=cwe_list,
            )
        )

    return directory_map



In [None]:
# Parse the Horusec log and dump the resulting map as indented JSON.
# NOTE(review): this rebinds the global `directory_map`, replacing the map
# built from coverity.json earlier in the notebook; re-run that cell if the
# Coverity map is needed again.
directory_map = parse_horusec_file('/content/horusec_results.log')
print_directory_map(directory_map)

{
    "rdiffweb/e6f0d80": {
        "detected_files_meta_data": [
            {
                "file_path": "rdiffweb/e6f0d80/files/test.py",
                "impact": "MEDIUM",
                "likelihood": "MEDIUM",
                "severity": "ERROR",
                "cwes": [
                    [
                        "CWE-798"
                    ]
                ]
            },
            {
                "file_path": "rdiffweb/e6f0d80/files/test_page_admin_users.py",
                "impact": "MEDIUM",
                "likelihood": "MEDIUM",
                "severity": "ERROR",
                "cwes": [
                    [
                        "CWE-798"
                    ]
                ]
            },
            {
                "file_path": "rdiffweb/e6f0d80/files/test_page_admin_users.py",
                "impact": "MEDIUM",
                "likelihood": "MEDIUM",
                "severity": "ERROR",
                "cwes": [
                    [
        