In [None]:
import pandas as pd
import bz2
import xml.etree.ElementTree as ET
import re
import os
import glob

# Lift the column-width limit so long cell values (e.g. file paths) are
# shown in full rather than truncated when a DataFrame is displayed.
pd.set_option('display.max_colwidth', None)

In [None]:
def parse_runs_from_bz2_xml(file_path):
    """Parse a bz2-compressed XML file into a DataFrame with one row per ``<run>``.

    Every ``<run>`` element found anywhere in the document contributes one
    row, built from its ``name``, ``expectedVerdict`` and ``files``
    attributes and from its direct ``<column>`` children whose ``title`` is
    ``status`` or ``category``.

    Args:
        file_path: Path to the ``.xml.bz2`` file to read.

    Returns:
        A ``pandas.DataFrame`` with the columns ``name``, ``file_name``,
        ``expectedVerdict``, ``status`` and ``category``. Missing attributes
        or columns are represented by ``''``. The columns are present even
        when the document contains no ``<run>`` element, so callers can
        index them unconditionally.

    Raises:
        OSError: If the file cannot be opened or decompressed.
        xml.etree.ElementTree.ParseError: If the content is not well-formed
            XML.
    """
    columns = ['name', 'file_name', 'expectedVerdict', 'status', 'category']
    prefix = "../sv-benchmarks/c/"

    # Hand the decompressed byte stream straight to the parser: it honours
    # the encoding declared in the XML and avoids holding the whole document
    # in memory as one string next to the parsed tree.
    with bz2.open(file_path, 'rb') as f:
        root = ET.parse(f).getroot()

    rows = []
    for run in root.findall('.//run'):
        # ``files`` looks like "[first, second, ...]"; only the first entry
        # is kept.
        files_attr = run.get('files', '')
        file_name = ''
        if files_attr:
            file_name = files_attr.strip('[]').split(',')[0].strip()

        # Drop the benchmark-directory prefix when present.
        if file_name.startswith(prefix):
            file_name = file_name[len(prefix):]

        # Map each column title to its value; if a title occurs more than
        # once, the last occurrence wins.
        values = {
            col.get('title'): col.get('value', '')
            for col in run.findall('column')
        }

        rows.append({
            'name': run.get('name', ''),
            'file_name': file_name,
            'expectedVerdict': run.get('expectedVerdict', ''),
            'status': values.get('status', ''),
            'category': values.get('category', ''),
        })

    # Explicit columns keep the schema stable for an empty result set.
    return pd.DataFrame(rows, columns=columns)

def extract_tool_and_subcategory(filename):
    """Return ``(tool, subcategory)`` taken from a dot-separated file name.

    Only the final path component is considered. It is split on ``'.'``;
    the first piece is returned as the tool and the fifth piece (index 4)
    as the subcategory. When the name has fewer than five pieces the
    subcategory is ``''``.
    """
    segments = os.path.basename(filename).split('.')

    # str.split always yields at least one element, so index 0 is safe.
    tool = segments[0]
    try:
        subcategory = segments[4]
    except IndexError:
        subcategory = ''
    return tool, subcategory

def load_all_runs_by_tool_subcategory(directory):
    """Parse every ``*.xml.bz2`` file directly inside ``directory``.

    Each matching file is parsed with ``parse_runs_from_bz2_xml`` and filed
    under the tool and subcategory that ``extract_tool_and_subcategory``
    derives from its name.

    Returns:
        A nested dict ``{tool: {subcategory: DataFrame}}``. If several files
        map to the same tool/subcategory pair, the one visited last replaces
        the earlier ones. The dict is empty when no file matches.
    """
    pattern = os.path.join(directory, '*.xml.bz2')
    by_tool = {}

    for path in glob.glob(pattern):
        tool, subcategory = extract_tool_and_subcategory(path)
        runs = parse_runs_from_bz2_xml(path)
        # Create the per-tool mapping on first sight, then store the frame.
        by_tool.setdefault(tool, {})[subcategory] = runs

    return by_tool
