In [2]:
import os
import csv
import xml.etree.ElementTree as ET

# --- Configuration ---
# Directory that holds the result XML files, relative to the working directory.
# Change it if the XMLs are stored somewhere else.
DATA_FOLDER = "result_xmls"
# CSV file produced by this cell.
OUTPUT_FILE = "successful_runs_w_results.csv"

# (Algorithm Name, BTOR2 XML Path, C XML Path) triples.
# The table below lists bare file names; every one is joined onto DATA_FOLDER.
PAIRS = [
    (algorithm, os.path.join(DATA_FOLDER, btor2_xml), os.path.join(DATA_FOLDER, c_xml))
    for algorithm, btor2_xml, c_xml in (
        ("CEGAR_PRED",
         "btor2-algos-opt.2025-12-07_02-16-12.results.btor2-pred-cegar.btor2-pred.xml",
         "c-algos.2025-12-07_02-30-26.results.c-pred-cegar.c-pred.xml"),
        ("CEGAR_EXPL",
         "btor2-algos-opt.2025-12-07_02-16-12.results.btor2-expl-cegar.btor2-expl.xml",
         "c-algos.2025-12-07_02-30-26.results.c-expl-cegar.c-expl.xml"),
        ("BMC",
         "btor2-algos-opt.2025-12-07_02-16-12.results.btor2-bounded.btor2.xml",
         "c-algos.2025-12-07_02-30-26.results.c-bounded.c.xml"),
        ("IMC",
         "btor2-algos-opt.2025-12-07_02-16-12.results.btor2-imc.btor2.xml",
         "c-algos.2025-12-07_02-30-26.results.c-imc.c.xml"),
        ("K-Induction",
         "btor2-algos-opt.2025-12-07_02-16-12.results.btor2-kind.btor2.xml",
         "c-algos.2025-12-07_02-30-26.results.c-kind.c.xml"),
    )
]

def parse_benchmark_xml(xml_path, algorithm, input_type, csv_writer):
    """
    Parse a single BenchExec result XML and write its successful runs to the CSV.

    A run is successful when one of its <column> elements has
    title="category" and value="correct". For every such run one row is
    written: [file name, input_type, algorithm, cputime, memory, status].
    The file name is the basename of the run's 'name' attribute; a column
    that is absent from the run is written as None.

    Args:
        xml_path: Path of the result XML file.
        algorithm: Algorithm label copied into every row.
        input_type: Input type label copied into every row.
        csv_writer: Object with a writerow(list) method (e.g. csv.writer).

    Returns:
        None. A missing file or malformed XML is reported with print() and
        no rows are written for it.
    """
    if not os.path.exists(xml_path):
        print(f"Warning: File not found: {xml_path}")
        return

    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()

        # BenchExec results store individual test runs in <run> tags
        for run in root.findall('run'):
            is_successful = False
            cpu_time = None
            memory = None
            # Reset per run: otherwise a run without a 'status' column would
            # reuse the previous run's status (or hit a NameError on the
            # very first run).
            status = None

            # Scan the run's columns for category, cputime, memory and status
            for column in run.findall('column'):
                title = column.get('title')
                value = column.get('value')

                if title == 'category' and value == 'correct':
                    is_successful = True
                elif title == 'cputime':
                    cpu_time = value
                elif title == 'memory':
                    memory = value
                elif title == 'status':
                    status = value

            if is_successful:
                # Extract filename from the full path in the 'name' attribute
                full_name = run.get('name', '')
                file_name = os.path.basename(full_name)

                csv_writer.writerow([
                    file_name,
                    input_type,
                    algorithm,
                    cpu_time,
                    memory,
                    status
                ])

    except ET.ParseError as e:
        print(f"Error parsing XML {xml_path}: {e}")

def main():
    """
    Create OUTPUT_FILE, write the header row, then append the successful runs
    of every BTOR2 and C result XML listed in PAIRS.
    """
    print(f"Starting processing. Output will be saved to {OUTPUT_FILE}...")

    header = ['Filename', 'Input Type', 'Algorithm', 'CPU Time', 'Memory', 'Result']

    with open(OUTPUT_FILE, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)

        for entry in PAIRS:
            algo_name, btor2_xml_path, c_xml_path = entry
            # For each algorithm: the BTOR2 results first, then the C results.
            labelled_paths = ((btor2_xml_path, "BTOR2"), (c_xml_path, "C"))
            for xml_path, input_type in labelled_paths:
                parse_benchmark_xml(xml_path, algo_name, input_type, writer)

    print("Processing complete.")


if __name__ == "__main__":
    main()

Starting processing. Output will be saved to successful_runs_w_results.csv...
Processing complete.


In [3]:
import os
import csv
import xml.etree.ElementTree as ET

# --- Configuration ---
# List of files to process. Each entry is a tuple: (File Path, Input Type Label)
# Paths are built with os.path.join so the separator matches the host OS;
# a hard-coded backslash is only treated as a path separator on Windows.
FILES_TO_PROCESS = [
    (os.path.join("xcfa_analyses", "fight-xcfa.2025-12-07_11-41-58.results.btor2-xcfa.btor2.xml"), "BTOR2"),
    (os.path.join("xcfa_analyses", "fight-xcfa.2025-12-07_11-41-58.results.c-xcfa.c.xml"), "C"),
]

OUTPUT_FILE = "xcfa_successful_runs.csv"

def parse_and_append_results(xml_path, input_type_label, csv_writer, success_category="unknown"):
    """
    Parse a benchmark result XML and append the selected runs to the CSV writer.

    A run is selected when one of its <column> elements has title="category"
    and a value equal to `success_category`. For every selected run one row is
    written: [file name, input_type_label, cputime, memory], where the file
    name is the basename of the run's 'name' attribute and a missing column is
    written as None.

    Args:
        xml_path: Path of the result XML file.
        input_type_label: Label copied into every row (e.g. "BTOR2" or "C").
        csv_writer: Object with a writerow(list) method (e.g. csv.writer).
        success_category: Category value that marks a run as selected.
            Defaults to "unknown", the filter this function has always used.
            NOTE(review): presumably these runs only build the XCFA and yield
            no verdict, hence "unknown" rather than "correct" - confirm.

    Returns:
        None. A missing file or malformed XML is reported with print() and
        no rows are written for it.
    """
    # Check if file exists to avoid crashing
    if not os.path.exists(xml_path):
        print(f"Warning: File not found: {xml_path}")
        return

    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()

        # BenchExec results store individual test runs in <run> tags
        for run in root.findall('run'):
            is_successful = False
            cpu_time = None
            memory = None

            # Scan the run's columns for category, cputime and memory
            for column in run.findall('column'):
                title = column.get('title')
                value = column.get('value')

                # The run is selected when its category equals success_category
                if title == 'category' and value == success_category:
                    is_successful = True
                elif title == 'cputime':
                    cpu_time = value
                elif title == 'memory':
                    memory = value

            if is_successful:
                # Extract the filename from the 'name' attribute
                # e.g., "../../benchmarks/test.yml" -> "test.yml"
                full_name = run.get('name', '')
                file_name = os.path.basename(full_name)

                # Write the row: Filename, Input Type, CPU, Memory
                csv_writer.writerow([
                    file_name,
                    input_type_label,
                    cpu_time,
                    memory
                ])

    except ET.ParseError as e:
        print(f"Error parsing XML {xml_path}: {e}")

def main():
    """
    Create OUTPUT_FILE, write the header row, then append the selected runs of
    every (path, label) entry in FILES_TO_PROCESS.
    """
    print(f"Starting processing...")
    print(f"Output will be saved to: {OUTPUT_FILE}")

    header = ['Filename', 'Input Type', 'CPU Time', 'Memory']

    with open(OUTPUT_FILE, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)

        # Handle the configured files in the order they are listed
        for entry in FILES_TO_PROCESS:
            xml_path, label = entry
            print(f"Processing {label} file: {xml_path}")
            parse_and_append_results(xml_path, label, writer)

    print("Processing complete.")


if __name__ == "__main__":
    main()

Starting processing...
Output will be saved to: xcfa_successful_runs.csv
Processing BTOR2 file: xcfa_analyses\fight-xcfa.2025-12-07_11-41-58.results.btor2-xcfa.btor2.xml
Processing C file: xcfa_analyses\fight-xcfa.2025-12-07_11-41-58.results.c-xcfa.c.xml
Processing complete.


In [4]:
import os
import json
import csv

# --- Configuration ---
# Root folder of the directory tree that is walked in search of xcfa.json files.
# Update this if your folder is named differently or located elsewhere.
ROOT_FOLDER = "fight_xcfa"
# CSV file that receives one row of metrics per analyzed xcfa.json.
OUTPUT_CSV = "xcfa_analysis_metrics.csv"

def count_stmt_labels(obj):
    """
    Count the StmtLabel objects in a nested dictionary/list structure.

    A dictionary counts as a StmtLabel when its 'type' entry equals
    'hu.bme.mit.theta.xcfa.model.StmtLabel'. All dictionary values and list
    items are searched, at any depth; other values are ignored.

    The traversal uses an explicit work stack instead of recursion, so deeply
    nested input cannot exhaust the interpreter's recursion limit. The input
    is assumed to be acyclic (as data produced by json.load always is).

    Args:
        obj: Any value; typically the decoded content of an xcfa.json file.

    Returns:
        The number of StmtLabel dictionaries found (0 for non-container input).
    """
    count = 0
    pending = [obj]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            # Check if this dictionary represents a StmtLabel
            if node.get('type') == 'hu.bme.mit.theta.xcfa.model.StmtLabel':
                count += 1
            # A StmtLabel may itself contain nested structures, so always descend
            pending.extend(node.values())
        elif isinstance(node, list):
            pending.extend(node)
    return count

def analyze_xcfa_json(file_path):
    """
    Load an xcfa.json file and compute its size metrics.

    Returns a dict with the keys "vars" (top-level 'vars' plus the 'vars' of
    every procedure), "locs", "edges" (both summed over all procedures) and
    "stmts" (the StmtLabel count of the whole document). If anything goes
    wrong, the error is printed and None is returned.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            xcfa = json.load(handle)

        # The variable tally starts with the global variables; the locals of
        # each procedure are added in the loop, next to locations and edges.
        totals = {"vars": len(xcfa.get('vars', [])), "locs": 0, "edges": 0}
        for procedure in xcfa.get('procedures', []):
            for key in ("vars", "locs", "edges"):
                totals[key] += len(procedure.get(key, []))

        # Atomic statements: number of StmtLabels anywhere in the document
        totals["stmts"] = count_stmt_labels(xcfa)
        return totals

    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None

def main():
    """
    Walk ROOT_FOLDER, analyze every xcfa.json found and write one CSV row per
    successfully analyzed file to OUTPUT_CSV.
    """
    print(f"Scanning directory: {ROOT_FOLDER}")
    print(f"Output will be saved to: {OUTPUT_CSV}")

    rows = []

    for current_dir, _subdirs, file_names in os.walk(ROOT_FOLDER):
        # Only directories that directly contain an xcfa.json are of interest
        if "xcfa.json" not in file_names:
            continue

        json_path = os.path.join(current_dir, "xcfa.json")

        # Name: the folder holding the json,
        # e.g. .../adding.1.yml/xcfa.json -> adding.1.yml
        benchmark = os.path.basename(current_dir)

        # Input type: decided by a directory component of the normalized path
        components = os.path.normpath(json_path).split(os.sep)
        if "btor2-xcfa" in components:
            input_type = "BTOR2"
        elif "c-xcfa" in components:
            input_type = "C"
        else:
            input_type = "Unknown"

        metrics = analyze_xcfa_json(json_path)
        if not metrics:
            # The analysis failed (and already reported why); skip this file
            continue

        rows.append([
            benchmark,
            metrics['vars'],
            metrics['locs'],
            metrics['edges'],
            metrics['stmts'],
            input_type,
        ])

    # Write the header followed by all collected rows
    with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as out_file:
        sheet = csv.writer(out_file)
        sheet.writerow(["Name", "Variable Count", "Locs", "Edges", "Atomic Stmts", "Input Type"])
        sheet.writerows(rows)

    print(f"Done. Processed {len(rows)} files.")


if __name__ == "__main__":
    main()

Scanning directory: fight_xcfa
Output will be saved to: xcfa_analysis_metrics.csv
Done. Processed 1240 files.


In [5]:
import os
import csv
import re

# --- Configuration ---
# List of folders to scan for log files.
# Paths are built with os.path.join so the separator matches the host OS;
# a hard-coded backslash is only treated as a path separator on Windows.
LOG_FOLDERS = [
    os.path.join("opt_algos_logfiles", "btor2-algos-opt.2025-12-07_02-16-12.logfiles"),
    os.path.join("opt_algos_logfiles", "c-algos.2025-12-07_02-30-26.logfiles"),
]

OUTPUT_FILE = "log_analysis_iterations.csv"

def get_algorithm_type(filename):
    """
    Deduce a readable Algorithm Type from the log filename.

    The part of the name before the first dot is checked first, so a benchmark
    name that happens to contain a marker (e.g. "kind" inside
    "c-imc.kindergarten.yml.log") cannot override the actual algorithm prefix.
    If the prefix contains no marker, the whole file name is searched instead.
    Matching is case-insensitive.

    Args:
        filename: Log file name, expected to look like
            "<algo prefix>.<benchmark name>.log".

    Returns:
        "CEGAR_PRED", "CEGAR_EXPL", "BMC", "K-Induction" or "IMC" when a marker
        is found; otherwise the part of `filename` before the first dot.
    """
    markers = (
        ("pred-cegar", "CEGAR_PRED"),
        ("expl-cegar", "CEGAR_EXPL"),
        ("bounded", "BMC"),
        ("kind", "K-Induction"),
        ("imc", "IMC"),
    )
    prefix = filename.split('.')[0]

    # First pass: the prefix only. Second pass: the complete name, which keeps
    # names resolvable whose marker is not located in the prefix.
    for candidate in (prefix.lower(), filename.lower()):
        for marker, label in markers:
            if marker in candidate:
                return label

    # Fallback: return the prefix before the first dot
    return prefix

def parse_log_file(filepath):
    """
    Extract benchmark name, input type, algorithm and iteration number from a log file.

    Returns a dict with the keys "Input Filename", "Input Type", "Algorithm"
    and "Iterations". "Iterations" is the largest iteration number mentioned
    in the log (0 when none is found). If the file cannot be read, the error
    is printed and the values gathered so far are returned.
    """
    filename = os.path.basename(filepath)

    # 1. Benchmark name
    # Expected file name: [algo].[benchmark_name].log. The ".log" suffix is
    # removed, and the part before the first dot is dropped as well when it
    # looks like an algorithm prefix (e.g. "c-pred-cegar.cambridge.5.yml").
    try:
        benchmark_name = filename
        if filename.endswith(".log"):
            stem = filename[:-4]
            benchmark_name = stem
            prefix, dot, remainder = stem.partition('.')
            algo_markers = ['btor2', 'c-', 'pred', 'expl', 'bounded', 'kind', 'imc']
            if dot and any(marker in prefix for marker in algo_markers):
                benchmark_name = remainder
    except Exception:
        benchmark_name = filename

    input_type = "Unknown"
    max_iteration = 0

    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as handle:
            log_text = handle.read()

        # 2. Input type, taken from the first "--input-type VALUE" in the log
        type_match = re.search(r'--input-type\s+(\w+)', log_text)
        if type_match is not None:
            input_type = type_match.group(1)

        # 3. Iterations
        # "Iteration X" (CEGAR) or "Starting iteration X" (BMC/Kind); when
        # neither occurs, fall back to "Unrolling X".
        numbers = re.findall(r'(?:Starting )?[Ii]teration\s+(\d+)', log_text)
        if not numbers:
            numbers = re.findall(r'Unrolling\s+(\d+)', log_text)
        if numbers:
            max_iteration = max(int(number) for number in numbers)

    except Exception as e:
        print(f"Error reading {filename}: {e}")

    # 4. Algorithm type, derived from the file name
    return {
        "Input Filename": benchmark_name,
        "Input Type": input_type,
        "Algorithm": get_algorithm_type(filename),
        "Iterations": max_iteration
    }

def main():
    """
    Parse every ".log" file below the folders in LOG_FOLDERS and write the
    collected records to OUTPUT_FILE. Nothing is written when no log is found.
    """
    print("Starting log analysis...")
    print(f"Output will be saved to: {OUTPUT_FILE}")

    results = []

    for folder in LOG_FOLDERS:
        print(f"Scanning folder: {folder}")

        if not os.path.exists(folder):
            print(f"Warning: Folder not found: {folder}")
            continue

        # Collect one record per log file anywhere below this folder
        for current_dir, _subdirs, file_names in os.walk(folder):
            for file_name in file_names:
                if not file_name.endswith(".log"):
                    continue
                results.append(parse_log_file(os.path.join(current_dir, file_name)))

    if not results:
        print("No log files found in the specified directories.")
        return

    # Write results to CSV
    headers = ["Input Filename", "Input Type", "Algorithm", "Iterations"]
    with open(OUTPUT_FILE, mode='w', newline='', encoding='utf-8') as csvfile:
        table = csv.DictWriter(csvfile, fieldnames=headers)
        table.writeheader()
        table.writerows(results)

    print(f"Done. Processed {len(results)} log files.")


if __name__ == "__main__":
    main()

Starting log analysis...
Output will be saved to: log_analysis_iterations.csv
Scanning folder: opt_algos_logfiles\btor2-algos-opt.2025-12-07_02-16-12.logfiles
Scanning folder: opt_algos_logfiles\c-algos.2025-12-07_02-30-26.logfiles
Done. Processed 6970 log files.
