In [13]:
import json, sys
import pandas as pd


def load_dict_from_file(file_path):
    """ Load JSON dictionary from a file. """
    try:
        with open(file_path, 'r') as file:
            return json.load(file)['f']
    except FileNotFoundError:
        print("File not found.")
        sys.exit(1)
    except json.JSONDecodeError:
        print("Error decoding JSON.")
        sys.exit(1)


def parse_non_covered_lines(coverage_dict):
    all_non_covered_lines = {}
    total_covered = 0
    total_lines = 0

    for file, coverage_data in coverage_dict.items():
        # Parse percentage (already a decimal)
        p = coverage_data['p']
        file_coverage_info = {'percentage': p}

        # Flatten uncovered line ranges
        non_covered_lines = set()
        raw_non_covered_lines = coverage_data['u']

        for entry in raw_non_covered_lines:
            start_line = entry[0]
            end_line = entry[2]
            non_covered_lines.update(range(start_line, end_line + 1))

        uncovered_count = len(non_covered_lines)

        # Estimate total and covered lines
        if p < 1.0:
            estimated_total_lines = int(uncovered_count / (1 - p))
        else:
            estimated_total_lines = uncovered_count  # No uncovered lines → total = uncovered_count (0)

        covered_count = estimated_total_lines - uncovered_count

        # Add to global totals
        total_covered += covered_count
        total_lines += estimated_total_lines

        file_coverage_info['non_covered_lines'] = non_covered_lines
        file_coverage_info['uncovered_count'] = uncovered_count
        file_coverage_info['total_lines_count'] = estimated_total_lines
        all_non_covered_lines[file] = file_coverage_info

    return all_non_covered_lines, total_covered


def compare_coverage(fuzzer_non_covered_lines, cts_non_covered_lines):
    coverage_diff = {}

    for file, coverage_data in fuzzer_non_covered_lines.items():
        fuzzer_percent_coverage = coverage_data['percentage']

        if file not in cts_non_covered_lines:
            lines_not_covered_by_cts = set()
        else:
            lines_not_covered_by_cts = cts_non_covered_lines[file]['non_covered_lines']
            
        file_coverage_info = {}

        lines_not_covered_by_fuzzer = coverage_data['non_covered_lines']

        lines_covered_by_fuzzer_but_not_cts = lines_not_covered_by_cts.difference(lines_not_covered_by_fuzzer)

        file_coverage_info['num_lines_missed'] = len(lines_covered_by_fuzzer_but_not_cts)
        file_coverage_info['lines_missed'] = sorted(lines_covered_by_fuzzer_but_not_cts)
        coverage_diff[file] = file_coverage_info

    # Add all CTS files
    for file, coverage_data in cts_non_covered_lines.items():
        if file in coverage_diff:
            continue
        
        file_coverage_info = {'num_lines_missed': 0, 'lines_missed': set()}
        coverage_diff[file] = file_coverage_info
    return coverage_diff


def display_difference(difference_in_coverage):
    # Create a list of tuples from the dictionary, selecting both 'percentage' and 'difference' values
    data = [(file, info['num_lines_missed'], info['lines_missed']) for file, info in difference_in_coverage.items()]

    # Create DataFrame with appropriate column names
    df = pd.DataFrame(data, columns=['File', 'Num lines missed', 'Lines missed'])
    
    filtered_df = df[df['Num lines missed'] > 0]

    # Sort the DataFrame by the 'File' column
    print("Total lines missed", df['Num lines missed'].sum())
    return filtered_df.sort_values(by='File')


def collate_coverage(dict_to_collate_to, dict_to_collate_from):
    total_covered = 0
    total_lines = 0

    for file, coverage_data in dict_to_collate_from.items():
        if file not in dict_to_collate_to:
            # Just copy the full file entry over
            dict_to_collate_to[file] = coverage_data
            total_covered += coverage_data['total_lines_count'] - coverage_data['uncovered_count']
            total_lines += coverage_data['total_lines_count']
            continue

        # File exists in both — merge uncovered lines
        existing_data = dict_to_collate_to[file]
        merged_non_covered = existing_data['non_covered_lines'].intersection(
            coverage_data['non_covered_lines']
        )

        uncovered_count = len(merged_non_covered)
        estimated_total_lines = existing_data['total_lines_count']  # use the existing estimate
        covered_count = estimated_total_lines - uncovered_count

        # Update file entry
        dict_to_collate_to[file] = {
            'non_covered_lines': merged_non_covered,
            'uncovered_count': uncovered_count,
            'total_lines_count': estimated_total_lines
        }

        # Update global totals
        total_covered += covered_count
        total_lines += estimated_total_lines

    return dict_to_collate_to, total_covered, total_lines


def cov_compare(fuzzer_cov_path, cts_cov_path):
    # Get coverage stats from JSON
    fuzzer_coverage_dict = load_dict_from_file(fuzzer_cov_path)
    cts_coverage_dict = load_dict_from_file(cts_cov_path)
    cts_shader_coverage_dict = load_dict_from_file(cts_shader_coverage_path)

    # Parse all covered lines by file
    fuzzer_non_covered_lines, fuzzer_total_covered_lines = parse_non_covered_lines(fuzzer_coverage_dict)
    cts_non_covered_lines, _ = parse_non_covered_lines(cts_coverage_dict)
    cts_shader_non_covered_lines, _ = parse_non_covered_lines(cts_shader_coverage_dict)
    

    collated_coverage, total_covered, total_lines = collate_coverage(cts_non_covered_lines, cts_shader_non_covered_lines)
    print((fuzzer_total_covered_lines / total_lines) * 100)
    print((total_covered / total_lines) * 100)

    # Get a dictionary of the percent covered by fuzzer but not CTS
    difference_in_coverage = compare_coverage(fuzzer_non_covered_lines, cts_non_covered_lines)

    # Print to dataframe
    return display_difference(difference_in_coverage)


# File paths (replace these with the actual paths to your JSON files)
fuzzer_coverage_path = './webglitch_no_swarm_formatted.json'
cts_coverage_path = './cts_api_formatted.json'
cts_shader_coverage_path = './cts_shader_formatted.json'

df = cov_compare(fuzzer_coverage_path, cts_coverage_path)
df


13.516737822895553
28.005545215042776
Total lines missed 45


Unnamed: 0,File,Num lines missed,Lines missed
4,out/Debug/gen/node/src/dawn/node/interop/WebGP...,14,"[872, 1912, 1913, 1916, 2831, 2899, 2900, 2901..."
89,src/dawn/native/CommandBuffer.cpp,4,"[72, 73, 74, 75]"
102,src/dawn/native/EncodingContext.cpp,5,"[175, 177, 178, 179, 180]"
122,src/dawn/native/ObjectBase.cpp,3,"[127, 128, 129]"
150,src/dawn/native/Texture.cpp,5,"[864, 865, 1331, 1332, 1333]"
187,src/dawn/node/binding/GPU.cpp,2,"[272, 273]"
188,src/dawn/node/binding/GPUAdapter.cpp,3,"[129, 130, 131]"
202,src/dawn/node/binding/GPUDevice.cpp,5,"[160, 161, 162, 163, 164]"
205,src/dawn/node/binding/GPUQueue.cpp,1,[157]
365,src/tint/lang/wgsl/ast/transform/promote_side_...,3,"[159, 160, 161]"
