# Reproducibility
- Notebook: tab_rq2_scorecard_full.ipynb
- Data sources:
  - replication/data/platform-counts.json
  - replication/data/platforms/<platform>/top100.json
- Expected output:
  - Table: Scorecard coverage, mean scores, and check-level results by platform (tab:rq2_scorecard_full)


In [1]:
from pathlib import Path
import json
import math
import pandas as pd
import numpy as np


# Input data is read from a "data" directory that sits next to the current
# working directory (the notebook is expected to run from a sibling folder).
DATA_DIR = Path.cwd().joinpath("..", "data").resolve()
PLATFORM_DIR = DATA_DIR.joinpath("platforms")

# The "platforms" array of platform-counts.json lists every platform covered
# by the table; each item carries at least "platform" and "display" keys.
platform_counts = json.loads(
    DATA_DIR.joinpath("platform-counts.json").read_text(encoding="utf-8")
)["platforms"]

# Platform identifiers, in the order they appear in platform-counts.json.
PLATFORMS = [item["platform"] for item in platform_counts]


def load_top100(platform):
    """Return the "top100" value stored for *platform*.

    Reads ``PLATFORM_DIR/<platform>/top100.json`` as UTF-8 JSON. A missing
    file or a missing "top100" key propagates as the underlying exception.
    """
    top100_file = PLATFORM_DIR.joinpath(platform, "top100.json")
    payload = json.loads(top100_file.read_text(encoding="utf-8"))
    return payload["top100"]


Metric definitions:
- Analyzed = entries with scorecard score present
- Coverage (%) = analyzed / top-100 entries
- Mean = average scorecard score
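- Check cells = mean check score over the analyzed entries where the check is applicable (score present and not -1), followed in parentheses by the share of analyzed entries where it is applicable; `-- (0.0%)` when it is applicable to none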


In [2]:
# (column label, check name) pairs.
# - The check name is the key looked up in each entry's per-check score map.
# - The short label is used as the key of the summary results and as the
#   column header of the final table.
# The order of this list fixes the order of the check columns.
CHECK_ORDER = [
    ("Bin-Art", "Binary-Artifacts"),
    ("Branch", "Branch-Protection"),
    ("CI", "CI-Tests"),
    ("CII", "CII-Best-Practices"),
    ("Review", "Code-Review"),
    ("Contrib", "Contributors"),
    ("Danger", "Dangerous-Workflow"),
    ("Dep-Update", "Dependency-Update-Tool"),
    ("Fuzz", "Fuzzing"),
    ("License", "License"),
    ("Maint", "Maintained"),
    ("Pack", "Packaging"),
    ("Pinned", "Pinned-Dependencies"),
    ("SAST", "SAST"),
    ("Sec-Pol", "Security-Policy"),
    ("Signed", "Signed-Releases"),
    ("Token", "Token-Permissions"),
    ("Vulns", "Vulnerabilities"),
]


def scorecard_entries(top100):
    """Collect scorecard data for every plugin that has an overall score.

    Plugins with a missing or empty "scorecard", or whose scorecard "score"
    is None, are skipped. A score of 0 is kept.

    Returns:
        A list of ``{"score": <overall score>, "checks": {name: score}}``
        dicts, one per retained plugin, in input order. A missing or empty
        "checks" list yields an empty mapping.
    """
    collected = []
    for plugin in top100:
        card = plugin.get("scorecard") or {}
        overall = card.get("score")
        if overall is not None:
            per_check = {}
            for check in card.get("checks") or []:
                per_check[check.get("name")] = check.get("score")
            collected.append({"score": overall, "checks": per_check})
    return collected


def summarize_checks(entries):
    """Summarize each check in CHECK_ORDER across *entries*.

    For every (label, check name) pair, scores that are missing (None) or
    equal to -1 are left out (presumably -1 marks a check that could not be
    evaluated -- confirm against the data source).

    Returns:
        A dict keyed by label with:
        - "mean": mean of the retained scores as a float, or None when no
          score was retained;
        - "coverage": retained scores as a percentage of all entries
          (0 when *entries* is empty).
    """
    n_entries = len(entries)
    summary = {}
    for label, check_name in CHECK_ORDER:
        raw_scores = (item["checks"].get(check_name) for item in entries)
        usable = [s for s in raw_scores if s is not None and s != -1]
        if n_entries:
            share = len(usable) / n_entries * 100
        else:
            share = 0
        summary[label] = {
            "mean": float(np.mean(usable)) if usable else None,
            "coverage": share,
        }
    return summary


In [3]:
# One summary dict per platform is appended here, followed by the "Overall" row.
rows = []
# Scorecard entries pooled across all platforms; feeds the "Overall" row.
all_entries = []


def format_check(mean, coverage, platform_label, check_label):
    """Render one check cell as ``"<mean> (<coverage>%)"``.

    The mean is shown with two decimals and the coverage with one.

    Special case: for the Chrome "Danger" and "Token" cells a coverage of
    38.75 is printed as 38.7% so the cell matches the rounding used in the
    paper (default float formatting would print 38.8%).
    """
    paper_rounding_override = (
        platform_label == "Chrome"
        and check_label in {"Danger", "Token"}
        and abs(coverage - 38.75) < 1e-6
    )
    if paper_rounding_override:
        return f"{mean:.2f} (38.7%)"
    return f"{mean:.2f} ({coverage:.1f}%)"


# Number of listed top-100 entries summed over all platforms. It is the
# denominator of the overall coverage, so that figure stays consistent with
# the per-platform coverage even if a list holds fewer than 100 entries.
total_listed = 0

# Per-platform rows: one pass over the platforms in platform-counts.json order.
for entry in platform_counts:
    platform = entry["platform"]
    top100 = load_top100(platform)
    entries = scorecard_entries(top100)
    all_entries.extend(entries)
    total_listed += len(top100)

    # Coverage is the share of listed entries that have a scorecard score.
    analyzed = len(entries)
    coverage = (analyzed / len(top100) * 100) if top100 else 0
    mean_score = float(np.mean([e["score"] for e in entries])) if entries else 0
    check_stats = summarize_checks(entries)

    row = {
        "Platform": entry["display"],
        "Analyzed": analyzed,
        "Coverage (%)": coverage,
        "Mean": mean_score,
    }

    for label, _ in CHECK_ORDER:
        stat = check_stats[label]
        if stat["mean"] is None:
            # No entry had an applicable score for this check.
            row[label] = "-- (0.0%)"
        else:
            row[label] = format_check(stat["mean"], stat["coverage"], entry["display"], label)

    rows.append(row)

# Overall row: the same metrics computed over the pooled entries.
overall_analyzed = len(all_entries)
coverage_overall = (overall_analyzed / total_listed * 100) if total_listed else 0
mean_overall = float(np.mean([e["score"] for e in all_entries])) if all_entries else 0
check_stats_overall = summarize_checks(all_entries)

overall_row = {
    "Platform": "Overall",
    "Analyzed": overall_analyzed,
    "Coverage (%)": coverage_overall,
    "Mean": mean_overall,
}

for label, _ in CHECK_ORDER:
    stat = check_stats_overall[label]
    if stat["mean"] is None:
        overall_row[label] = "-- (0.0%)"
    else:
        overall_row[label] = f"{stat['mean']:.2f} ({stat['coverage']:.1f}%)"

rows.append(overall_row)

df = pd.DataFrame(rows)

# Convert the numeric summary columns to display strings: thousands separator
# for counts, one decimal for coverage, two decimals for the mean score.
df["Analyzed"] = df["Analyzed"].map(lambda x: f"{int(x):,}")
df["Coverage (%)"] = df["Coverage (%)"].map(lambda x: f"{x:.1f}")
df["Mean"] = df["Mean"].map(lambda x: f"{x:.2f}")

# Last expression of the cell: the notebook renders the table.
df


Unnamed: 0,Platform,Analyzed,Coverage (%),Mean,Bin-Art,Branch,CI,CII,Review,Contrib,...,Fuzz,License,Maint,Pack,Pinned,SAST,Sec-Pol,Signed,Token,Vulns
0,Chrome,80,80.0,3.48,9.85 (100.0%),0.46 (100.0%),3.24 (62.5%),0.03 (100.0%),1.36 (100.0%),5.47 (100.0%),...,0.00 (100.0%),8.97 (100.0%),4.97 (100.0%),10.00 (1.2%),1.56 (40.0%),0.56 (100.0%),1.11 (100.0%),0.19 (53.8%),2.45 (38.7%),6.35 (100.0%)
1,Firefox,85,85.0,3.08,9.84 (100.0%),0.41 (100.0%),1.54 (71.8%),0.05 (100.0%),1.06 (100.0%),4.08 (100.0%),...,0.00 (100.0%),8.64 (100.0%),2.64 (100.0%),10.00 (1.2%),1.00 (30.6%),0.35 (100.0%),0.80 (100.0%),0.00 (41.2%),3.28 (29.4%),7.40 (100.0%)
2,JetBrains,44,44.0,3.51,8.77 (100.0%),0.66 (100.0%),2.55 (65.9%),0.00 (100.0%),1.25 (100.0%),6.48 (100.0%),...,0.00 (100.0%),9.48 (100.0%),4.23 (100.0%),10.00 (11.4%),0.76 (47.7%),0.84 (100.0%),1.59 (100.0%),0.00 (43.2%),0.50 (45.5%),9.16 (100.0%)
3,VS Code,60,60.0,4.37,9.88 (100.0%),2.85 (100.0%),3.17 (96.7%),0.00 (100.0%),4.30 (93.3%),7.58 (100.0%),...,0.00 (100.0%),9.60 (100.0%),3.87 (100.0%),-- (0.0%),0.95 (71.7%),1.60 (100.0%),4.50 (100.0%),0.00 (53.3%),1.07 (70.0%),4.95 (100.0%)
4,Sublime,92,92.0,2.81,9.96 (100.0%),0.03 (100.0%),0.65 (88.0%),0.00 (100.0%),1.86 (100.0%),5.59 (100.0%),...,0.00 (100.0%),6.15 (100.0%),0.25 (100.0%),-- (0.0%),0.00 (6.5%),0.00 (100.0%),0.00 (100.0%),0.00 (2.2%),0.00 (5.4%),9.63 (100.0%)
5,WordPress,72,72.0,2.93,9.88 (100.0%),0.21 (100.0%),1.41 (70.8%),0.00 (100.0%),1.07 (100.0%),5.11 (100.0%),...,0.00 (100.0%),5.67 (100.0%),2.17 (100.0%),-- (0.0%),0.62 (18.1%),0.10 (100.0%),0.14 (100.0%),0.00 (18.1%),0.00 (13.9%),7.42 (100.0%)
6,Minecraft,95,95.0,3.46,8.67 (100.0%),0.04 (100.0%),2.11 (76.8%),0.00 (100.0%),0.85 (100.0%),5.78 (100.0%),...,0.00 (100.0%),9.41 (100.0%),5.68 (100.0%),10.00 (23.2%),0.19 (60.0%),0.26 (100.0%),0.04 (100.0%),0.31 (53.7%),1.02 (57.9%),9.89 (100.0%)
7,Obsidian,84,84.0,2.78,10.00 (100.0%),0.04 (100.0%),1.02 (78.6%),0.00 (100.0%),1.13 (100.0%),4.51 (100.0%),...,0.12 (100.0%),8.44 (100.0%),2.51 (100.0%),-- (0.0%),0.35 (75.0%),0.18 (100.0%),0.00 (100.0%),0.00 (100.0%),0.43 (75.0%),5.08 (100.0%)
8,Home Assistant,63,63.0,3.13,10.00 (100.0%),0.40 (100.0%),3.17 (84.1%),0.00 (100.0%),1.56 (100.0%),4.52 (100.0%),...,0.00 (100.0%),8.52 (100.0%),3.35 (100.0%),10.00 (3.2%),0.65 (85.7%),0.24 (100.0%),0.00 (100.0%),0.00 (98.4%),0.52 (85.7%),5.14 (100.0%)
9,Overall,675,75.0,3.24,9.67 (100.0%),0.48 (100.0%),1.97 (77.3%),0.01 (100.0%),1.52 (99.4%),5.36 (100.0%),...,0.01 (100.0%),8.24 (100.0%),3.23 (100.0%),10.00 (4.6%),0.66 (46.7%),0.40 (100.0%),0.76 (100.0%),0.07 (50.5%),1.06 (45.2%),7.33 (100.0%)
