# Reproducibility
- Notebook: tab_rq1_deps.ipynb
- Data sources:
  - replication/data/platform-counts.json
  - replication/data/platforms/<platform>/top100.json
  - replication/data/sbom + osv/osv-scans/*.analysis.json
- Expected output:
  - Table: Dependency footprint and freshness (top-100 GitHub subset) (tab:rq1_deps)


In [1]:
import json
import re
from pathlib import Path

import numpy as np
import pandas as pd

# All inputs live in a "data" directory that sits next to the current working
# directory (i.e. the notebook is expected to run from a sibling folder).
DATA_DIR = Path.cwd().joinpath("..", "data").resolve()
PLATFORM_DIR = DATA_DIR.joinpath("platforms")
OSV_DIR = DATA_DIR.joinpath("sbom + osv", "osv-scans")

# The "platforms" list of platform-counts.json; later code reads the
# "platform" and "display" keys of each entry.
platform_counts = json.loads(
    DATA_DIR.joinpath("platform-counts.json").read_text(encoding="utf-8")
)["platforms"]

# Platform identifiers, in the same order as platform_counts.
PLATFORMS = [item["platform"] for item in platform_counts]


def load_top100(platform):
    """Return the ``"top100"`` value from a platform's ``top100.json`` file.

    Args:
        platform: Name of the platform sub-directory under ``PLATFORM_DIR``.

    Returns:
        Whatever is stored under the ``"top100"`` key of
        ``PLATFORM_DIR/<platform>/top100.json``.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"top100"`` key.
    """
    source = PLATFORM_DIR / platform / "top100.json"
    payload = json.loads(source.read_text(encoding="utf-8"))
    return payload["top100"]


In [2]:
# Matches an optional URL scheme ("https://", "git+https://", "ssh://", ...),
# optional userinfo ("git@", "user:token@"), an optional "www." and the
# github.com host followed by its path separator ("/" or the scp-style ":").
# Anchored at the start so a "github.com" substring inside a repository name
# is never treated as a host.
_GITHUB_HOST_PREFIX = re.compile(
    r"^(?:[a-z][a-z0-9+.-]*://)?(?:[^@/\s]+@)?(?:www\.)?github\.com[:/]+",
    re.IGNORECASE,
)


def normalize_repo(raw):
    """Reduce a GitHub repository reference to ``"owner/name"``.

    Accepts bare ``owner/name`` slugs as well as GitHub URLs in HTTP(S),
    ``git+https``, ``ssh://`` and scp-like (``git@github.com:owner/name``)
    form. Fragments, query strings, trailing slashes, extra path segments
    (``/tree/main`` ...) and a trailing ``.git`` on the repository name are
    dropped. The original letter case of owner and name is preserved.

    Args:
        raw: The repository reference; anything that is not a non-empty
            string yields ``None``.

    Returns:
        ``"owner/name"``, or ``None`` when ``raw`` is empty, is not a string,
        is an ``http(s)://`` / ``git@`` reference that does not mention
        github.com, or does not contain both a non-empty owner and a
        non-empty repository name.
    """
    if not raw or not isinstance(raw, str):
        return None

    repo = raw.strip()
    lower = repo.lower()
    # Explicit URLs pointing at some other host are not GitHub repositories.
    if lower.startswith(("http://", "https://", "git@")) and "github.com" not in lower:
        return None

    # Drop the fragment and query string before looking at the path.
    repo = repo.split("#")[0].split("?")[0]
    # Strip the host case-insensitively (the guard above is case-insensitive
    # too, so "HTTPS://GitHub.com/..." must be handled here as well).
    repo = _GITHUB_HOST_PREFIX.sub("", repo, count=1).rstrip("/")

    parts = repo.split("/")
    if len(parts) < 2:
        return None

    owner, name = parts[0], parts[1]
    # Only a *trailing* ".git" is a clone-URL artifact; removing ".git"
    # anywhere would corrupt names such as "owner/owner.github.io".
    if name.endswith(".git"):
        name = name[: -len(".git")]
    # Empty segments mean the input was not a usable owner/name reference
    # (e.g. a leftover "scheme://" of an unrecognized URL form).
    if not owner or not name:
        return None
    return f"{owner}/{name}"


def load_osv_map():
    """Index the OSV scan analyses in ``OSV_DIR`` by lower-cased repo slug.

    Every ``*.analysis.json`` file is parsed; the repository is derived from
    the file name by removing ``.analysis`` and turning ``__`` into ``/``,
    then passing the result through ``normalize_repo``. Files whose name does
    not normalize to a repository are skipped. If two files map to the same
    key, the one visited last wins.

    Returns:
        A dict mapping ``"owner/name"`` (lower case) to a dict with:
        ``"affected"`` - the ``summary.affectedPackages`` value of the file
        (``None`` when absent), and ``"stale_unique"`` - the number of
        entries under ``staleDependencies`` (0 when absent or empty).
        An empty dict is returned when ``OSV_DIR`` does not exist.
    """
    if not OSV_DIR.exists():
        return {}

    by_repo = {}
    for scan_path in OSV_DIR.glob("*.analysis.json"):
        report = json.loads(scan_path.read_text(encoding="utf-8"))

        # NOTE(review): every "__" becomes "/", so a repository whose own
        # name contains "__" would be split in the wrong place - confirm
        # against the naming scheme used when the scan files were written.
        slug = scan_path.stem.replace(".analysis", "").replace("__", "/")
        repo_key = normalize_repo(slug)
        if repo_key:
            by_repo[repo_key.lower()] = {
                "affected": (report.get("summary") or {}).get("affectedPackages"),
                "stale_unique": len(report.get("staleDependencies") or []),
            }
    return by_repo


def dep_counts(plugin):
    """Count a plugin's production and development dependencies.

    The counts are taken from ``plugin["githubStats"]["dependencies"]``,
    using the sizes of its ``"dependencies"`` and ``"devDependencies"``
    entries. Any level that is missing or falsy counts as empty.

    Args:
        plugin: A mapping describing one plugin.

    Returns:
        A ``(prod, dev)`` tuple of integers.
    """
    stats = plugin.get("githubStats") or {}
    manifest = stats.get("dependencies") or {}
    return tuple(
        len(manifest.get(section) or {})
        for section in ("dependencies", "devDependencies")
    )


# Build the repo -> OSV summary lookup once; the aggregation below reads it
# with lower-cased "owner/name" keys.
osv_map = load_osv_map()


In [3]:
# Build one table row per platform: mean production/dev dependency counts
# over every plugin in the platform's top-100 list, plus the mean
# stale-dependency ratio over the plugins that have a usable OSV scan.
rows = []

for entry in platform_counts:
    plugins = load_top100(entry["platform"])

    prod_counts, dev_counts = [], []
    ratio_total = 0
    ratio_n = 0

    for plugin in plugins:
        # Dependency counts are recorded for every plugin, scanned or not.
        n_prod, n_dev = dep_counts(plugin)
        prod_counts.append(n_prod)
        dev_counts.append(n_dev)

        # The ratio only covers plugins whose repo resolves to an OSV scan
        # with a numeric "affected" value.
        repo = normalize_repo(plugin.get("repo"))
        scan = osv_map.get(repo.lower()) if repo else None
        if not scan:
            continue
        affected = scan.get("affected")
        if not isinstance(affected, (int, float)):
            continue

        # A scan with zero affected packages still counts, with ratio 0.
        ratio = scan.get("stale_unique", 0) / affected if affected else 0
        ratio_total += ratio
        ratio_n += 1

    rows.append({
        "Platform": entry["display"],
        "Avg Prod Deps": float(np.mean(prod_counts)) if prod_counts else 0,
        "Avg Dev Deps": float(np.mean(dev_counts)) if dev_counts else 0,
        "Stale Dep Ratio": ratio_total / ratio_n if ratio_n else 0,
    })

df = pd.DataFrame(rows)

# Render the numeric columns as fixed two-decimal strings for the table.
for column in ("Avg Prod Deps", "Avg Dev Deps", "Stale Dep Ratio"):
    df[column] = df[column].map("{:.2f}".format)

df


Unnamed: 0,Platform,Avg Prod Deps,Avg Dev Deps,Stale Dep Ratio
0,Chrome,11.77,18.86,0.59
1,Firefox,2.99,9.25,0.45
2,JetBrains,2.25,1.21,0.15
3,VS Code,6.27,19.26,0.66
4,Sublime,0.27,0.45,0.07
5,WordPress,0.96,8.88,0.63
6,Minecraft,3.16,0.09,0.06
7,Obsidian,6.06,19.06,0.78
8,Home Assistant,4.44,12.74,0.63
