# Reproducibility
- Notebook: tab_rq1_governance.ipynb
- Data sources:
  - replication/data/platform-counts.json
  - replication/data/platforms/<platform>/top100.json
- Expected output:
  - Table: Governance and ownership metrics (top-100 GitHub subset) (tab:rq1_governance)


In [1]:
from pathlib import Path
import json
import math
import pandas as pd
import numpy as np


# Data lives in ../data relative to the directory the notebook is run from.
DATA_DIR = Path.cwd().joinpath("..", "data").resolve()
PLATFORM_DIR = DATA_DIR.joinpath("platforms")

# platform-counts.json holds a "platforms" list; each entry names one platform.
with open(DATA_DIR / "platform-counts.json", mode="r", encoding="utf-8") as handle:
    platform_counts = json.loads(handle.read())["platforms"]

# Platform identifiers, in the order they appear in platform-counts.json.
PLATFORMS = [item["platform"] for item in platform_counts]


def load_top100(platform):
    """Return the ``"top100"`` value stored in a platform's ``top100.json``.

    Args:
        platform: Name of the platform sub-directory under ``PLATFORM_DIR``.

    Returns:
        Whatever the file stores under its top-level ``"top100"`` key.
    """
    top100_file = PLATFORM_DIR.joinpath(platform, "top100.json")
    payload = json.loads(top100_file.read_text(encoding="utf-8"))
    return payload["top100"]


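Metric definitions (each reported value is the mean over a platform's top-100 plugins):
- Gov. Score = avg number of governance artifacts present (license, code of conduct, security policy, contributing guide; 0–4 per plugin)
- Workflows = avg githubStats.workflowCount
- Core-Team = avg (contributions of the top 3 contributors / commitActivity.totalCommits); the ratio is not capped at 1
- Owner Share = avg participation.ownerPercentage
- Issue Eff. = avg (closed issues / (open + closed issues))
- A plugin with missing data for a metric contributes 0 to that metric's average.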


In [2]:
def governance_score(plugin):
    """Count the governance artifacts flagged for a plugin.

    Looks at ``plugin["githubStats"]["governance"]`` and counts how many of
    ``hasLicense``, ``hasCodeOfConduct``, ``hasSecurityPolicy`` and
    ``hasContributingGuide`` are truthy. A missing or falsy ``githubStats``
    or ``governance`` entry yields a score of 0.

    Returns:
        int: a value between 0 and 4.
    """
    stats = plugin.get("githubStats") or {}
    flags = stats.get("governance") or {}
    artifact_keys = (
        "hasLicense",
        "hasCodeOfConduct",
        "hasSecurityPolicy",
        "hasContributingGuide",
    )
    return sum(1 for key in artifact_keys if flags.get(key))


def workflow_count(plugin):
    """Return ``githubStats.workflowCount`` for a plugin, or 0 when absent.

    A missing or falsy ``githubStats`` entry and a missing or falsy
    ``workflowCount`` both produce 0.
    """
    stats = plugin.get("githubStats") or {}
    count = stats.get("workflowCount", 0)
    if not count:
        return 0
    return count


def core_team_ratio(plugin):
    """Share of commits attributed to the first three listed contributors.

    Divides the summed ``contributions`` of the first three entries in
    ``githubStats.topContributors`` by ``githubStats.commitActivity.totalCommits``.

    Returns 0 when ``totalCommits`` is missing or zero, or when
    ``topContributors`` is not a list. Entries among the first three that are
    not dicts (e.g. ``null`` placeholders) are skipped instead of raising.
    The result is not clamped: it exceeds 1 whenever the summed contributions
    are larger than ``totalCommits``.

    Args:
        plugin: dict describing one plugin.

    Returns:
        int | float: 0 for missing data, otherwise the unclamped ratio.
    """
    stats = plugin.get("githubStats") or {}
    top = stats.get("topContributors") or []
    total = (stats.get("commitActivity") or {}).get("totalCommits", 0) or 0
    if not total or not isinstance(top, list):
        return 0
    # Slice first, then filter, so a malformed entry does not pull a fourth
    # contributor into the "top 3".
    top3 = sum(
        (c.get("contributions", 0) or 0)
        for c in top[:3]
        if isinstance(c, dict)
    )
    # `total` is guaranteed truthy here by the guard above.
    return top3 / total


def owner_share(plugin):
    """Return ``githubStats.participation.ownerPercentage`` for a plugin.

    Any missing or falsy level of the lookup (``githubStats``,
    ``participation``, ``ownerPercentage``) produces 0. In particular, a
    ``participation`` key that is present but null is treated as missing
    rather than raising ``AttributeError``.

    Args:
        plugin: dict describing one plugin.

    Returns:
        The stored ``ownerPercentage`` value, or 0 when unavailable.
    """
    stats = plugin.get("githubStats") or {}
    # `or {}` (not a .get default) so an explicit null value is also covered.
    participation = stats.get("participation") or {}
    return participation.get("ownerPercentage", 0) or 0


def issue_efficiency(plugin):
    """Fraction of a plugin's issues that are closed.

    Computes ``closedIssues / (openIssues + closedIssues)`` from
    ``githubStats``. Missing or falsy counts are treated as 0, and a plugin
    with no issues at all yields 0.
    """
    stats = plugin.get("githubStats") or {}
    open_count = stats.get("openIssues", 0) or 0
    closed_count = stats.get("closedIssues", 0) or 0
    denominator = open_count + closed_count
    if not denominator:
        # No recorded issues: avoid dividing by zero.
        return 0
    return closed_count / denominator


In [3]:
# Table column label -> (per-plugin metric function, display format).
# Insertion order fixes the column order of the resulting table.
METRIC_COLUMNS = {
    "Gov. Score": (governance_score, "{:.2f}"),
    "Workflows": (workflow_count, "{:.2f}"),
    "Core-Team": (core_team_ratio, "{:.3f}"),
    "Owner Share": (owner_share, "{:.2f}"),
    "Issue Eff.": (issue_efficiency, "{:.3f}"),
}

# One row per platform: the mean of every metric over that platform's top-100 list.
rows = []
for entry in platform_counts:
    platform = entry["platform"]
    top100 = load_top100(platform)

    averages = {
        label: np.mean([metric(p) for p in top100])
        for label, (metric, _) in METRIC_COLUMNS.items()
    }
    rows.append({"Platform": entry["display"], **averages})

df = pd.DataFrame(rows)

# Render each metric column as a fixed-precision string for the final table.
for label, (_, fmt) in METRIC_COLUMNS.items():
    df[label] = df[label].map(fmt.format)

df


Unnamed: 0,Platform,Gov. Score,Workflows,Core-Team,Owner Share,Issue Eff.
0,Chrome,1.08,2.49,6.247,17.99,0.696
1,Firefox,1.06,1.08,0.799,31.57,0.628
2,JetBrains,1.26,2.71,1.223,14.03,0.467
3,VS Code,1.73,3.31,31.302,7.44,0.509
4,Sublime,0.46,0.21,0.0,5.93,0.675
5,WordPress,0.93,2.59,0.272,28.29,0.658
6,Minecraft,0.8,1.24,1.899,40.46,0.657
7,Obsidian,0.81,1.13,6.143,31.37,0.523
8,Home Assistant,0.95,3.18,0.0,30.78,0.707
