# Reproducibility
- Notebook: tab_platform_overview.ipynb
- Data sources:
  - `replication/data/platform-counts.json`
  - `replication/data/platforms/<platform>/top100.json`
- Expected output:
  - Table: Platform-scale overview (top-100 GitHub subset per platform) (tab:platform_overview)


In [1]:
from pathlib import Path
import json
import math
import pandas as pd
import numpy as np


# Input locations are resolved relative to the current working directory
# (``../data``), so the paths depend on where the kernel is started.
DATA_DIR = Path.cwd().joinpath("..", "data").resolve()
PLATFORM_DIR = DATA_DIR.joinpath("platforms")

# Entries stored under the top-level "platforms" key of the counts file.
platform_counts = json.loads(
    DATA_DIR.joinpath("platform-counts.json").read_text(encoding="utf-8")
)["platforms"]

# The "platform" identifier of every entry, in file order.
PLATFORMS = [record["platform"] for record in platform_counts]


def load_top100(platform):
    """Return the ``"top100"`` value from a platform's ``top100.json`` file.

    The file is looked up at ``PLATFORM_DIR / platform / "top100.json"`` and
    parsed as UTF-8 encoded JSON.
    """
    raw_text = (PLATFORM_DIR / platform / "top100.json").read_text(encoding="utf-8")
    payload = json.loads(raw_text)
    return payload["top100"]


In [2]:
# Numeric columns: summed into the "Total" row and rendered with thousands
# separators in the displayed table.
count_columns = [
    "All Plugins",
    "OSS GitHub",
    "Top-100 Size",
    "Stars (Top-100)",
    "Downloads (Top-100)",
    "Issues (Top-100)",
    "Forks (Top-100)",
]

rows = []

for entry in platform_counts:
    top100 = load_top100(entry["platform"])

    # A missing or null "githubStats" is treated as an empty mapping, and a
    # missing or null counter as 0, so incomplete entries contribute nothing.
    github_stats = [plugin.get("githubStats") or {} for plugin in top100]
    star_total = sum(stats.get("stars", 0) or 0 for stats in github_stats)
    download_total = sum(plugin.get("downloads", 0) or 0 for plugin in top100)
    issue_total = sum(stats.get("openIssues", 0) or 0 for stats in github_stats)
    fork_total = sum(stats.get("forks", 0) or 0 for stats in github_stats)

    rows.append(
        {
            "Platform": entry["display"],
            "Category": entry["category"],
            "All Plugins": entry["all_plugins"],
            "OSS GitHub": entry["oss_github"],
            "Top-100 Size": len(top100),
            "Stars (Top-100)": star_total,
            "Downloads (Top-100)": download_total,
            "Issues (Top-100)": issue_total,
            "Forks (Top-100)": fork_total,
        }
    )

# Final row: column-wise totals over all platform rows collected above.
summary = {"Platform": "Total", "Category": "---"}
summary.update({column: sum(row[column] for row in rows) for column in count_columns})

rows.append(summary)

df = pd.DataFrame(rows)

# Presentation copy: numeric columns become comma-grouped strings; `df` keeps
# the raw numbers.
display_df = df.assign(
    **{column: df[column].map(lambda value: f"{int(value):,}") for column in count_columns}
)

display_df


Unnamed: 0,Platform,Category,All Plugins,OSS GitHub,Top-100 Size,Stars (Top-100),Downloads (Top-100),Issues (Top-100),Forks (Top-100)
0,Chrome,Browser,246379,7459,100,791751,233670000,33724,120128
1,Firefox,Browser,110320,7862,100,479809,17242577,12156,81394
2,JetBrains,IDE,10003,5849,100,501545,587304466,9213,138394
3,VS Code,IDE,86145,25136,100,184466,2950813730,32550,35166
4,Sublime,IDE,5581,4694,100,59452,49475544,1786,7912
5,WordPress,CMS,59000,3986,100,31718,7180000,11260,9336
6,Minecraft,Gaming,98600,26089,100,30385,2217620403,5332,6984
7,Obsidian,Specialized,2656,2656,100,91575,38734337,8972,6932
8,Home Assistant,Specialized,5187,2389,100,71025,4356695,5513,8487
9,Total,---,623871,86120,900,2241726,6106397752,120506,414733
