'
+ _stat(f'{_avg_1yr:.1f}%', 'Avg 1-Year Retention', f'{_eco} across selected cohorts')
+ _stat(f'{_avg_2yr:.1f}%', 'Avg 2-Year Retention', f'{_eco} across selected cohorts')
+ _stat(str(_best_1yr['cohort_year']), 'Best Cohort', f"{_best_1yr['retention_rate']:.1f}% retention at 1 year")
@@ -268,14 +252,16 @@ def _stat(value, label, caption=''):
_opts = [o for o in _ECOSYSTEMS if o in _states]
_djs_safe = _json.dumps(_states).replace('</', '<\\/')
_opts_js = _json.dumps(_opts)
- _sel_html = '
Ecosystem ' + ''.join(f'{o} ' for i, o in enumerate(_opts)) + '
'
+ _sel_html = '
Ecosystem ' + ''.join(f'{o} ' for i, o in enumerate(_opts)) + '
'
_inner = (
'
'
''
''
f'{_sel_html}'
'
'
@@ -283,13 +269,13 @@ def _stat(value, label, caption=''):
f''
)
_src = _html_mod.escape(_inner, quote=True)
- mo.Html(f'
')
+ mo.Html(f'
')
return
diff --git a/notebooks/insights/ethereum-repo-rank.py b/notebooks/insights/ethereum-repo-rank.py
index ed2a398..9eed908 100644
--- a/notebooks/insights/ethereum-repo-rank.py
+++ b/notebooks/insights/ethereum-repo-rank.py
@@ -11,7 +11,7 @@
@app.cell(hide_code=True)
def _(mo):
- _header_html = ''
+ _header_html = ''
mo.Html(_header_html)
return
@@ -37,7 +37,7 @@ def _(df_trending, eth_dev_set, df_engagement_raw, mo):
mo.hstack(
[
mo.stat(value=f"{_panel_size:,}", label="Ethereum Builders Tracked", bordered=True, caption="≥12 months commit activity"),
- mo.stat(value=f"{_active_eth}", label="Eth Builders Active on Trending Repos", bordered=True, caption=f"{_active_eth/_panel_size*100:.1f}% of panel"),
+ mo.stat(value=f"{_active_eth}", label="Active on Trending Repos", bordered=True, caption=f"{_active_eth/_panel_size*100:.1f}% of panel"),
mo.stat(value=_top_eth_repo, label="#1 by Eth Builder Attention", bordered=True, caption=f"{_top_eth_devs} distinct eth builders"),
mo.stat(value=_top_all_repo, label="#1 by All Builder Attention", bordered=True, caption=f"{_top_all_devs:,} distinct builders"),
],
@@ -53,145 +53,132 @@ def _(df_trending, eth_dev_set, df_engagement_raw, mo):
@app.cell(hide_code=True)
-def _(mo):
- leaderboard_window = mo.ui.dropdown(
- options=["Past 30 Days", "Past 7 Days"],
- value="Past 30 Days",
- label="",
- )
- leaderboard_sort = mo.ui.dropdown(
- options=["All Builder Attention", "Eth Builder Attention"],
- value="All Builder Attention",
- label="Sort by",
- )
- leaderboard_page = mo.ui.dropdown(
- options=["Page 1", "Page 2", "Page 3", "Page 4"],
- value="Page 1",
- label="",
- )
- return leaderboard_page, leaderboard_sort, leaderboard_window
-
+def _(df_trending, mo):
+ import json as _json
+ import html as _html_mod
-@app.cell(hide_code=True)
-def _(df_trending, leaderboard_window, leaderboard_sort, leaderboard_page, mo):
- _window = leaderboard_window.value
- _is_30d = _window == "Past 30 Days"
- _all_col = "global_engagers_30d" if _is_30d else "global_engagers_7d"
- _eth_col = "eth_devs_30d" if _is_30d else "eth_devs_7d"
- _suffix = "30d" if _is_30d else "7d"
-
- # Sort by selected column, cap at 100
- _sort_col = _eth_col if leaderboard_sort.value == "Eth Builder Attention" else _all_col
- _df = df_trending.sort_values(_sort_col, ascending=False).head(100).reset_index(drop=True)
-
- # Momentum: 7d daily rate / 30d daily rate
+ # Prepare top 100 rows with momentum
+ _df = df_trending.sort_values("global_engagers_30d", ascending=False).head(100).reset_index(drop=True)
_df["rate_7d"] = _df["global_engagers_7d"] / 7
_df["rate_30d"] = _df["global_engagers_30d"] / 30
_df["momentum"] = _df["rate_7d"] / _df["rate_30d"].clip(lower=0.01)
- # Pagination
- _page_size = 25
- _total_pages = max(1, -(-len(_df) // _page_size))
- _page = min(int(leaderboard_page.value.split()[-1]), _total_pages)
- _start = (_page - 1) * _page_size
- _end = min(_start + _page_size, len(_df))
- _page_df = _df.iloc[_start:_end]
-
- def _fmt(n):
- if n >= 10000:
- return f"{n/1000:.1f}K"
- if n >= 1000:
- return f"{n/1000:.2f}K"
- return str(int(n))
-
- def _heat(m):
- if m >= 1.5:
- return "🌶🌶🌶"
- if m >= 0.7:
- return "🌶🌶"
- return "🌶"
-
- def _rank_badge(i):
- if i == 0:
- return '
🥇 '
- if i == 1:
- return '
🥈 '
- if i == 2:
- return '
🥉 '
- return f'
#{i+1} '
-
- # Sort indicators for column headers
- _eth_arrow = " ↓" if leaderboard_sort.value == "Eth Builder Attention" else ""
- _all_arrow = " ↓" if leaderboard_sort.value == "All Builder Attention" else ""
-
- _th = "padding:6px 8px;font-size:0.68em;color:#64748b;font-weight:700;text-transform:uppercase;letter-spacing:0.06em;white-space:nowrap;"
- _td = "padding:4px 8px;vertical-align:middle;"
-
- def _community(pct):
- if pct >= 0.01:
- return '
Crypto '
- return '
Mainstream '
-
- _rows = []
- for _idx in range(len(_page_df)):
- _r = _page_df.iloc[_idx]
- _rank = _start + _idx
- _repo = _r["repo_name"]
+ _records = []
+ for _i in range(len(_df)):
+ _r = _df.iloc[_i]
_desc = str(_r.get("description", ""))
if len(_desc) > 72:
_desc = _desc[:69] + "..."
- _row_bg = "background:#fafbfc;" if _idx % 2 == 1 else ""
-
- _rows.append(f"""
- {_rank_badge(_rank)}
- {_repo}
- {_desc}
- {_community(_r["eth_dev_pct"])}
- {_fmt(int(_r[_eth_col]))}
- {_fmt(int(_r[_all_col]))}
- {_heat(_r["momentum"])}
- """)
-
- _table_html = f"""
-
-
-
-
-
-
-
-
-
-
-
-
- #
- Repository
- Description
- Community
- Eth Builders{_eth_arrow}
- All Builders{_all_arrow}
- Heat
-
-
-
- {"".join(_rows)}
-
-
-
"""
-
- _footer_text = f'
Showing {_start+1}–{_end} of {len(_df)} repos with Ethereum builder signal '
+ _records.append({
+ "repo_name": str(_r["repo_name"]),
+ "description": _desc,
+ "eth_dev_pct": float(_r["eth_dev_pct"]),
+ "eth_devs_30d": int(_r["eth_devs_30d"]),
+ "eth_devs_7d": int(_r["eth_devs_7d"]),
+ "global_engagers_30d": int(_r["global_engagers_30d"]),
+ "global_engagers_7d": int(_r["global_engagers_7d"]),
+ "momentum": float(_r["momentum"]),
+ })
+
+ _data_js = _json.dumps(_records).replace("</", "<\\/")
+
+ _inner = (
+ '
'
+ ''
+ '
'
+ '
'
+ 'Sort by '
+ ''
+ 'Eth Builder Attention '
+ 'All Builder Attention '
+ ' '
+ ''
+ 'Past 30 Days '
+ 'Past 7 Days '
+ ' '
+ '
'
+ '
'
+ ''
+ ''
+ )
+ _src = _html_mod.escape(_inner, quote=True)
mo.vstack([
- mo.hstack([
- mo.md("### Trending Repos"),
- mo.hstack([leaderboard_sort, leaderboard_window], gap=1, justify="end"),
- ], justify="space-between", align="end"),
- mo.Html(_table_html),
- mo.hstack([
- mo.md(_footer_text),
- leaderboard_page,
- ], justify="space-between", align="center"),
+ mo.md("### Trending Repos"),
+ mo.Html(f'
'),
])
return
@@ -583,76 +570,95 @@ def _make_table(df_slice, show_repos=True):
@app.cell(hide_code=True)
-def _(df_trending, mo):
+def _(df_trending, df_engagement_daily, mo):
+ import json as _json2
+ import html as _html_mod2
+
+ # Build repo options (sorted by popularity)
_repo_opts = (
df_trending[df_trending["global_engagers_30d"] > 0]
.sort_values("global_engagers_30d", ascending=False)["repo_name"]
.tolist()
)
- # Default to zeroclaw, ironclaw, openclaw
+
+ # Default selections
_preferred = ["zeroclaw-labs/zeroclaw", "nearai/ironclaw", "openclaw/openclaw"]
_defaults = [r for r in _preferred if r in _repo_opts]
- compare_repos = mo.ui.multiselect(
- options=_repo_opts,
- value=_defaults,
- label="**Compare repos**",
- max_selections=6,
- full_width=True,
+ # Serialize daily engagement data for all top repos
+ _daily_subset = df_engagement_daily[df_engagement_daily["repo_name"].isin(_repo_opts)].copy()
+ _daily_subset["day_str"] = _daily_subset["day"].dt.strftime("%Y-%m-%d")
+ _daily_records = {}
+ for _repo in _repo_opts:
+ _rd = _daily_subset[_daily_subset["repo_name"] == _repo].sort_values("day")
+ if len(_rd) > 0:
+ _daily_records[_repo] = {
+ "days": _rd["day_str"].tolist(),
+ "cum": _rd["cum_engagers"].tolist(),
+ }
+
+ _opts_js = _json2.dumps(_repo_opts).replace("</", "<\\/")
+ _defaults_js = _json2.dumps(_defaults).replace("</", "<\\/")
+ _daily_js = _json2.dumps(_daily_records).replace("</", "<\\/")
+
+ _opt_html = "".join(
+ f'
{r} ' for r in _repo_opts
)
- return (compare_repos,)
-
-@app.cell(hide_code=True)
-def _(compare_repos, df_engagement_daily, mo, go, PLOTLY_LAYOUT):
- _selected = compare_repos.value if compare_repos.value else []
-
- if not _selected:
- _out = mo.vstack([
- mo.md("### Cumulative builder engagement\n\nSelect repos above to compare growth curves."),
- compare_repos,
- ])
- else:
- _sel_lower = [r.lower() for r in _selected]
- _eh = df_engagement_daily[df_engagement_daily["repo_name"].isin(_sel_lower)]
-
- _colors = ["#6366f1", "#10b981", "#f59e0b", "#ef4444", "#8b5cf6", "#06b6d4"]
-
- _fig = go.Figure()
-
- for _i, _repo in enumerate(_sel_lower):
- _short = _repo.split("/")[-1]
- _color = _colors[_i % len(_colors)]
- _r, _g, _b = int(_color[1:3], 16), int(_color[3:5], 16), int(_color[5:7], 16)
-
- _d = _eh[_eh["repo_name"] == _repo].sort_values("day")
- if len(_d) > 0:
- _fig.add_trace(go.Scatter(
- x=_d["day"], y=_d["cum_engagers"], name=_short,
- mode="lines", line=dict(color=_color, width=2, shape="hvh"),
- fill="tozeroy", fillcolor=f"rgba({_r},{_g},{_b},0.05)",
- hovertemplate="%{y:,.0f} builders
" + _short + " ",
- ))
-
- _fig.update_layout(
- **{
- **PLOTLY_LAYOUT,
- "legend": {**PLOTLY_LAYOUT["legend"], "orientation": "h", "yanchor": "bottom", "y": 1.04, "xanchor": "left", "x": 0, "bgcolor": "rgba(255,255,255,0.8)"},
- "margin": dict(t=40, l=70, r=40, b=50),
- "height": 450,
- },
- )
- _fig.update_xaxes(tickformat="%b %d", showgrid=False)
- _fig.update_yaxes(showgrid=True, tickformat=",")
+ _inner2 = (
+ '
'
+ ''
+ )
+ _src2 = _html_mod2.escape(_inner2, quote=True)
- _out = mo.vstack([
- mo.md("""### Cumulative builder engagement
+ mo.vstack([
+ mo.md("""### Cumulative builder engagement
- Each line counts unique builders who starred or forked (first event only). A steep ramp means viral discovery; a flattening curve means the moment has passed."""),
- compare_repos,
- mo.ui.plotly(_fig, config={"displayModeBar": False}),
- ])
- _out
+ Each line counts unique builders who starred or forked (first event only). A steep ramp means viral discovery; a flattening curve means the moment has passed."""),
+ mo.Html(f'
'),
+ ])
return
@@ -664,17 +670,29 @@ def _(compare_repos, df_engagement_daily, mo, go, PLOTLY_LAYOUT):
@app.cell(hide_code=True)
def _(mo):
mo.accordion({
- "Methodology & Data Sources": mo.md("""
- **Developer panel**: Qualified builders with ≥12 months commit activity on Ethereum panel repos and verified GitHub logins. ~920 infrastructure, ~415 DeFi.
+ "Metrics & Definitions": mo.md("""
+ **Ethereum builder panel**: Qualified builders with ≥12 months commit activity on Ethereum panel repos and verified GitHub logins. ~920 infrastructure, ~415 DeFi.
+
+ **Engagement metrics**: Stars + forks collected over 30-day and 7-day rolling windows. Stargazer/forker usernames are scraped directly from the GitHub API and deduplicated within each window.
+
+ **Signal strength (eth_dev_pct)**: For each repo, the share of engagers who are Ethereum panel builders. A high percentage means the repo is drawing disproportionate attention from active Ethereum developers relative to the mainstream GitHub audience.
+
+ **Momentum**: 7-day daily engagement rate divided by 30-day daily engagement rate. A ratio above 1.0 means the repo is accelerating; below 1.0 means interest is cooling.
+ """),
+ "Assumptions & Limitations": mo.md("""
+ **Starring ≠ using.** A star is a lightweight signal of interest, not adoption or production use.
- **Repo selection**: Top ~150 repos by aggregate panel builder attention (stars + forks). Stargazer/forker usernames scraped directly from the GitHub API (30-day rolling window).
+ **Non-random sample.** The repos were selected *because* they attracted Ethereum builder attention — this is not a representative cross-section of all open source software.
- **Signal strength**: For each repo, we compute the % of engagers (stargazers + forkers, deduplicated) who are Ethereum panel builders — revealing disproportionate interest vs mainstream audience.
+ **Panel is a ceiling, not a floor.** The Ethereum builder panel captures the most active slice of Ethereum developers. Many builders fall below the activity threshold and are not counted.
- **Caveats**: Starring ≠ using. The repos were selected *because* they attracted Ethereum builder attention — this is not a random sample. The panel captures the most active slice of Ethereum builders, not all of them. Attention patterns shift; a repo trending today may be forgotten next month.
+ **Attention is ephemeral.** Engagement patterns shift quickly. A repo trending today may drop out of the rankings next month as the community's focus moves on.
+ """),
+ "Data Sources": mo.md("""
+ **OSO data warehouse** — `ethereum.local_rank_models` (repo rankings and signal strength), `ethereum.dev_engagement_models` (per-repo engagement counts and panel overlap)
- **Data sources**: [OSO](https://www.oso.xyz) data warehouse (`ethereum.local_rank_models`, `ethereum.dev_engagement_models`) · GitHub API (GraphQL + REST)
- """)
+ **GitHub API** — GraphQL + REST endpoints used to collect stargazer and forker usernames for each tracked repo over rolling windows.
+ """),
})
return
diff --git a/notebooks/insights/speedrun-ethereum.py b/notebooks/insights/speedrun-ethereum.py
index 9b3b36e..fab0a45 100644
--- a/notebooks/insights/speedrun-ethereum.py
+++ b/notebooks/insights/speedrun-ethereum.py
@@ -6,63 +6,7 @@
@app.cell(hide_code=True)
def header_title(mo):
- mo.md("""
- # Case Study: Speedrun Ethereum
-
Owner: OSO Team · Last Updated: 2026-02-17
-
- An in-depth case study on the role Speedrun Ethereum has played in onboarding and retaining new Ethereum developers.
- """)
- return
-
-
-@app.cell(hide_code=True)
-def header_accordion(mo):
- mo.accordion({
- "Overview": mo.md("""
-- Speedrun Ethereum (SRE) is a self-paced challenge program that has onboarded 17,000+ developers into the Ethereum ecosystem
-- This analysis examines whether SRE successfully converts newcomers into sustained Ethereum contributors — and the data suggests it does
-- Key questions: What share of SRE graduates remain active in Ethereum after 1–2 years? How does prior experience affect outcomes? Where do graduates go after SRE?
- """),
- "Context": mo.md("""
-We conducted this analysis as part of a broader inquiry into the state of the Ethereum developer ecosystem in 2025, grounded in three working hypotheses:
-
-1. Developer retention is a leading indicator of ecosystem health and, over time, a meaningful predictor of long-term token price, value accrual, network GDP, etc.
-2. Ethereum's early open-source culture is eroding as the crypto ecosystem matures, becomes more competitive, and partners with tradfi/web2.
-3. Other ecosystems (eg, AI) have emerged as powerful bottom-up attractors for ambitious, mission-driven developers.
-
-Using Speedrun Ethereum as a focused case study, the data suggests that bottom-up programs still work. Speedrun Ethereum is successfully counteracting these headwinds by onboarding, retaining, and anchoring net-new developers in the Ethereum ecosystem.
-
-**Working hypotheses:**
-1. Developer retention is a leading indicator of ecosystem health
-2. Ethereum's early open-source culture is under pressure from competition and crypto maturation
-3. AI and other ecosystems are attracting ambitious developers who might otherwise go to Ethereum
-
-**Key definitions:**
-- **Users**: Developers with GitHub profiles saved in the SRE registry (not all 17K+ total users)
-- **Cohort Month**: Profile `createdAt` date rounded to the nearest month
-- **Batch ID**: Some though not all developers were assigned a learning batch (group) when they went through the program
-- **Challenges Completed**: The number of SRE challenges the user successfully completed (according to their profile)
-- **Location**: Where available, the country code of the user
-- **Forked `scaffold-eth`**: Whether the user has one or more of the scaffold-eth repos forked to their personal GitHub
-- **Experience Categories**: *Newb* (minimal prior GitHub activity), *Learning* (<1 year prior), *Experienced* (>12 months prior), *Delayed Start* (became active several months after SRE start)
-- **Active Month**: ≥1 qualifying Push or PullRequest event on a public GitHub repo
-- **Ecosystem Classification**: Repos classified as *Ethereum*, *Other EVM Chain*, *Personal*, or *Other* via Electric Capital mappings
-- **Retention**: Share of a cohort active at month *t*, normalized at month 0
-- **Full-time month**: >10 days of qualifying activity
-- **Velocity**: Sum over active days of (1 + ln(events per day))
-- **Change Categories**: Average monthly activity changes after SRE compared to before
-
-**Metric Definitions**
-- Activity — Monthly Active Developer (MAD) methodology
-- Retention — Cohort-based retention methodology
- """),
- "Data Sources": mo.md("""
-- **SRE GitHub users** — `int_sre_github_users`: user registry, cohorts, batches, challenges completed
-- **GitHub events** — `int_sre_github_events_by_user`: public GitHub events joined to SRE users, from [GitHub Archive](https://gharchive.org)
-- **Ecosystem mappings** — `stg_opendevdata__*`: Electric Capital's repo → ecosystem mappings, via [Open Dev Data](https://opendevdata.org/)
-- **Further reading**: [Speedrun Ethereum](https://speedrunethereum.com/) · [Pyoso docs](https://docs.opensource.observer/docs/get-started/python) · [Marimo docs](https://docs.marimo.io/)
- """),
- })
+ mo.Html('')
return
@@ -524,20 +468,22 @@ def section_activity_by_ecosystem(
_opts = list(_states.keys())
_djs_safe = _json.dumps(_states).replace('</', '<\\/')
_opts_js = _json.dumps(_opts)
- _sel_html = '
Analyze ' + ''.join(f'{o} ' for i, o in enumerate(_opts)) + '
'
+ _sel_html = '
Analyze ' + ''.join(f'{o} ' for i, o in enumerate(_opts)) + '
'
_inner = (
'
'
''
''
f'{_sel_html}'
'
'
f''
@@ -548,7 +494,7 @@ def section_activity_by_ecosystem(
mo.md("---"),
mo.md("## Speedrun Ethereum has contributed an incremental ~250 monthly active developers to Ethereum"),
mo.md("_Measured as the increase in Ethereum-active developers attributable to SRE alumni relative to the pre-SRE baseline. Showing Ethereum ecosystem._"),
- mo.Html(f'
'),
+ mo.Html(f'
'),
])
return
@@ -943,20 +889,22 @@ def section_experience_funnel(
_opts2 = list(_states2.keys())
_djs2_safe = _json2.dumps(_states2).replace('</', '<\\/')
_opts2_js = _json2.dumps(_opts2)
- _sel2_html = '
Ecosystem ' + ''.join(f'{o} ' for i, o in enumerate(_opts2)) + '
'
+ _sel2_html = '
Ecosystem ' + ''.join(f'{o} ' for i, o in enumerate(_opts2)) + '
'
_inner2 = (
'
'
''
''
f'{_sel2_html}'
'
'
f''
@@ -967,7 +915,7 @@ def section_experience_funnel(
mo.vstack([
mo.md("## Not surprisingly, less experienced developers have higher churn and less overall long-term impact on Ethereum"),
- mo.Html(f'
'),
+ mo.Html(f'
'),
mo.md("The table below provides additional detail on the developer funnel:"),
show_table(_df_table2)
])
@@ -1179,20 +1127,22 @@ def section_experience_retention(
_opts3 = list(_states3.keys())
_djs3_safe = _json3.dumps(_states3).replace('</', '<\\/')
_opts3_js = _json3.dumps(_opts3)
- _sel3_html = '
Ecosystem ' + ''.join(f'{o} ' for i, o in enumerate(_opts3)) + '
'
+ _sel3_html = '
Ecosystem ' + ''.join(f'{o} ' for i, o in enumerate(_opts3)) + '
'
_inner3 = (
'
'
''
''
f'{_sel3_html}'
'
'
f''
@@ -1202,7 +1152,7 @@ def section_experience_retention(
mo.vstack([
mo.md("---"),
mo.md("## Developers with > 12 months prior experience remain active contributors to Ethereum at significantly higher rates"),
- mo.Html(f'
'),
+ mo.Html(f'
'),
])
return
@@ -1242,20 +1192,22 @@ def section_experienced_dev_activity(
_opts4 = list(_states4.keys())
_djs4_safe = _json4.dumps(_states4).replace('</', '<\\/')
_opts4_js = _json4.dumps(_opts4)
- _sel4_html = '
Experience Level ' + ''.join(f'{o} ' for i, o in enumerate(_opts4)) + '
'
+ _sel4_html = '
Experience Level ' + ''.join(f'{o} ' for i, o in enumerate(_opts4)) + '
'
_inner4 = (
'
'
''
''
f'{_sel4_html}'
'
'
f''
@@ -1266,7 +1218,7 @@ def section_experienced_dev_activity(
mo.md("---"),
mo.md("## For experienced developers, Speedrun Ethereum functions less as onboarding and more as activation and redirection toward Ethereum"),
mo.md("_Showing Active Developers metric for Ethereum ecosystem. Select experience level:_"),
- mo.Html(f'
'),
+ mo.Html(f'
'),
])
return
@@ -1298,20 +1250,22 @@ def section_cohort_year_retention(
_opts5 = list(_states5.keys())
_djs5_safe = _json5.dumps(_states5).replace('</', '<\\/')
_opts5_js = _json5.dumps(_opts5)
- _sel5_html = '
Experience Level ' + ''.join(f'{o} ' for i, o in enumerate(_opts5)) + '
'
+ _sel5_html = '
Experience Level ' + ''.join(f'{o} ' for i, o in enumerate(_opts5)) + '
'
_inner5 = (
'
'
''
''
f'{_sel5_html}'
'
'
f''
@@ -1322,7 +1276,7 @@ def section_cohort_year_retention(
mo.md("---"),
mo.md("## Engagement past the 3–month mark is a good predictor of longer-term retention"),
mo.md("_Showing Ethereum ecosystem by cohort year. Select experience level:_"),
- mo.Html(f'
'),
+ mo.Html(f'
'),
])
return
@@ -1690,6 +1644,45 @@ def section_where_now(
return
+@app.cell(hide_code=True)
+def header_accordion(mo):
+ mo.accordion({
+ "Metrics & Definitions": mo.md("""
+**Key definitions:**
+- **Users**: Developers with GitHub profiles saved in the SRE registry (not all 17K+ total users)
+- **Cohort Month**: Profile `createdAt` date rounded to the nearest month
+- **Batch ID**: Some though not all developers were assigned a learning batch (group) when they went through the program
+- **Challenges Completed**: The number of SRE challenges the user successfully completed (according to their profile)
+- **Location**: Where available, the country code of the user
+- **Forked `scaffold-eth`**: Whether the user has one or more of the scaffold-eth repos forked to their personal GitHub
+- **Experience Categories**: *Newb* (minimal prior GitHub activity), *Learning* (<1 year prior), *Experienced* (>12 months prior), *Delayed Start* (became active several months after SRE start)
+- **Active Month**: ≥1 qualifying Push or PullRequest event on a public GitHub repo
+- **Ecosystem Classification**: Repos classified as *Ethereum*, *Other EVM Chain*, *Personal*, or *Other* via Electric Capital mappings
+- **Retention**: Share of a cohort active at month *t*, normalized at month 0
+- **Full-time month**: >10 days of qualifying activity
+- **Velocity**: Sum over active days of (1 + ln(events per day))
+- **Change Categories**: Average monthly activity changes after SRE compared to before
+
+**Metric Definitions:**
+- **Activity** — Monthly Active Developer (MAD) methodology
+- **Retention** — Cohort-based retention methodology
+ """),
+ "Assumptions & Limitations": mo.md("""
+- **SRE data completeness**: The SRE registry only covers developers who created a GitHub profile through the program — the full 17K+ user count includes developers not represented in this dataset
+- **GitHub-only activity tracking**: All activity metrics are derived from public GitHub events; off-chain contributions, forum participation, and private repo activity are not captured
+- **Attribution methodology**: Correlations between SRE participation and ecosystem activity do not imply causation — developers who complete SRE may have become active Ethereum contributors regardless
+- **Time period scope**: Analysis is bounded by GitHub Archive availability and the SRE registry snapshot date; recent cohorts have shorter observation windows and lower apparent retention by construction
+ """),
+ "Data Sources": mo.md("""
+- **SRE GitHub users** — `int_sre_github_users`: user registry, cohorts, batches, challenges completed
+- **GitHub events** — `int_sre_github_events_by_user`: public GitHub events joined to SRE users, from [GitHub Archive](https://gharchive.org)
+- **Ecosystem mappings** — `stg_opendevdata__*`: Electric Capital's repo → ecosystem mappings, via [Open Dev Data](https://opendevdata.org/)
+- **Further reading**: [Speedrun Ethereum](https://speedrunethereum.com/) · [Pyoso docs](https://docs.opensource.observer/docs/get-started/python) · [Marimo docs](https://docs.marimo.io/)
+ """),
+ })
+ return
+
+
@app.cell(hide_code=True)
def _():
# Code snippets
diff --git a/notebooks/styles/_insights.css b/notebooks/styles/_insights.css
index a38214b..973dbf8 100644
--- a/notebooks/styles/_insights.css
+++ b/notebooks/styles/_insights.css
@@ -5,7 +5,7 @@
*
* CSS classes prefixed with .ddp- are ready for notebooks to adopt.
* Currently most notebooks use inline styles for these patterns —
- * see OSO-XXXX for the cleanup ticket to migrate to these classes.
+ * see OSO-2047 for the cleanup ticket to migrate to these classes.
*/
/* ==========================================================================
@@ -260,6 +260,38 @@ table td code, table td .mono {
margin-bottom: 0.5em;
}
+/* ==========================================================================
+ CARD WRAPPER (bordered container with shadow)
+ Used by: ethereum-repo-rank.py table wrapper
+ Replace: style="border:1px solid #e2e8f0;border-radius:12px;overflow:hidden;
+ background:white;box-shadow:0 1px 3px rgba(0,0,0,0.04);"
+ With: class="ddp-card" (note: corner radius becomes 8px instead of the inline 12px)
+ ========================================================================== */
+
+.ddp-card {
+ border: 1px solid var(--ddp-border);
+ border-radius: 8px;
+ overflow: hidden;
+ background: var(--ddp-bg);
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.04);
+}
+
+/* Taller iframe variant for lifecycle/retention charts */
+.ddp-chart-frame-tall {
+ width: 100%;
+ height: 580px;
+ border: none;
+ display: block;
+}
+
+/* Label above a select element */
+.ddp-select-label {
+ font-size: 0.6875em;
+ color: var(--ddp-text-muted);
+ display: block;
+ margin-bottom: 2px;
+}
+
/* ==========================================================================
MOBILE
========================================================================== */
diff --git a/notebooks/styles/_root.css b/notebooks/styles/_root.css
index 185eee4..b8c71cd 100644
--- a/notebooks/styles/_root.css
+++ b/notebooks/styles/_root.css
@@ -4,12 +4,7 @@
* Prose-optimized layout for documentation/onboarding content.
*/
-/* Constrain prose width for readability */
-.marimo-output > div {
- max-width: 720px;
- margin-left: auto;
- margin-right: auto;
-}
+/* Content width is no longer narrowed here; the 1100px max-width from base.css applies to all variants. */
/* Accordion styling */
details {
diff --git a/notebooks/styles/base.css b/notebooks/styles/base.css
index 2ca28a1..1754310 100644
--- a/notebooks/styles/base.css
+++ b/notebooks/styles/base.css
@@ -42,6 +42,13 @@ code, code *, pre, pre *, .cm-editor, .cm-editor * {
font-family: var(--ddp-font-mono) !important;
}
+/* === Content width ===
+ * Global max-width for all cell output so tables, charts, markdown,
+ * and stat cards align to the same column. */
+#root [data-cell-id] > div {
+ max-width: 1100px !important;
+}
+
/* === Base === */
body {
margin: 0;
@@ -179,12 +186,13 @@ td {
}
[data-testid="marimo-stat"] [class*="value"],
[data-testid="marimo-stat"] [class*="Value"] {
- font-size: 1.5em !important;
+ font-size: clamp(1em, 4vw, 1.5em) !important;
font-weight: 600 !important;
letter-spacing: -0.02em !important;
color: var(--ddp-text) !important;
overflow-wrap: break-word !important;
word-break: break-word !important;
+ line-height: 1.2 !important;
}
[data-testid="marimo-stat"] [class*="caption"],
[data-testid="marimo-stat"] [class*="Caption"] {