From eca220d9fe6127b7c9c90d208ec57c7e9219602c Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 19:50:30 +0100 Subject: [PATCH 01/14] example gallery --- .gitignore | 1 + Makefile | 7 +- docs/source/_static/gallery.css | 37 +++ docs/source/_static/interrogate_badge.svg | 6 +- docs/source/conf.py | 8 + docs/source/notebooks/index.md | 205 ++++++++++---- scripts/generate_gallery.py | 311 ++++++++++++++++++++++ 7 files changed, 520 insertions(+), 55 deletions(-) create mode 100644 docs/source/_static/gallery.css create mode 100755 scripts/generate_gallery.py diff --git a/.gitignore b/.gitignore index 9a6b0579..be2ee558 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ dist/ docs/build/ docs/jupyter_execute/ docs/source/api/generated/ +docs/source/_static/thumbnails/ diff --git a/Makefile b/Makefile index 604b0532..d80ce7fd 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ PACKAGE_DIR = causalpy # COMMANDS # ################################################################################# -.PHONY: init lint check_lint test uml html cleandocs doctest help +.PHONY: init lint check_lint test uml gallery html cleandocs doctest help init: ## Install the package in editable mode python -m pip install -e . 
--no-deps @@ -31,7 +31,10 @@ test: ## Run all tests with pytest uml: ## Generate UML diagrams from code pyreverse -o png causalpy --output-directory docs/source/_static --ignore tests -html: ## Build HTML documentation with Sphinx +gallery: ## Generate example gallery from notebooks + python scripts/generate_gallery.py + +html: gallery ## Build HTML documentation with Sphinx sphinx-build -b html docs/source docs/_build cleandocs: ## Clean the documentation build directories diff --git a/docs/source/_static/gallery.css b/docs/source/_static/gallery.css new file mode 100644 index 00000000..e7b9ad56 --- /dev/null +++ b/docs/source/_static/gallery.css @@ -0,0 +1,37 @@ +/* Custom CSS for uniform gallery card sizes - square-like cards */ +.sd-card { + height: 100%; + display: flex; + flex-direction: column; +} + +.sd-card-body { + flex-grow: 1; + display: flex; + flex-direction: column; + padding: 0.5rem; +} + +.sd-card-img-top { + width: 100%; + height: 250px; + object-fit: contain; + background-color: #f8f9fa; + padding: 8px; +} + +.sd-card-header { + padding: 0.75rem 0.5rem; + font-size: 0.9rem; + line-height: 1.3; + min-height: auto; +} + +.sd-grid-item { + display: flex; +} + +/* Ensure grid items stretch to same height */ +.sd-grid { + align-items: stretch; +} diff --git a/docs/source/_static/interrogate_badge.svg b/docs/source/_static/interrogate_badge.svg index a00d0758..8734d55d 100644 --- a/docs/source/_static/interrogate_badge.svg +++ b/docs/source/_static/interrogate_badge.svg @@ -1,5 +1,5 @@ - interrogate: 95.7% + interrogate: 95.8% @@ -12,8 +12,8 @@ interrogate interrogate - 95.7% - 95.7% + 95.8% + 95.8% diff --git a/docs/source/conf.py b/docs/source/conf.py index e298dfd1..938537bf 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -81,6 +81,13 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] master_doc = "index" +# Suppress warnings for notebooks linked from gallery (not in toctree) +suppress_warnings = [ + "toc.not_included", # 
Notebooks are linked from gallery, not toctree + "bibtex.duplicate_label", # BibTeX duplicate labels (less critical) + "bibtex.duplicate_citation", # BibTeX duplicate citations (less critical) +] + # bibtex config bibtex_bibfiles = ["references.bib"] bibtex_default_style = "unsrt" @@ -138,6 +145,7 @@ html_theme = "labs_sphinx_theme" html_static_path = ["_static"] html_favicon = "_static/favicon_logo.png" +html_css_files = ["gallery.css"] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md index 17eaa40c..d843870c 100644 --- a/docs/source/notebooks/index.md +++ b/docs/source/notebooks/index.md @@ -1,75 +1,180 @@ -# How-to - -:::{toctree} -:caption: ANCOVA -:maxdepth: 1 - -ancova_pymc.ipynb +# Example Gallery +## ANCOVA +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} ANCOVA for pre/post treatment nonequivalent group designs +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/ancova_pymc.png +:link: ancova_pymc +:link-type: doc ::: +:::: -:::{toctree} -:caption: Synthetic Control -:maxdepth: 1 +## Synthetic Control +::::{grid} 1 2 3 3 +:gutter: 3 -sc_skl.ipynb -sc_pymc.ipynb -sc_pymc_brexit.ipynb +:::{grid-item-card} Synthetic control with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/sc_pymc.png +:link: sc_pymc +:link-type: doc +::: +:::{grid-item-card} The effects of Brexit +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/sc_pymc_brexit.png +:link: sc_pymc_brexit +:link-type: doc +::: +:::{grid-item-card} Synthetic control with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/sc_skl.png +:link: sc_skl +:link-type: doc ::: +:::: -:::{toctree} -:caption: Geographical lift testing -:maxdepth: 1 +## Geographical lift testing +::::{grid} 1 2 3 3 +:gutter: 3 -geolift1.ipynb -multi_cell_geolift.ipynb 
+:::{grid-item-card} Bayesian geolift with CausalPy +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/geolift1.png +:link: geolift1 +:link-type: doc ::: +:::{grid-item-card} Multi-cell geolift analysis +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/multi_cell_geolift.png +:link: multi_cell_geolift +:link-type: doc +::: +:::: -:::{toctree} -:caption: Difference in Differences -:maxdepth: 1 +## Difference in Differences +::::{grid} 1 2 3 3 +:gutter: 3 -did_skl.ipynb -did_pymc.ipynb -did_pymc_banks.ipynb +:::{grid-item-card} Difference in Differences with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/did_pymc.png +:link: did_pymc +:link-type: doc +::: +:::{grid-item-card} Banking dataset with a `pymc` model +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/did_pymc_banks.png +:link: did_pymc_banks +:link-type: doc +::: +:::{grid-item-card} Difference in Differences with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/did_skl.png +:link: did_skl +:link-type: doc ::: +:::: -:::{toctree} -:caption: Interrupted Time Series -:maxdepth: 1 +## Interrupted Time Series +::::{grid} 1 2 3 3 +:gutter: 3 -its_skl.ipynb -its_pymc.ipynb -its_covid.ipynb +:::{grid-item-card} Excess deaths due to COVID-19 +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/its_covid.png +:link: its_covid +:link-type: doc ::: +:::{grid-item-card} Bayesian Interrupted Time Series +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/its_pymc.png +:link: its_pymc +:link-type: doc +::: +:::{grid-item-card} Interrupted Time Series (ITS) with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/its_skl.png +:link: its_skl +:link-type: doc +::: +:::: -:::{toctree} -:caption: Regression Discontinuity -:maxdepth: 1 +## Regression Discontinuity +::::{grid} 1 2 3 3 +:gutter: 3 -rd_skl.ipynb -rd_pymc.ipynb -rd_pymc_drinking.ipynb +:::{grid-item-card} Sharp regression 
discontinuity with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_pymc.png +:link: rd_pymc +:link-type: doc +::: +:::{grid-item-card} Drinking age - Bayesian analysis +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_pymc_drinking.png +:link: rd_pymc_drinking +:link-type: doc +::: +:::{grid-item-card} Sharp regression discontinuity with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_skl.png +:link: rd_skl +:link-type: doc ::: +:::{grid-item-card} Drinking age with a scikit-learn model +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_skl_drinking.png +:link: rd_skl_drinking +:link-type: doc +::: +:::: -:::{toctree} -:caption: Regression Kink Design -:maxdepth: 1 +## Regression Kink Design +::::{grid} 1 2 3 3 +:gutter: 3 -rkink_pymc.ipynb +:::{grid-item-card} Regression kink design with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rkink_pymc.png +:link: rkink_pymc +:link-type: doc ::: +:::: -:::{toctree} -:caption: Instrumental Variables Regression -:maxdepth: 1 +## Instrumental Variables Regression +::::{grid} 1 2 3 3 +:gutter: 3 -iv_pymc.ipynb -iv_weak_instruments.ipynb +:::{grid-item-card} Instrumental Variable Modelling (IV) with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/iv_pymc.png +:link: iv_pymc +:link-type: doc +::: +:::{grid-item-card} Instrumental Regression and Justifying Instruments with `pymc` +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/iv_weak_instruments.png +:link: iv_weak_instruments +:link-type: doc ::: +:::: -:::{toctree} -:caption: Inverse Propensity Score Weighting -:maxdepth: 1 +## Inverse Propensity Score Weighting +::::{grid} 1 2 3 3 +:gutter: 3 -inv_prop_pymc.ipynb -inv_prop_latent.ipynb +:::{grid-item-card} The Paradox of Propensity Scores in Bayesian Inference +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/inv_prop_latent.png +:link: inv_prop_latent 
+:link-type: doc +::: +:::{grid-item-card} Inverse Propensity Score Weighting with `pymc` +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/inv_prop_pymc.png +:link: inv_prop_pymc +:link-type: doc ::: +:::: diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py new file mode 100755 index 00000000..6e83d059 --- /dev/null +++ b/scripts/generate_gallery.py @@ -0,0 +1,311 @@ +#!/usr/bin/env python3 +""" +Generate example gallery for CausalPy documentation. + +This script scans notebooks in docs/source/notebooks/, extracts metadata, +generates thumbnails from the first plot in each notebook, and creates +a gallery page using sphinx-design cards. +""" + +import base64 +import re +import sys +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +try: + import nbformat + from nbconvert.preprocessors import ExecutePreprocessor +except ImportError: + print( + "Error: nbformat and nbconvert are required. Install with: pip install nbformat nbconvert" + ) + sys.exit(1) + +try: + from PIL import Image +except ImportError: + print("Warning: Pillow not found. 
Thumbnails will not be generated.") + Image = None # type: ignore[assignment,misc] + + +# Category mapping based on filename patterns +CATEGORY_MAPPING = { + "ANCOVA": ["ancova"], + "Synthetic Control": ["sc_", "synthetic_control"], + "Geographical lift testing": ["geolift", "multi_cell_geolift"], + "Difference in Differences": ["did_"], + "Interrupted Time Series": ["its_"], + "Regression Discontinuity": ["rd_"], + "Regression Kink Design": ["rkink"], + "Instrumental Variables Regression": ["iv_"], + "Inverse Propensity Score Weighting": ["inv_prop"], +} + + +def get_notebook_category(filename: str) -> str: + """Determine the category for a notebook based on its filename.""" + filename_lower = filename.lower() + for category, patterns in CATEGORY_MAPPING.items(): + for pattern in patterns: + if pattern in filename_lower: + return category + return "Other" + + +def extract_metadata(notebook_path: Path) -> Tuple[str, str]: + """Extract title and description from notebook.""" + with open(notebook_path, "r", encoding="utf-8") as f: + nb = nbformat.read(f, as_version=4) + + title = None + description = "" + + # Look for title in first markdown cell + for cell in nb.cells: + if cell.cell_type == "markdown": + source = cell.source.strip() + # Look for H1 or H2 title + title_match = re.match(r"^#+\s+(.+)$", source, re.MULTILINE) + if title_match: + title = title_match.group(1).strip() + # Get description from rest of first markdown cell + lines = source.split("\n") + description_lines = [] + found_title = False + for line in lines: + if re.match(r"^#+\s+", line): + found_title = True + continue + if found_title and line.strip(): + # Skip MyST directives and formulas + stripped = line.strip() + if stripped.startswith(":::"): + break # Stop at first MyST directive + if stripped.startswith("$$") or stripped.startswith("$"): + continue # Skip math formulas + if stripped.startswith("*") and ":" in stripped: + continue # Skip list items that are definitions + 
description_lines.append(stripped) + if len(description_lines) >= 2: # Take first 2 meaningful lines + break + description = " ".join(description_lines) + break + + # Fallback to filename-based title + if not title: + title = notebook_path.stem.replace("_", " ").title() + + return title, description + + +def extract_first_image(notebook_path: Path, output_dir: Path) -> Optional[str]: + """Extract first image from notebook outputs (without executing if outputs exist).""" + if Image is None: + return None + + try: + # Read notebook + with open(notebook_path, "r", encoding="utf-8") as f: + nb = nbformat.read(f, as_version=4) + + # First, try to find images in existing outputs (no execution needed) + for cell in nb.cells: + if cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs: + for output in cell.outputs: + if ( + output.output_type == "display_data" + or output.output_type == "execute_result" + ): + if "image/png" in output.get("data", {}): + image_data = output["data"]["image/png"] + return _save_thumbnail( + notebook_path, output_dir, image_data + ) + + # If no images found in existing outputs, try executing (with short timeout) + # Only execute if notebook appears to have no outputs + has_outputs = any( + cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs + for cell in nb.cells + ) + + if not has_outputs: + print(f" Executing {notebook_path.name} to generate thumbnail...") + ep = ExecutePreprocessor( + timeout=120, kernel_name="python3" + ) # 2 min timeout + try: + ep.preprocess(nb, {"metadata": {"path": str(notebook_path.parent)}}) + except Exception as e: + print(f" Warning: Failed to execute {notebook_path.name}: {e}") + return None + + # Find first image in outputs after execution + for cell in nb.cells: + if cell.cell_type == "code" and hasattr(cell, "outputs"): + for output in cell.outputs: + if ( + output.output_type == "display_data" + or output.output_type == "execute_result" + ): + if "image/png" in 
output.get("data", {}): + image_data = output["data"]["image/png"] + return _save_thumbnail( + notebook_path, output_dir, image_data + ) + + return None + except Exception as e: + print(f"Warning: Could not generate thumbnail for {notebook_path.name}: {e}") + return None + + +def _save_thumbnail( + notebook_path: Path, output_dir: Path, image_data: str +) -> Optional[str]: + """Save thumbnail image from base64 data.""" + try: + thumbnail_name = f"{notebook_path.stem}.png" + thumbnail_path = output_dir / thumbnail_name + + img_data = base64.b64decode(image_data) + with open(thumbnail_path, "wb") as img_file: + img_file.write(img_data) + + # Resize thumbnail to uniform square-like size (crop/pad to maintain aspect ratio) + try: + img = Image.open(thumbnail_path) + # Target size for uniform thumbnails - more square-like + target_size = (400, 250) + + # Calculate scaling to fit within target while maintaining aspect ratio + img.thumbnail(target_size, Image.Resampling.LANCZOS) + + # Create a new image with target size and paste centered + new_img = Image.new("RGB", target_size, (255, 255, 255)) + # Calculate position to center the image + x_offset = (target_size[0] - img.size[0]) // 2 + y_offset = (target_size[1] - img.size[1]) // 2 + new_img.paste(img, (x_offset, y_offset)) + new_img.save(thumbnail_path) + except Exception as e: + print(f"Warning: Could not resize thumbnail for {notebook_path.name}: {e}") + + # Use relative path: from notebooks/ subdirectory, go up to source root, then to _static + return f"../_static/thumbnails/{thumbnail_name}" + except Exception as e: + print(f"Warning: Could not save thumbnail for {notebook_path.name}: {e}") + return None + + +def generate_gallery_markdown(notebooks_data: List[Dict], output_path: Path): + """Generate gallery markdown file with sphinx-design cards.""" + # Group notebooks by category + categories: Dict[str, List[Dict]] = {} + for nb_data in notebooks_data: + category = nb_data["category"] + if category not in 
categories: + categories[category] = [] + categories[category].append(nb_data) + + # Sort categories + category_order = list(CATEGORY_MAPPING.keys()) + ["Other"] + sorted_categories = sorted( + categories.keys(), + key=lambda x: category_order.index(x) if x in category_order else 999, + ) + + # Generate markdown + lines = ["# Example Gallery\n"] + + for category in sorted_categories: + if category not in categories: + continue + + notebooks = categories[category] + # Sort notebooks within category + notebooks.sort(key=lambda x: x["filename"]) + + lines.append(f"## {category}\n") + lines.append("::::{grid} 1 2 3 3\n") + lines.append(":gutter: 3\n\n") + + for nb in notebooks: + # Title goes on the same line as grid-item-card (escape braces in f-string) + card_lines = [f":::{'{grid-item-card}'} {nb['title']}\n"] + # Add class to ensure uniform card height + card_lines.append(":class-card: sd-card-h-100\n") + + if nb.get("thumbnail"): + card_lines.append(f":img-top: {nb['thumbnail']}\n") + + # Use document name without extension (relative to current directory) + # Since index.md is in notebooks/, links are relative to that directory + doc_name = nb["filename"].replace(".ipynb", "") + card_lines.append(f":link: {doc_name}\n") + card_lines.append(":link-type: doc\n") + card_lines.append(":::\n") + lines.extend(card_lines) + + lines.append("::::\n\n") + + # Write to file + with open(output_path, "w", encoding="utf-8") as f: + f.write("".join(lines)) + + +def main(): + """Main function to generate gallery.""" + # Paths + repo_root = Path(__file__).parent.parent + notebooks_dir = repo_root / "docs" / "source" / "notebooks" + thumbnails_dir = repo_root / "docs" / "source" / "_static" / "thumbnails" + output_file = notebooks_dir / "index.md" + + # Create thumbnails directory + thumbnails_dir.mkdir(parents=True, exist_ok=True) + + # Find all notebooks + notebook_files = sorted(notebooks_dir.glob("*.ipynb")) + + if not notebook_files: + print("No notebooks found!") + 
sys.exit(1) + + print(f"Found {len(notebook_files)} notebooks") + + # Process each notebook + notebooks_data = [] + for nb_path in notebook_files: + print(f"Processing {nb_path.name}...") + + # Extract metadata + title, description = extract_metadata(nb_path) + + # Determine category + category = get_notebook_category(nb_path.name) + + # Generate thumbnail + thumbnail = extract_first_image(nb_path, thumbnails_dir) + + notebooks_data.append( + { + "filename": nb_path.name, + "title": title, + "description": description, + "category": category, + "thumbnail": thumbnail, + } + ) + + # Generate gallery markdown + print("Generating gallery markdown...") + generate_gallery_markdown(notebooks_data, output_file) + + print(f"Gallery generated successfully at {output_file}") + print(f"Thumbnails saved to {thumbnails_dir}") + + +if __name__ == "__main__": + main() From 2a2e73f9a36ca74064e73ae8a6d9fed527cf983f Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:01:08 +0100 Subject: [PATCH 02/14] font size --- docs/source/_static/gallery.css | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/_static/gallery.css b/docs/source/_static/gallery.css index e7b9ad56..7fee587d 100644 --- a/docs/source/_static/gallery.css +++ b/docs/source/_static/gallery.css @@ -22,7 +22,6 @@ .sd-card-header { padding: 0.75rem 0.5rem; - font-size: 0.9rem; line-height: 1.3; min-height: auto; } From 52ecbbc8e7ccd2da6db6e1cc6108014afaa78641 Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:03:26 +0100 Subject: [PATCH 03/14] rm mapping --- scripts/generate_gallery.py | 97 +++++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 31 deletions(-) diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py index 6e83d059..e0314e50 100755 --- a/scripts/generate_gallery.py +++ b/scripts/generate_gallery.py @@ -29,27 +29,52 @@ Image = None # type: ignore[assignment,misc] -# Category mapping based on filename patterns -CATEGORY_MAPPING = { - 
"ANCOVA": ["ancova"], - "Synthetic Control": ["sc_", "synthetic_control"], - "Geographical lift testing": ["geolift", "multi_cell_geolift"], - "Difference in Differences": ["did_"], - "Interrupted Time Series": ["its_"], - "Regression Discontinuity": ["rd_"], - "Regression Kink Design": ["rkink"], - "Instrumental Variables Regression": ["iv_"], - "Inverse Propensity Score Weighting": ["inv_prop"], -} - - -def get_notebook_category(filename: str) -> str: - """Determine the category for a notebook based on its filename.""" - filename_lower = filename.lower() - for category, patterns in CATEGORY_MAPPING.items(): - for pattern in patterns: - if pattern in filename_lower: - return category +def load_categories_from_index(index_path: Path) -> Dict[str, List[str]]: + """ + Load category structure from existing index.md. + + Reads the markdown file and extracts: + - Category names from ## headers + - Notebook names from :link: fields under each category + + Returns + ------- + Dict[str, List[str]] + Mapping from category name to list of notebook names (without .ipynb) + """ + categories: Dict[str, List[str]] = {} + current_category = None + + if not index_path.exists(): + return categories + + try: + with open(index_path, "r", encoding="utf-8") as f: + for line in f: + # Check for category header (## Category Name) + if line.startswith("## "): + current_category = line[3:].strip() + if current_category and current_category != "Example Gallery": + categories[current_category] = [] + # Check for notebook links under current category + elif current_category and ":link:" in line: + # Extract notebook name from :link: notebook_name + link_match = re.search(r":link:\s+(\S+)", line) + if link_match: + notebook_name = link_match.group(1) + categories[current_category].append(notebook_name) + except Exception as e: + print(f"Warning: Could not load categories from {index_path}: {e}") + + return categories + + +def get_notebook_category(filename: str, category_mapping: Dict[str, 
List[str]]) -> str: + """Determine the category for a notebook from the loaded mapping.""" + notebook_name = filename.replace(".ipynb", "") + for category, notebooks in category_mapping.items(): + if notebook_name in notebooks: + return category return "Other" @@ -199,7 +224,11 @@ def _save_thumbnail( return None -def generate_gallery_markdown(notebooks_data: List[Dict], output_path: Path): +def generate_gallery_markdown( + notebooks_data: List[Dict], + output_path: Path, + category_mapping: Dict[str, List[str]], +): """Generate gallery markdown file with sphinx-design cards.""" # Group notebooks by category categories: Dict[str, List[Dict]] = {} @@ -209,12 +238,13 @@ def generate_gallery_markdown(notebooks_data: List[Dict], output_path: Path): categories[category] = [] categories[category].append(nb_data) - # Sort categories - category_order = list(CATEGORY_MAPPING.keys()) + ["Other"] - sorted_categories = sorted( - categories.keys(), - key=lambda x: category_order.index(x) if x in category_order else 999, - ) + # Sort categories - maintain order from index.md (order of appearance) + # Use the order from category_mapping to preserve the structure + sorted_categories = [cat for cat in category_mapping.keys() if cat in categories] + # Add any categories found in notebooks but not in mapping (shouldn't happen, but handle gracefully) + for cat in categories.keys(): + if cat not in sorted_categories: + sorted_categories.append(cat) # Generate markdown lines = ["# Example Gallery\n"] @@ -266,6 +296,11 @@ def main(): # Create thumbnails directory thumbnails_dir.mkdir(parents=True, exist_ok=True) + # Load category structure from existing index.md + category_mapping = load_categories_from_index(output_file) + if category_mapping: + print(f"Loaded {len(category_mapping)} categories from index.md") + # Find all notebooks notebook_files = sorted(notebooks_dir.glob("*.ipynb")) @@ -283,8 +318,8 @@ def main(): # Extract metadata title, description = extract_metadata(nb_path) - # 
Determine category - category = get_notebook_category(nb_path.name) + # Determine category from index.md structure + category = get_notebook_category(nb_path.name, category_mapping) # Generate thumbnail thumbnail = extract_first_image(nb_path, thumbnails_dir) @@ -301,7 +336,7 @@ def main(): # Generate gallery markdown print("Generating gallery markdown...") - generate_gallery_markdown(notebooks_data, output_file) + generate_gallery_markdown(notebooks_data, output_file, category_mapping) print(f"Gallery generated successfully at {output_file}") print(f"Thumbnails saved to {thumbnails_dir}") From eb1d4b06ea5ff055a4c7eb38490805dc74c0cce1 Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:05:33 +0100 Subject: [PATCH 04/14] better hints --- scripts/generate_gallery.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py index e0314e50..41038df9 100755 --- a/scripts/generate_gallery.py +++ b/scripts/generate_gallery.py @@ -11,7 +11,6 @@ import re import sys from pathlib import Path -from typing import Dict, List, Optional, Tuple try: import nbformat @@ -29,7 +28,7 @@ Image = None # type: ignore[assignment,misc] -def load_categories_from_index(index_path: Path) -> Dict[str, List[str]]: +def load_categories_from_index(index_path: Path) -> dict[str, list[str]]: """ Load category structure from existing index.md. 
@@ -39,10 +38,10 @@ def load_categories_from_index(index_path: Path) -> Dict[str, List[str]]: Returns ------- - Dict[str, List[str]] + dict[str, list[str]] Mapping from category name to list of notebook names (without .ipynb) """ - categories: Dict[str, List[str]] = {} + categories: dict[str, list[str]] = {} current_category = None if not index_path.exists(): @@ -69,7 +68,7 @@ def load_categories_from_index(index_path: Path) -> Dict[str, List[str]]: return categories -def get_notebook_category(filename: str, category_mapping: Dict[str, List[str]]) -> str: +def get_notebook_category(filename: str, category_mapping: dict[str, list[str]]) -> str: """Determine the category for a notebook from the loaded mapping.""" notebook_name = filename.replace(".ipynb", "") for category, notebooks in category_mapping.items(): @@ -78,7 +77,7 @@ def get_notebook_category(filename: str, category_mapping: Dict[str, List[str]]) return "Other" -def extract_metadata(notebook_path: Path) -> Tuple[str, str]: +def extract_metadata(notebook_path: Path) -> tuple[str, str]: """Extract title and description from notebook.""" with open(notebook_path, "r", encoding="utf-8") as f: nb = nbformat.read(f, as_version=4) @@ -124,7 +123,7 @@ def extract_metadata(notebook_path: Path) -> Tuple[str, str]: return title, description -def extract_first_image(notebook_path: Path, output_dir: Path) -> Optional[str]: +def extract_first_image(notebook_path: Path, output_dir: Path) -> str | None: """Extract first image from notebook outputs (without executing if outputs exist).""" if Image is None: return None @@ -188,7 +187,7 @@ def extract_first_image(notebook_path: Path, output_dir: Path) -> Optional[str]: def _save_thumbnail( notebook_path: Path, output_dir: Path, image_data: str -) -> Optional[str]: +) -> str | None: """Save thumbnail image from base64 data.""" try: thumbnail_name = f"{notebook_path.stem}.png" @@ -225,13 +224,13 @@ def _save_thumbnail( def generate_gallery_markdown( - notebooks_data: 
List[Dict], + notebooks_data: list[dict], output_path: Path, - category_mapping: Dict[str, List[str]], + category_mapping: dict[str, list[str]], ): """Generate gallery markdown file with sphinx-design cards.""" # Group notebooks by category - categories: Dict[str, List[Dict]] = {} + categories: dict[str, list[dict]] = {} for nb_data in notebooks_data: category = nb_data["category"] if category not in categories: From cc11104815de0e32b6ce3b457be480059978910f Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:08:07 +0100 Subject: [PATCH 05/14] simplify code --- scripts/generate_gallery.py | 229 +++++++++++------------------------- 1 file changed, 71 insertions(+), 158 deletions(-) diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py index 41038df9..49493670 100755 --- a/scripts/generate_gallery.py +++ b/scripts/generate_gallery.py @@ -56,12 +56,8 @@ def load_categories_from_index(index_path: Path) -> dict[str, list[str]]: if current_category and current_category != "Example Gallery": categories[current_category] = [] # Check for notebook links under current category - elif current_category and ":link:" in line: - # Extract notebook name from :link: notebook_name - link_match = re.search(r":link:\s+(\S+)", line) - if link_match: - notebook_name = link_match.group(1) - categories[current_category].append(notebook_name) + elif current_category and (match := re.search(r":link:\s+(\S+)", line)): + categories[current_category].append(match.group(1)) except Exception as e: print(f"Warning: Could not load categories from {index_path}: {e}") @@ -71,56 +67,40 @@ def load_categories_from_index(index_path: Path) -> dict[str, list[str]]: def get_notebook_category(filename: str, category_mapping: dict[str, list[str]]) -> str: """Determine the category for a notebook from the loaded mapping.""" notebook_name = filename.replace(".ipynb", "") - for category, notebooks in category_mapping.items(): - if notebook_name in notebooks: - return category - 
return "Other" + return next( + ( + cat + for cat, notebooks in category_mapping.items() + if notebook_name in notebooks + ), + "Other", + ) -def extract_metadata(notebook_path: Path) -> tuple[str, str]: - """Extract title and description from notebook.""" +def extract_metadata(notebook_path: Path) -> str: + """Extract title from notebook.""" with open(notebook_path, "r", encoding="utf-8") as f: nb = nbformat.read(f, as_version=4) - title = None - description = "" - # Look for title in first markdown cell for cell in nb.cells: if cell.cell_type == "markdown": - source = cell.source.strip() - # Look for H1 or H2 title - title_match = re.match(r"^#+\s+(.+)$", source, re.MULTILINE) - if title_match: - title = title_match.group(1).strip() - # Get description from rest of first markdown cell - lines = source.split("\n") - description_lines = [] - found_title = False - for line in lines: - if re.match(r"^#+\s+", line): - found_title = True - continue - if found_title and line.strip(): - # Skip MyST directives and formulas - stripped = line.strip() - if stripped.startswith(":::"): - break # Stop at first MyST directive - if stripped.startswith("$$") or stripped.startswith("$"): - continue # Skip math formulas - if stripped.startswith("*") and ":" in stripped: - continue # Skip list items that are definitions - description_lines.append(stripped) - if len(description_lines) >= 2: # Take first 2 meaningful lines - break - description = " ".join(description_lines) - break + if match := re.search(r"^#+\s+(.+)$", cell.source.strip(), re.MULTILINE): + return match.group(1).strip() # Fallback to filename-based title - if not title: - title = notebook_path.stem.replace("_", " ").title() + return notebook_path.stem.replace("_", " ").title() - return title, description + +def _find_image_in_notebook(nb) -> str | None: + """Find first PNG image in notebook outputs.""" + for cell in nb.cells: + if cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs: + for output in 
cell.outputs: + if output.output_type in ("display_data", "execute_result"): + if image_data := output.get("data", {}).get("image/png"): + return image_data + return None def extract_first_image(notebook_path: Path, output_dir: Path) -> str | None: @@ -129,55 +109,27 @@ def extract_first_image(notebook_path: Path, output_dir: Path) -> str | None: return None try: - # Read notebook with open(notebook_path, "r", encoding="utf-8") as f: nb = nbformat.read(f, as_version=4) - # First, try to find images in existing outputs (no execution needed) - for cell in nb.cells: - if cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs: - for output in cell.outputs: - if ( - output.output_type == "display_data" - or output.output_type == "execute_result" - ): - if "image/png" in output.get("data", {}): - image_data = output["data"]["image/png"] - return _save_thumbnail( - notebook_path, output_dir, image_data - ) - - # If no images found in existing outputs, try executing (with short timeout) - # Only execute if notebook appears to have no outputs - has_outputs = any( + # Try to find images in existing outputs first + if image_data := _find_image_in_notebook(nb): + return _save_thumbnail(notebook_path, output_dir, image_data) + + # Execute if notebook has no outputs + if not any( cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs for cell in nb.cells - ) - - if not has_outputs: + ): print(f" Executing {notebook_path.name} to generate thumbnail...") - ep = ExecutePreprocessor( - timeout=120, kernel_name="python3" - ) # 2 min timeout try: - ep.preprocess(nb, {"metadata": {"path": str(notebook_path.parent)}}) + ExecutePreprocessor(timeout=120, kernel_name="python3").preprocess( + nb, {"metadata": {"path": str(notebook_path.parent)}} + ) + if image_data := _find_image_in_notebook(nb): + return _save_thumbnail(notebook_path, output_dir, image_data) except Exception as e: print(f" Warning: Failed to execute {notebook_path.name}: {e}") - return None 
- - # Find first image in outputs after execution - for cell in nb.cells: - if cell.cell_type == "code" and hasattr(cell, "outputs"): - for output in cell.outputs: - if ( - output.output_type == "display_data" - or output.output_type == "execute_result" - ): - if "image/png" in output.get("data", {}): - image_data = output["data"]["image/png"] - return _save_thumbnail( - notebook_path, output_dir, image_data - ) return None except Exception as e: @@ -193,30 +145,22 @@ def _save_thumbnail( thumbnail_name = f"{notebook_path.stem}.png" thumbnail_path = output_dir / thumbnail_name - img_data = base64.b64decode(image_data) - with open(thumbnail_path, "wb") as img_file: - img_file.write(img_data) - - # Resize thumbnail to uniform square-like size (crop/pad to maintain aspect ratio) - try: - img = Image.open(thumbnail_path) - # Target size for uniform thumbnails - more square-like - target_size = (400, 250) - - # Calculate scaling to fit within target while maintaining aspect ratio - img.thumbnail(target_size, Image.Resampling.LANCZOS) - - # Create a new image with target size and paste centered - new_img = Image.new("RGB", target_size, (255, 255, 255)) - # Calculate position to center the image - x_offset = (target_size[0] - img.size[0]) // 2 - y_offset = (target_size[1] - img.size[1]) // 2 - new_img.paste(img, (x_offset, y_offset)) - new_img.save(thumbnail_path) - except Exception as e: - print(f"Warning: Could not resize thumbnail for {notebook_path.name}: {e}") - - # Use relative path: from notebooks/ subdirectory, go up to source root, then to _static + # Decode and save image + thumbnail_path.write_bytes(base64.b64decode(image_data)) + + # Resize to uniform size (400x250) with padding + img = Image.open(thumbnail_path) + target_size = (400, 250) + img.thumbnail(target_size, Image.Resampling.LANCZOS) + + # Create padded image + new_img = Image.new("RGB", target_size, (255, 255, 255)) + new_img.paste( + img, + ((target_size[0] - img.size[0]) // 2, (target_size[1] - 
img.size[1]) // 2), + ) + new_img.save(thumbnail_path) + return f"../_static/thumbnails/{thumbnail_name}" except Exception as e: print(f"Warning: Could not save thumbnail for {notebook_path.name}: {e}") @@ -232,56 +176,35 @@ def generate_gallery_markdown( # Group notebooks by category categories: dict[str, list[dict]] = {} for nb_data in notebooks_data: - category = nb_data["category"] - if category not in categories: - categories[category] = [] - categories[category].append(nb_data) - - # Sort categories - maintain order from index.md (order of appearance) - # Use the order from category_mapping to preserve the structure - sorted_categories = [cat for cat in category_mapping.keys() if cat in categories] - # Add any categories found in notebooks but not in mapping (shouldn't happen, but handle gracefully) - for cat in categories.keys(): - if cat not in sorted_categories: - sorted_categories.append(cat) + categories.setdefault(nb_data["category"], []).append(nb_data) + + # Sort categories maintaining order from index.md + sorted_categories = [ + cat for cat in category_mapping.keys() if cat in categories + ] + [cat for cat in categories.keys() if cat not in category_mapping] # Generate markdown lines = ["# Example Gallery\n"] for category in sorted_categories: - if category not in categories: - continue - - notebooks = categories[category] - # Sort notebooks within category - notebooks.sort(key=lambda x: x["filename"]) + notebooks = sorted(categories[category], key=lambda x: x["filename"]) - lines.append(f"## {category}\n") - lines.append("::::{grid} 1 2 3 3\n") - lines.append(":gutter: 3\n\n") + lines.extend([f"## {category}\n", "::::{grid} 1 2 3 3\n", ":gutter: 3\n\n"]) for nb in notebooks: - # Title goes on the same line as grid-item-card (escape braces in f-string) - card_lines = [f":::{'{grid-item-card}'} {nb['title']}\n"] - # Add class to ensure uniform card height - card_lines.append(":class-card: sd-card-h-100\n") - + doc_name = 
nb["filename"].replace(".ipynb", "") + card_lines = [ + f":::{'{grid-item-card}'} {nb['title']}\n", + ":class-card: sd-card-h-100\n", + ] if nb.get("thumbnail"): card_lines.append(f":img-top: {nb['thumbnail']}\n") - - # Use document name without extension (relative to current directory) - # Since index.md is in notebooks/, links are relative to that directory - doc_name = nb["filename"].replace(".ipynb", "") - card_lines.append(f":link: {doc_name}\n") - card_lines.append(":link-type: doc\n") - card_lines.append(":::\n") + card_lines.extend([f":link: {doc_name}\n", ":link-type: doc\n", ":::\n"]) lines.extend(card_lines) lines.append("::::\n\n") - # Write to file - with open(output_path, "w", encoding="utf-8") as f: - f.write("".join(lines)) + output_path.write_text("".join(lines), encoding="utf-8") def main(): @@ -314,22 +237,12 @@ def main(): for nb_path in notebook_files: print(f"Processing {nb_path.name}...") - # Extract metadata - title, description = extract_metadata(nb_path) - - # Determine category from index.md structure - category = get_notebook_category(nb_path.name, category_mapping) - - # Generate thumbnail - thumbnail = extract_first_image(nb_path, thumbnails_dir) - notebooks_data.append( { "filename": nb_path.name, - "title": title, - "description": description, - "category": category, - "thumbnail": thumbnail, + "title": extract_metadata(nb_path), + "category": get_notebook_category(nb_path.name, category_mapping), + "thumbnail": extract_first_image(nb_path, thumbnails_dir), } ) From 2f06e88405c0532c255dc1496add3acb93278608 Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:11:49 +0100 Subject: [PATCH 06/14] further simplification --- docs/source/notebooks/index.md | 116 ++++++++++++++++----------------- scripts/generate_gallery.py | 50 ++++++-------- 2 files changed, 79 insertions(+), 87 deletions(-) diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md index d843870c..9df3018d 100644 --- 
a/docs/source/notebooks/index.md +++ b/docs/source/notebooks/index.md @@ -1,12 +1,12 @@ # Example Gallery -## ANCOVA +## Regression Kink Design ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} ANCOVA for pre/post treatment nonequivalent group designs +:::{grid-item-card} Regression kink design with `pymc` models :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/ancova_pymc.png -:link: ancova_pymc +:img-top: ../_static/thumbnails/rkink_pymc.png +:link: rkink_pymc :link-type: doc ::: :::: @@ -35,44 +35,50 @@ ::: :::: -## Geographical lift testing +## Inverse Propensity Score Weighting ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} Bayesian geolift with CausalPy +:::{grid-item-card} The Paradox of Propensity Scores in Bayesian Inference :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/geolift1.png -:link: geolift1 +:img-top: ../_static/thumbnails/inv_prop_latent.png +:link: inv_prop_latent :link-type: doc ::: -:::{grid-item-card} Multi-cell geolift analysis +:::{grid-item-card} Inverse Propensity Score Weighting with `pymc` :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/multi_cell_geolift.png -:link: multi_cell_geolift +:img-top: ../_static/thumbnails/inv_prop_pymc.png +:link: inv_prop_pymc :link-type: doc ::: :::: -## Difference in Differences +## ANCOVA ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} Difference in Differences with `pymc` models +:::{grid-item-card} ANCOVA for pre/post treatment nonequivalent group designs :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/did_pymc.png -:link: did_pymc +:img-top: ../_static/thumbnails/ancova_pymc.png +:link: ancova_pymc :link-type: doc ::: -:::{grid-item-card} Banking dataset with a `pymc` model +:::: + +## Instrumental Variables Regression +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Instrumental Variable Modelling (IV) with `pymc` models :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/did_pymc_banks.png -:link: did_pymc_banks +:img-top: 
../_static/thumbnails/iv_pymc.png +:link: iv_pymc :link-type: doc ::: -:::{grid-item-card} Difference in Differences with scikit-learn models +:::{grid-item-card} Instrumental Regression and Justifying Instruments with `pymc` :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/did_skl.png -:link: did_skl +:img-top: ../_static/thumbnails/iv_weak_instruments.png +:link: iv_weak_instruments :link-type: doc ::: :::: @@ -101,6 +107,30 @@ ::: :::: +## Difference in Differences +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Difference in Differences with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/did_pymc.png +:link: did_pymc +:link-type: doc +::: +:::{grid-item-card} Banking dataset with a `pymc` model +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/did_pymc_banks.png +:link: did_pymc_banks +:link-type: doc +::: +:::{grid-item-card} Difference in Differences with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/did_skl.png +:link: did_skl +:link-type: doc +::: +:::: + ## Regression Discontinuity ::::{grid} 1 2 3 3 :gutter: 3 @@ -131,50 +161,20 @@ ::: :::: -## Regression Kink Design -::::{grid} 1 2 3 3 -:gutter: 3 - -:::{grid-item-card} Regression kink design with `pymc` models -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/rkink_pymc.png -:link: rkink_pymc -:link-type: doc -::: -:::: - -## Instrumental Variables Regression -::::{grid} 1 2 3 3 -:gutter: 3 - -:::{grid-item-card} Instrumental Variable Modelling (IV) with `pymc` models -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/iv_pymc.png -:link: iv_pymc -:link-type: doc -::: -:::{grid-item-card} Instrumental Regression and Justifying Instruments with `pymc` -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/iv_weak_instruments.png -:link: iv_weak_instruments -:link-type: doc -::: -:::: - -## Inverse Propensity Score Weighting +## Geographical lift testing ::::{grid} 1 2 3 3 :gutter: 3 
-:::{grid-item-card} The Paradox of Propensity Scores in Bayesian Inference +:::{grid-item-card} Bayesian geolift with CausalPy :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/inv_prop_latent.png -:link: inv_prop_latent +:img-top: ../_static/thumbnails/geolift1.png +:link: geolift1 :link-type: doc ::: -:::{grid-item-card} Inverse Propensity Score Weighting with `pymc` +:::{grid-item-card} Multi-cell geolift analysis :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/inv_prop_pymc.png -:link: inv_prop_pymc +:img-top: ../_static/thumbnails/multi_cell_geolift.png +:link: multi_cell_geolift :link-type: doc ::: :::: diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py index 49493670..5fab55c2 100755 --- a/scripts/generate_gallery.py +++ b/scripts/generate_gallery.py @@ -8,6 +8,7 @@ """ import base64 +import io import re import sys from pathlib import Path @@ -41,27 +42,23 @@ def load_categories_from_index(index_path: Path) -> dict[str, list[str]]: dict[str, list[str]] Mapping from category name to list of notebook names (without .ipynb) """ - categories: dict[str, list[str]] = {} - current_category = None - if not index_path.exists(): - return categories + return {} try: - with open(index_path, "r", encoding="utf-8") as f: - for line in f: - # Check for category header (## Category Name) - if line.startswith("## "): - current_category = line[3:].strip() - if current_category and current_category != "Example Gallery": - categories[current_category] = [] - # Check for notebook links under current category - elif current_category and (match := re.search(r":link:\s+(\S+)", line)): - categories[current_category].append(match.group(1)) + categories: dict[str, list[str]] = {} + current_category = None + for line in index_path.read_text(encoding="utf-8").splitlines(): + if line.startswith("## "): + current_category = line[3:].strip() + if current_category and current_category != "Example Gallery": + categories[current_category] = [] + elif 
current_category and (match := re.search(r":link:\s+(\S+)", line)): + categories[current_category].append(match.group(1)) + return categories except Exception as e: print(f"Warning: Could not load categories from {index_path}: {e}") - - return categories + return {} def get_notebook_category(filename: str, category_mapping: dict[str, list[str]]) -> str: @@ -79,8 +76,7 @@ def get_notebook_category(filename: str, category_mapping: dict[str, list[str]]) def extract_metadata(notebook_path: Path) -> str: """Extract title from notebook.""" - with open(notebook_path, "r", encoding="utf-8") as f: - nb = nbformat.read(f, as_version=4) + nb = nbformat.reads(notebook_path.read_text(encoding="utf-8"), as_version=4) # Look for title in first markdown cell for cell in nb.cells: @@ -109,8 +105,7 @@ def extract_first_image(notebook_path: Path, output_dir: Path) -> str | None: return None try: - with open(notebook_path, "r", encoding="utf-8") as f: - nb = nbformat.read(f, as_version=4) + nb = nbformat.reads(notebook_path.read_text(encoding="utf-8"), as_version=4) # Try to find images in existing outputs first if image_data := _find_image_in_notebook(nb): @@ -145,15 +140,12 @@ def _save_thumbnail( thumbnail_name = f"{notebook_path.stem}.png" thumbnail_path = output_dir / thumbnail_name - # Decode and save image - thumbnail_path.write_bytes(base64.b64decode(image_data)) - - # Resize to uniform size (400x250) with padding - img = Image.open(thumbnail_path) + # Decode and process image in memory + img = Image.open(io.BytesIO(base64.b64decode(image_data))) target_size = (400, 250) img.thumbnail(target_size, Image.Resampling.LANCZOS) - # Create padded image + # Create padded image and save new_img = Image.new("RGB", target_size, (255, 255, 255)) new_img.paste( img, @@ -179,9 +171,9 @@ def generate_gallery_markdown( categories.setdefault(nb_data["category"], []).append(nb_data) # Sort categories maintaining order from index.md - sorted_categories = [ - cat for cat in 
category_mapping.keys() if cat in categories - ] + [cat for cat in categories.keys() if cat not in category_mapping] + sorted_categories = list(category_mapping.keys() & categories.keys()) + list( + categories.keys() - category_mapping.keys() + ) # Generate markdown lines = ["# Example Gallery\n"] From de7ecb37c7078e61770f8854f63d9f4b89e3a71a Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:18:20 +0100 Subject: [PATCH 07/14] try fix --- .gitignore | 1 + .readthedocs.yaml | 3 + docs/source/notebooks/index.md | 136 ++++++++++++++++----------------- pyproject.toml | 3 + scripts/generate_gallery.py | 2 + 5 files changed, 77 insertions(+), 68 deletions(-) diff --git a/.gitignore b/.gitignore index be2ee558..0a46dc97 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ docs/build/ docs/jupyter_execute/ docs/source/api/generated/ docs/source/_static/thumbnails/ +# Note: thumbnails are generated during build (see .readthedocs.yaml) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 97c4f5a3..52e82175 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -14,6 +14,9 @@ build: # nodejs: "16" # rust: "1.55" # golang: "1.17" + commands: + # Generate gallery before building docs + - python scripts/generate_gallery.py # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md index 9df3018d..448d3bce 100644 --- a/docs/source/notebooks/index.md +++ b/docs/source/notebooks/index.md @@ -1,12 +1,12 @@ # Example Gallery -## Regression Kink Design +## ANCOVA ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} Regression kink design with `pymc` models +:::{grid-item-card} ANCOVA for pre/post treatment nonequivalent group designs :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/rkink_pymc.png -:link: rkink_pymc +:img-top: ../_static/thumbnails/ancova_pymc.png +:link: ancova_pymc :link-type: doc ::: :::: @@ -35,32 +35,74 @@ ::: :::: -## Inverse Propensity Score 
Weighting +## Interrupted Time Series ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} The Paradox of Propensity Scores in Bayesian Inference +:::{grid-item-card} Excess deaths due to COVID-19 :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/inv_prop_latent.png -:link: inv_prop_latent +:img-top: ../_static/thumbnails/its_covid.png +:link: its_covid :link-type: doc ::: -:::{grid-item-card} Inverse Propensity Score Weighting with `pymc` +:::{grid-item-card} Bayesian Interrupted Time Series :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/inv_prop_pymc.png -:link: inv_prop_pymc +:img-top: ../_static/thumbnails/its_pymc.png +:link: its_pymc +:link-type: doc +::: +:::{grid-item-card} Interrupted Time Series (ITS) with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/its_skl.png +:link: its_skl :link-type: doc ::: :::: -## ANCOVA +## Geographical lift testing ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} ANCOVA for pre/post treatment nonequivalent group designs +:::{grid-item-card} Bayesian geolift with CausalPy :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/ancova_pymc.png -:link: ancova_pymc +:img-top: ../_static/thumbnails/geolift1.png +:link: geolift1 +:link-type: doc +::: +:::{grid-item-card} Multi-cell geolift analysis +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/multi_cell_geolift.png +:link: multi_cell_geolift +:link-type: doc +::: +:::: + +## Regression Discontinuity +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Sharp regression discontinuity with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_pymc.png +:link: rd_pymc +:link-type: doc +::: +:::{grid-item-card} Drinking age - Bayesian analysis +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_pymc_drinking.png +:link: rd_pymc_drinking +:link-type: doc +::: +:::{grid-item-card} Sharp regression discontinuity with scikit-learn models +:class-card: sd-card-h-100 +:img-top: 
../_static/thumbnails/rd_skl.png +:link: rd_skl +:link-type: doc +::: +:::{grid-item-card} Drinking age with a scikit-learn model +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_skl_drinking.png +:link: rd_skl_drinking :link-type: doc ::: :::: @@ -83,26 +125,14 @@ ::: :::: -## Interrupted Time Series +## Regression Kink Design ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} Excess deaths due to COVID-19 -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/its_covid.png -:link: its_covid -:link-type: doc -::: -:::{grid-item-card} Bayesian Interrupted Time Series -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/its_pymc.png -:link: its_pymc -:link-type: doc -::: -:::{grid-item-card} Interrupted Time Series (ITS) with scikit-learn models +:::{grid-item-card} Regression kink design with `pymc` models :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/its_skl.png -:link: its_skl +:img-top: ../_static/thumbnails/rkink_pymc.png +:link: rkink_pymc :link-type: doc ::: :::: @@ -131,50 +161,20 @@ ::: :::: -## Regression Discontinuity -::::{grid} 1 2 3 3 -:gutter: 3 - -:::{grid-item-card} Sharp regression discontinuity with `pymc` models -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/rd_pymc.png -:link: rd_pymc -:link-type: doc -::: -:::{grid-item-card} Drinking age - Bayesian analysis -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/rd_pymc_drinking.png -:link: rd_pymc_drinking -:link-type: doc -::: -:::{grid-item-card} Sharp regression discontinuity with scikit-learn models -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/rd_skl.png -:link: rd_skl -:link-type: doc -::: -:::{grid-item-card} Drinking age with a scikit-learn model -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/rd_skl_drinking.png -:link: rd_skl_drinking -:link-type: doc -::: -:::: - -## Geographical lift testing +## Inverse Propensity Score Weighting ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} Bayesian 
geolift with CausalPy +:::{grid-item-card} The Paradox of Propensity Scores in Bayesian Inference :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/geolift1.png -:link: geolift1 +:img-top: ../_static/thumbnails/inv_prop_latent.png +:link: inv_prop_latent :link-type: doc ::: -:::{grid-item-card} Multi-cell geolift analysis +:::{grid-item-card} Inverse Propensity Score Weighting with `pymc` :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/multi_cell_geolift.png -:link: multi_cell_geolift +:img-top: ../_static/thumbnails/inv_prop_pymc.png +:link: inv_prop_pymc :link-type: doc ::: :::: diff --git a/pyproject.toml b/pyproject.toml index c212f395..aef3bcb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,9 @@ docs = [ "ipywidgets", "sphinx-design", "sphinx-togglebutton", + "nbformat", + "nbconvert", + "Pillow", ] lint = ["interrogate", "pre-commit", "ruff", "mypy"] test = ["pytest", "pytest-cov", "codespell", "nbformat", "nbconvert"] diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py index 5fab55c2..2fb60e1f 100755 --- a/scripts/generate_gallery.py +++ b/scripts/generate_gallery.py @@ -153,6 +153,8 @@ def _save_thumbnail( ) new_img.save(thumbnail_path) + # Path relative to document location (notebooks/) + # Need to go up one level to source/, then into _static/thumbnails/ return f"../_static/thumbnails/{thumbnail_name}" except Exception as e: print(f"Warning: Could not save thumbnail for {notebook_path.name}: {e}") From c53ae3915e99f9b11bcdf44fc5541f0305a21564 Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:22:49 +0100 Subject: [PATCH 08/14] alphabetical order --- docs/source/notebooks/index.md | 126 ++++++++++++++++----------------- scripts/generate_gallery.py | 6 +- 2 files changed, 65 insertions(+), 67 deletions(-) diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md index 448d3bce..60ea876c 100644 --- a/docs/source/notebooks/index.md +++
b/docs/source/notebooks/index.md @@ -11,26 +11,62 @@ ::: :::: -## Synthetic Control +## Difference in Differences ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} Synthetic control with `pymc` models +:::{grid-item-card} Difference in Differences with `pymc` models :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/sc_pymc.png -:link: sc_pymc +:img-top: ../_static/thumbnails/did_pymc.png +:link: did_pymc :link-type: doc ::: -:::{grid-item-card} The effects of Brexit +:::{grid-item-card} Banking dataset with a `pymc` model :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/sc_pymc_brexit.png -:link: sc_pymc_brexit +:img-top: ../_static/thumbnails/did_pymc_banks.png +:link: did_pymc_banks :link-type: doc ::: -:::{grid-item-card} Synthetic control with scikit-learn models +:::{grid-item-card} Difference in Differences with scikit-learn models :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/sc_skl.png -:link: sc_skl +:img-top: ../_static/thumbnails/did_skl.png +:link: did_skl +:link-type: doc +::: +:::: + +## Geographical lift testing +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Bayesian geolift with CausalPy +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/geolift1.png +:link: geolift1 +:link-type: doc +::: +:::{grid-item-card} Multi-cell geolift analysis +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/multi_cell_geolift.png +:link: multi_cell_geolift +:link-type: doc +::: +:::: + +## Instrumental Variables Regression +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Instrumental Variable Modelling (IV) with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/iv_pymc.png +:link: iv_pymc +:link-type: doc +::: +:::{grid-item-card} Instrumental Regression and Justifying Instruments with `pymc` +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/iv_weak_instruments.png +:link: iv_weak_instruments :link-type: doc ::: :::: @@ -59,20 +95,20 @@ ::: :::: -## Geographical 
lift testing +## Inverse Propensity Score Weighting ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} Bayesian geolift with CausalPy +:::{grid-item-card} The Paradox of Propensity Scores in Bayesian Inference :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/geolift1.png -:link: geolift1 +:img-top: ../_static/thumbnails/inv_prop_latent.png +:link: inv_prop_latent :link-type: doc ::: -:::{grid-item-card} Multi-cell geolift analysis +:::{grid-item-card} Inverse Propensity Score Weighting with `pymc` :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/multi_cell_geolift.png -:link: multi_cell_geolift +:img-top: ../_static/thumbnails/inv_prop_pymc.png +:link: inv_prop_pymc :link-type: doc ::: :::: @@ -107,24 +143,6 @@ ::: :::: -## Instrumental Variables Regression -::::{grid} 1 2 3 3 -:gutter: 3 - -:::{grid-item-card} Instrumental Variable Modelling (IV) with `pymc` models -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/iv_pymc.png -:link: iv_pymc -:link-type: doc -::: -:::{grid-item-card} Instrumental Regression and Justifying Instruments with `pymc` -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/iv_weak_instruments.png -:link: iv_weak_instruments -:link-type: doc -::: -:::: - ## Regression Kink Design ::::{grid} 1 2 3 3 :gutter: 3 @@ -137,44 +155,26 @@ ::: :::: -## Difference in Differences +## Synthetic Control ::::{grid} 1 2 3 3 :gutter: 3 -:::{grid-item-card} Difference in Differences with `pymc` models -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/did_pymc.png -:link: did_pymc -:link-type: doc -::: -:::{grid-item-card} Banking dataset with a `pymc` model -:class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/did_pymc_banks.png -:link: did_pymc_banks -:link-type: doc -::: -:::{grid-item-card} Difference in Differences with scikit-learn models +:::{grid-item-card} Synthetic control with `pymc` models :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/did_skl.png -:link: did_skl +:img-top: 
../_static/thumbnails/sc_pymc.png +:link: sc_pymc :link-type: doc ::: -:::: - -## Inverse Propensity Score Weighting -::::{grid} 1 2 3 3 -:gutter: 3 - -:::{grid-item-card} The Paradox of Propensity Scores in Bayesian Inference +:::{grid-item-card} The effects of Brexit :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/inv_prop_latent.png -:link: inv_prop_latent +:img-top: ../_static/thumbnails/sc_pymc_brexit.png +:link: sc_pymc_brexit :link-type: doc ::: -:::{grid-item-card} Inverse Propensity Score Weighting with `pymc` +:::{grid-item-card} Synthetic control with scikit-learn models :class-card: sd-card-h-100 -:img-top: ../_static/thumbnails/inv_prop_pymc.png -:link: inv_prop_pymc +:img-top: ../_static/thumbnails/sc_skl.png +:link: sc_skl :link-type: doc ::: :::: diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py index 2fb60e1f..2ba65796 100755 --- a/scripts/generate_gallery.py +++ b/scripts/generate_gallery.py @@ -172,10 +172,8 @@ def generate_gallery_markdown( for nb_data in notebooks_data: categories.setdefault(nb_data["category"], []).append(nb_data) - # Sort categories maintaining order from index.md - sorted_categories = list(category_mapping.keys() & categories.keys()) + list( - categories.keys() - category_mapping.keys() - ) + # Sort categories alphabetically + sorted_categories = sorted(categories.keys()) # Generate markdown lines = ["# Example Gallery\n"] From 77d0fda584ebcee8555d097bc367465f56a24249 Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:26:07 +0100 Subject: [PATCH 09/14] fix dependencies --- .readthedocs.yaml | 3 --- docs/source/conf.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 52e82175..97c4f5a3 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -14,9 +14,6 @@ build: # nodejs: "16" # rust: "1.55" # golang: "1.17" - commands: - # Generate gallery before building docs - - python 
scripts/generate_gallery.py # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/docs/source/conf.py b/docs/source/conf.py index 938537bf..8ac503b2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -11,11 +11,40 @@ import os import sys +from pathlib import Path from causalpy.version import __version__ sys.path.insert(0, os.path.abspath("../")) + +# Generate gallery before building docs +# This runs after dependencies are installed but before Sphinx processes files +def generate_gallery(): + """Generate example gallery from notebooks.""" + try: + # Import here to avoid errors if dependencies aren't available + import subprocess + + repo_root = Path(__file__).parent.parent.parent + script_path = repo_root / "scripts" / "generate_gallery.py" + + if script_path.exists(): + result = subprocess.run( + [sys.executable, str(script_path)], + cwd=str(repo_root), + capture_output=True, + text=True, + ) + if result.returncode != 0: + print(f"Warning: Gallery generation failed: {result.stderr}") + except Exception as e: + print(f"Warning: Could not generate gallery: {e}") + + +# Generate gallery during Sphinx setup +generate_gallery() + # autodoc_mock_imports # This avoids autodoc breaking when it can't find packages imported in the code. # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autodoc_mock_imports # noqa: E501 From bd5b202b1f24958978c26c4f81d348c6a70f433d Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:29:52 +0100 Subject: [PATCH 10/14] little description --- docs/source/notebooks/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md index 60ea876c..85170e02 100644 --- a/docs/source/notebooks/index.md +++ b/docs/source/notebooks/index.md @@ -1,4 +1,7 @@ # Example Gallery + +On this page you can find a gallery of example notebooks that demonstrate the use of CausalPy. 
+ ## ANCOVA ::::{grid} 1 2 3 3 :gutter: 3 From 3bd5e8d462873262f05611e6d2ea1eb301b1a2bd Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Wed, 12 Nov 2025 20:42:52 +0100 Subject: [PATCH 11/14] add descriptions --- docs/source/notebooks/index.md | 27 +++++++ scripts/generate_gallery.py | 132 ++------------------------------- 2 files changed, 34 insertions(+), 125 deletions(-) diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md index 85170e02..0ab48e5d 100644 --- a/docs/source/notebooks/index.md +++ b/docs/source/notebooks/index.md @@ -3,6 +3,9 @@ On this page you can find a gallery of example notebooks that demonstrate the use of CausalPy. ## ANCOVA + +Analysis of covariance is a simple linear model, typically with one continuous predictor (the covariate) and a categorical variable (which may correspond to treatment or control group). In the context of this package, ANCOVA could be useful in pre-post treatment designs, either with or without random assignment. This is similar to the approach of difference in differences, but only applicable with a single pre and post treatment measure. + ::::{grid} 1 2 3 3 :gutter: 3 @@ -15,6 +18,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us :::: ## Difference in Differences + +Analysis where the treatment effect is estimated as a difference between treatment conditions in the differences between pre-treatment to post treatment observations. + ::::{grid} 1 2 3 3 :gutter: 3 @@ -39,6 +45,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us :::: ## Geographical lift testing + +Geolift (geographical lift testing) is a method for measuring the causal impact of interventions in geographic regions. It combines synthetic control methods with difference-in-differences approaches to estimate treatment effects when interventions are applied to specific geographic areas. 
+ ::::{grid} 1 2 3 3 :gutter: 3 @@ -57,6 +66,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us :::: ## Instrumental Variables Regression + +A quasi-experimental design to estimate a treatment effect where there is a risk of confounding between the treatment and the outcome due to endogeneity. Instrumental variables help identify causal effects by using variables that affect treatment assignment but not the outcome directly. + ::::{grid} 1 2 3 3 :gutter: 3 @@ -75,6 +87,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us :::: ## Interrupted Time Series + +A quasi-experimental design that uses time series methods to generate counterfactuals and estimate treatment effects. A series of observations are collected before and after a treatment, and the pre-treatment trend (or any time-series model) is used to predict what would have happened in the absence of treatment. + ::::{grid} 1 2 3 3 :gutter: 3 @@ -99,6 +114,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us :::: ## Inverse Propensity Score Weighting + +A method for estimating causal effects by weighting observations by the inverse of their probability of receiving treatment (propensity score). This helps adjust for confounding by creating a pseudo-population where treatment assignment is independent of observed covariates. + ::::{grid} 1 2 3 3 :gutter: 3 @@ -117,6 +135,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us :::: ## Regression Discontinuity + +A quasi-experimental design where treatment assignment is determined by a cutoff point along a running variable (e.g., test score, age, income). The treatment effect is estimated by comparing outcomes just above and below the cutoff, assuming units near the cutoff are similar except for treatment status. 
+ ::::{grid} 1 2 3 3 :gutter: 3 @@ -147,6 +168,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us :::: ## Regression Kink Design + +A variation of regression discontinuity where treatment affects the slope (rate of change) of the outcome with respect to the running variable, rather than causing a discrete jump. The treatment effect is identified by a change in the slope at the cutoff point. + ::::{grid} 1 2 3 3 :gutter: 3 @@ -159,6 +183,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us :::: ## Synthetic Control + +The synthetic control method is a statistical method used to evaluate the effect of an intervention in comparative case studies. It involves the construction of a weighted combination of groups used as controls, to which the treatment group is compared. + ::::{grid} 1 2 3 3 :gutter: 3 diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py index 2ba65796..8ab7649d 100755 --- a/scripts/generate_gallery.py +++ b/scripts/generate_gallery.py @@ -1,15 +1,14 @@ #!/usr/bin/env python3 """ -Generate example gallery for CausalPy documentation. +Generate thumbnails for CausalPy documentation gallery. -This script scans notebooks in docs/source/notebooks/, extracts metadata, -generates thumbnails from the first plot in each notebook, and creates -a gallery page using sphinx-design cards. +This script scans notebooks in docs/source/notebooks/ and generates +thumbnails from the first plot in each notebook. The index.md file +should be maintained manually. """ import base64 import io -import re import sys from pathlib import Path @@ -29,65 +28,6 @@ Image = None # type: ignore[assignment,misc] -def load_categories_from_index(index_path: Path) -> dict[str, list[str]]: - """ - Load category structure from existing index.md. 
- - Reads the markdown file and extracts: - - Category names from ## headers - - Notebook names from :link: fields under each category - - Returns - ------- - dict[str, list[str]] - Mapping from category name to list of notebook names (without .ipynb) - """ - if not index_path.exists(): - return {} - - try: - categories: dict[str, list[str]] = {} - current_category = None - for line in index_path.read_text(encoding="utf-8").splitlines(): - if line.startswith("## "): - current_category = line[3:].strip() - if current_category and current_category != "Example Gallery": - categories[current_category] = [] - elif current_category and (match := re.search(r":link:\s+(\S+)", line)): - categories[current_category].append(match.group(1)) - return categories - except Exception as e: - print(f"Warning: Could not load categories from {index_path}: {e}") - return {} - - -def get_notebook_category(filename: str, category_mapping: dict[str, list[str]]) -> str: - """Determine the category for a notebook from the loaded mapping.""" - notebook_name = filename.replace(".ipynb", "") - return next( - ( - cat - for cat, notebooks in category_mapping.items() - if notebook_name in notebooks - ), - "Other", - ) - - -def extract_metadata(notebook_path: Path) -> str: - """Extract title from notebook.""" - nb = nbformat.reads(notebook_path.read_text(encoding="utf-8"), as_version=4) - - # Look for title in first markdown cell - for cell in nb.cells: - if cell.cell_type == "markdown": - if match := re.search(r"^#+\s+(.+)$", cell.source.strip(), re.MULTILINE): - return match.group(1).strip() - - # Fallback to filename-based title - return notebook_path.stem.replace("_", " ").title() - - def _find_image_in_notebook(nb) -> str | None: """Find first PNG image in notebook outputs.""" for cell in nb.cells: @@ -161,60 +101,16 @@ def _save_thumbnail( return None -def generate_gallery_markdown( - notebooks_data: list[dict], - output_path: Path, - category_mapping: dict[str, list[str]], -): - """Generate 
gallery markdown file with sphinx-design cards.""" - # Group notebooks by category - categories: dict[str, list[dict]] = {} - for nb_data in notebooks_data: - categories.setdefault(nb_data["category"], []).append(nb_data) - - # Sort categories alphabetically - sorted_categories = sorted(categories.keys()) - - # Generate markdown - lines = ["# Example Gallery\n"] - - for category in sorted_categories: - notebooks = sorted(categories[category], key=lambda x: x["filename"]) - - lines.extend([f"## {category}\n", "::::{grid} 1 2 3 3\n", ":gutter: 3\n\n"]) - - for nb in notebooks: - doc_name = nb["filename"].replace(".ipynb", "") - card_lines = [ - f":::{'{grid-item-card}'} {nb['title']}\n", - ":class-card: sd-card-h-100\n", - ] - if nb.get("thumbnail"): - card_lines.append(f":img-top: {nb['thumbnail']}\n") - card_lines.extend([f":link: {doc_name}\n", ":link-type: doc\n", ":::\n"]) - lines.extend(card_lines) - - lines.append("::::\n\n") - - output_path.write_text("".join(lines), encoding="utf-8") - - def main(): - """Main function to generate gallery.""" + """Main function to generate thumbnails only.""" # Paths repo_root = Path(__file__).parent.parent notebooks_dir = repo_root / "docs" / "source" / "notebooks" thumbnails_dir = repo_root / "docs" / "source" / "_static" / "thumbnails" - output_file = notebooks_dir / "index.md" # Create thumbnails directory thumbnails_dir.mkdir(parents=True, exist_ok=True) - # Load category structure from existing index.md - category_mapping = load_categories_from_index(output_file) - if category_mapping: - print(f"Loaded {len(category_mapping)} categories from index.md") - # Find all notebooks notebook_files = sorted(notebooks_dir.glob("*.ipynb")) @@ -224,25 +120,11 @@ def main(): print(f"Found {len(notebook_files)} notebooks") - # Process each notebook - notebooks_data = [] + # Process each notebook to generate thumbnails for nb_path in notebook_files: print(f"Processing {nb_path.name}...") + extract_first_image(nb_path, thumbnails_dir) 
- notebooks_data.append( - { - "filename": nb_path.name, - "title": extract_metadata(nb_path), - "category": get_notebook_category(nb_path.name, category_mapping), - "thumbnail": extract_first_image(nb_path, thumbnails_dir), - } - ) - - # Generate gallery markdown - print("Generating gallery markdown...") - generate_gallery_markdown(notebooks_data, output_file, category_mapping) - - print(f"Gallery generated successfully at {output_file}") print(f"Thumbnails saved to {thumbnails_dir}") From 7509316d13c69d7258c5e4536deff91ccd2de878 Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Fri, 14 Nov 2025 09:25:33 +0100 Subject: [PATCH 12/14] use howto --- docs/source/notebooks/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md index 0ab48e5d..4b8408c9 100644 --- a/docs/source/notebooks/index.md +++ b/docs/source/notebooks/index.md @@ -1,4 +1,4 @@ -# Example Gallery +# How-to On this page you can find a gallery of example notebooks that demonstrate the use of CausalPy. 
From 4271497ce0ec357b10d640b4ce46bbad5f895128 Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Fri, 14 Nov 2025 09:31:00 +0100 Subject: [PATCH 13/14] add table of contents on the side --- docs/source/_static/gallery.css | 6 ++++++ docs/source/conf.py | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/docs/source/_static/gallery.css b/docs/source/_static/gallery.css index 7fee587d..e21a8c77 100644 --- a/docs/source/_static/gallery.css +++ b/docs/source/_static/gallery.css @@ -34,3 +34,9 @@ .sd-grid { align-items: stretch; } + +/* Hide right sidebar - move "On this page" to left sidebar via html_sidebars config */ +.bd-sidebar-secondary, +.sidebar-secondary { + display: none !important; +} diff --git a/docs/source/conf.py b/docs/source/conf.py index 8ac503b2..75b24c76 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -185,6 +185,13 @@ def generate_gallery(): }, "analytics": {"google_analytics_id": "G-3MCDG3M7X6"}, } + +# Configure sidebars: show local TOC ("On this page") in left sidebar +# This moves "On this page" navigation from right sidebar to left sidebar +html_sidebars = { + "**": ["localtoc.html"], # Show "On this page" navigation in left sidebar +} + html_context = { "github_user": "pymc-labs", "github_repo": "CausalPy", From cd069ccb0a0122605822a5a5d1dcee9c4704e717 Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Fri, 14 Nov 2025 09:34:47 +0100 Subject: [PATCH 14/14] docs --- AGENTS.md | 18 ++++++++++++++++++ CONTRIBUTING.md | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 9baa28eb..b842fa10 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -31,6 +31,24 @@ - **Build**: Use `make html` to build documentation - **Doctest**: Use `make doctest` to test that Python examples in doctests work +### Adding new notebooks to the gallery + +When creating a new example notebook: + +1. **Place it** in `docs/source/notebooks/` with naming pattern `{method}_{model}.ipynb` +2. 
**Include at least one plot** in the notebook outputs (the first PNG image will be used as the thumbnail) +3. **Manually add it to `docs/source/notebooks/index.md`**: + - Find the appropriate category section or create a new one + - Add a `grid-item-card` entry with: + - `:img-top: ../_static/thumbnails/{notebook_name}.png` (thumbnail path) + - `:link: {notebook_name_without_extension}` (notebook name without `.ipynb`) + - `:link-type: doc` + - Cards are arranged in 3-column grids using `sphinx-design` +4. **Thumbnails are generated automatically** during the build process by `scripts/generate_gallery.py` (runs via `conf.py` during Sphinx setup) +5. **Test locally** with `make html` and check `docs/_build/html/notebooks/index.html` + +**Important**: The `index.md` file is manually maintained. The `generate_gallery.py` script only generates thumbnails; it does not modify `index.md`. Thumbnails are gitignored (`docs/source/_static/thumbnails/`) and generated on-demand during builds. + ## Code structure and style - **Experiment classes**: All experiment classes inherit from `BaseExperiment` in `causalpy/experiments/`. Must declare `supports_ols` and `supports_bayes` class attributes. Only implement abstract methods for supported model types (e.g., if only Bayesian is supported, implement `_bayesian_plot()` and `get_plot_data_bayesian()`; if only OLS is supported, implement `_ols_plot()` and `get_plot_data_ols()`) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a0bb3962..4bb56986 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -149,6 +149,43 @@ We recommend that your contribution complies with the following guidelines befor - When adding additional functionality, either edit an existing example, or create a new example (typically in the form of a Jupyter Notebook). Have a look at other examples for reference. Examples should demonstrate why the new functionality is useful in practice. 
+### Adding a new example notebook + +When adding a new example notebook to the documentation gallery: + +1. **Place the notebook** in `docs/source/notebooks/` following the naming convention `{method}_{model}.ipynb` (e.g., `did_pymc.ipynb`, `rd_skl.ipynb`). + +2. **Ensure the notebook has at least one plot/figure** in its outputs. The gallery generation script (`scripts/generate_gallery.py`) will automatically extract the first PNG image from the notebook outputs to create a thumbnail. If the notebook has no outputs, the script will attempt to execute it to generate the thumbnail. + +3. **Add the notebook to the gallery** by manually editing `docs/source/notebooks/index.md`: + - Find the appropriate category section (e.g., "Difference in Differences", "Regression Discontinuity") or create a new one if needed + - Add a new `grid-item-card` entry within the category's grid, following this format: + ```markdown + :::{grid-item-card} Your Notebook Title + :class-card: sd-card-h-100 + :img-top: ../_static/thumbnails/{notebook_name}.png + :link: {notebook_name_without_extension} + :link-type: doc + ::: + ``` + - The `:img-top:` path should reference `../_static/thumbnails/{notebook_name}.png` (the thumbnail will be generated automatically) + - The `:link:` should be the notebook name without the `.ipynb` extension + - Cards are arranged in a 3-column grid layout + +4. **Generate thumbnails locally** (optional, for testing): + ```bash + python scripts/generate_gallery.py + ``` + Thumbnails are automatically generated during the documentation build process, so you don't need to commit them (the `docs/source/_static/thumbnails/` directory is gitignored). + +5. **Build and test the documentation** to verify the notebook appears correctly in the gallery: + ```bash + make html + ``` + Then open `docs/_build/html/notebooks/index.html` in your browser to see the gallery. 
+ +**Note**: The gallery generation script (`scripts/generate_gallery.py`) runs automatically during the Sphinx build process (configured in `docs/source/conf.py`), so thumbnails will be generated on Read the Docs builds without needing to commit them. + - Documentation and high-coverage tests are necessary for enhancements to be accepted. - Documentation follows [NumPy style guide](https://numpydoc.readthedocs.io/en/latest/format.html)