diff --git a/.gitignore b/.gitignore index 9a6b0579..0a46dc97 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ dist/ docs/build/ docs/jupyter_execute/ docs/source/api/generated/ +docs/source/_static/thumbnails/ +# Note: thumbnails are generated during build (see docs/source/conf.py) diff --git a/AGENTS.md b/AGENTS.md index 9baa28eb..b842fa10 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -31,6 +31,24 @@ - **Build**: Use `make html` to build documentation - **Doctest**: Use `make doctest` to test that Python examples in doctests work +### Adding new notebooks to the gallery + +When creating a new example notebook: + +1. **Place it** in `docs/source/notebooks/` with naming pattern `{method}_{model}.ipynb` +2. **Include at least one plot** in the notebook outputs (the first PNG image will be used as the thumbnail) +3. **Manually add it to `docs/source/notebooks/index.md`**: + - Find the appropriate category section or create a new one + - Add a `grid-item-card` entry with: + - `:img-top: ../_static/thumbnails/{notebook_name}.png` (thumbnail path) + - `:link: {notebook_name_without_extension}` (notebook name without `.ipynb`) + - `:link-type: doc` + - Cards are arranged in 3-column grids using `sphinx-design` +4. **Thumbnails are generated automatically** during the build process by `scripts/generate_gallery.py` (runs via `conf.py` during Sphinx setup) +5. **Test locally** with `make html` and check `docs/_build/notebooks/index.html` + +**Important**: The `index.md` file is manually maintained. The `generate_gallery.py` script only generates thumbnails; it does not modify `index.md`. Thumbnails are gitignored (`docs/source/_static/thumbnails/`) and generated on-demand during builds. + ## Code structure and style - **Experiment classes**: All experiment classes inherit from `BaseExperiment` in `causalpy/experiments/`. Must declare `supports_ols` and `supports_bayes` class attributes. 
Only implement abstract methods for supported model types (e.g., if only Bayesian is supported, implement `_bayesian_plot()` and `get_plot_data_bayesian()`; if only OLS is supported, implement `_ols_plot()` and `get_plot_data_ols()`) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a0bb3962..4bb56986 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -149,6 +149,43 @@ We recommend that your contribution complies with the following guidelines befor - When adding additional functionality, either edit an existing example, or create a new example (typically in the form of a Jupyter Notebook). Have a look at other examples for reference. Examples should demonstrate why the new functionality is useful in practice. +### Adding a new example notebook + +When adding a new example notebook to the documentation gallery: + +1. **Place the notebook** in `docs/source/notebooks/` following the naming convention `{method}_{model}.ipynb` (e.g., `did_pymc.ipynb`, `rd_skl.ipynb`). + +2. **Ensure the notebook has at least one plot/figure** in its outputs. The gallery generation script (`scripts/generate_gallery.py`) will automatically extract the first PNG image from the notebook outputs to create a thumbnail. If the notebook has no outputs, the script will attempt to execute it to generate the thumbnail. + +3. 
**Add the notebook to the gallery** by manually editing `docs/source/notebooks/index.md`: + - Find the appropriate category section (e.g., "Difference in Differences", "Regression Discontinuity") or create a new one if needed + - Add a new `grid-item-card` entry within the category's grid, following this format: + ```markdown + :::{grid-item-card} Your Notebook Title + :class-card: sd-card-h-100 + :img-top: ../_static/thumbnails/{notebook_name}.png + :link: {notebook_name_without_extension} + :link-type: doc + ::: + ``` + - The `:img-top:` path should reference `../_static/thumbnails/{notebook_name}.png` (the thumbnail will be generated automatically) + - The `:link:` should be the notebook name without the `.ipynb` extension + - Cards are arranged in a 3-column grid layout + +4. **Generate thumbnails locally** (optional, for testing): + ```bash + python scripts/generate_gallery.py + ``` + Thumbnails are automatically generated during the documentation build process, so you don't need to commit them (the `docs/source/_static/thumbnails/` directory is gitignored). + +5. **Build and test the documentation** to verify the notebook appears correctly in the gallery: + ```bash + make html + ``` + Then open `docs/_build/notebooks/index.html` in your browser to see the gallery (`make html` writes the HTML output directly into `docs/_build`). + +**Note**: The gallery generation script (`scripts/generate_gallery.py`) runs automatically during the Sphinx build process (configured in `docs/source/conf.py`), so thumbnails will be generated on Read the Docs builds without needing to commit them. + - Documentation and high-coverage tests are necessary for enhancements to be accepted. 
- Documentation follows [NumPy style guide](https://numpydoc.readthedocs.io/en/latest/format.html) diff --git a/Makefile b/Makefile index 604b0532..d80ce7fd 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ PACKAGE_DIR = causalpy # COMMANDS # ################################################################################# -.PHONY: init lint check_lint test uml html cleandocs doctest help +.PHONY: init lint check_lint test uml gallery html cleandocs doctest help init: ## Install the package in editable mode python -m pip install -e . --no-deps @@ -31,7 +31,10 @@ test: ## Run all tests with pytest uml: ## Generate UML diagrams from code pyreverse -o png causalpy --output-directory docs/source/_static --ignore tests -html: ## Build HTML documentation with Sphinx +gallery: ## Generate example gallery from notebooks + python scripts/generate_gallery.py + +html: gallery ## Build HTML documentation with Sphinx sphinx-build -b html docs/source docs/_build cleandocs: ## Clean the documentation build directories diff --git a/docs/source/_static/gallery.css b/docs/source/_static/gallery.css new file mode 100644 index 00000000..e21a8c77 --- /dev/null +++ b/docs/source/_static/gallery.css @@ -0,0 +1,42 @@ +/* Custom CSS for uniform gallery card sizes - square-like cards */ +.sd-card { + height: 100%; + display: flex; + flex-direction: column; +} + +.sd-card-body { + flex-grow: 1; + display: flex; + flex-direction: column; + padding: 0.5rem; +} + +.sd-card-img-top { + width: 100%; + height: 250px; + object-fit: contain; + background-color: #f8f9fa; + padding: 8px; +} + +.sd-card-header { + padding: 0.75rem 0.5rem; + line-height: 1.3; + min-height: auto; +} + +.sd-grid-item { + display: flex; +} + +/* Ensure grid items stretch to same height */ +.sd-grid { + align-items: stretch; +} + +/* Hide right sidebar - move "On this page" to left sidebar via html_sidebars config */ +.bd-sidebar-secondary, +.sidebar-secondary { + display: none !important; +} diff --git 
def generate_gallery(script_path=None):
    """Run the gallery thumbnail script as a best-effort pre-build step.

    The script is executed in a subprocess with the interpreter that is
    running Sphinx, using the repository root as working directory. Every
    failure is reported as a printed warning and never raised, so a
    thumbnail problem cannot abort the documentation build.

    Parameters
    ----------
    script_path : str or pathlib.Path, optional
        Script to execute. Defaults to ``scripts/generate_gallery.py`` under
        the repository root (three directory levels above this file).

    Returns
    -------
    None
    """
    try:
        # Local import keeps this helper self-contained inside conf.py.
        import subprocess

        repo_root = Path(__file__).parent.parent.parent
        if script_path is None:
            script_path = repo_root / "scripts" / "generate_gallery.py"
        script_path = Path(script_path)

        if not script_path.exists():
            # Previously skipped silently, which made missing thumbnails
            # hard to diagnose.
            print(f"Warning: Gallery script not found: {script_path}")
            return

        result = subprocess.run(
            [sys.executable, str(script_path)],
            cwd=str(repo_root),
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode != 0:
            # The script may report its problem on stdout rather than stderr,
            # so include both captured streams in the warning.
            details = "\n".join(
                stream.strip()
                for stream in (result.stderr, result.stdout)
                if stream and stream.strip()
            )
            print(f"Warning: Gallery generation failed: {details}")
    except Exception as e:  # deliberate catch-all: never break the docs build
        print(f"Warning: Could not generate gallery: {e}")
# https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autodoc_mock_imports # noqa: E501 @@ -81,6 +110,13 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] master_doc = "index" +# Suppress warnings for notebooks linked from gallery (not in toctree) +suppress_warnings = [ + "toc.not_included", # Notebooks are linked from gallery, not toctree + "bibtex.duplicate_label", # BibTeX duplicate labels (less critical) + "bibtex.duplicate_citation", # BibTeX duplicate citations (less critical) +] + # bibtex config bibtex_bibfiles = ["references.bib"] bibtex_default_style = "unsrt" @@ -138,6 +174,7 @@ html_theme = "labs_sphinx_theme" html_static_path = ["_static"] html_favicon = "_static/favicon_logo.png" +html_css_files = ["gallery.css"] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. @@ -148,6 +185,13 @@ }, "analytics": {"google_analytics_id": "G-3MCDG3M7X6"}, } + +# Configure sidebars: show local TOC ("On this page") in left sidebar +# This moves "On this page" navigation from right sidebar to left sidebar +html_sidebars = { + "**": ["localtoc.html"], # Show "On this page" navigation in left sidebar +} + html_context = { "github_user": "pymc-labs", "github_repo": "CausalPy", diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md index 17eaa40c..4b8408c9 100644 --- a/docs/source/notebooks/index.md +++ b/docs/source/notebooks/index.md @@ -1,75 +1,210 @@ # How-to -:::{toctree} -:caption: ANCOVA -:maxdepth: 1 +On this page you can find a gallery of example notebooks that demonstrate the use of CausalPy. -ancova_pymc.ipynb +## ANCOVA + +Analysis of covariance is a simple linear model, typically with one continuous predictor (the covariate) and a categorical variable (which may correspond to treatment or control group). 
In the context of this package, ANCOVA could be useful in pre-post treatment designs, either with or without random assignment. This is similar to the approach of difference in differences, but only applicable with a single pre and post treatment measure. + +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} ANCOVA for pre/post treatment nonequivalent group designs +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/ancova_pymc.png +:link: ancova_pymc +:link-type: doc ::: +:::: + +## Difference in Differences -:::{toctree} -:caption: Synthetic Control -:maxdepth: 1 +Analysis where the treatment effect is estimated as a difference between treatment conditions in the differences between pre-treatment to post treatment observations. -sc_skl.ipynb -sc_pymc.ipynb -sc_pymc_brexit.ipynb +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Difference in Differences with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/did_pymc.png +:link: did_pymc +:link-type: doc +::: +:::{grid-item-card} Banking dataset with a `pymc` model +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/did_pymc_banks.png +:link: did_pymc_banks +:link-type: doc ::: +:::{grid-item-card} Difference in Differences with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/did_skl.png +:link: did_skl +:link-type: doc +::: +:::: + +## Geographical lift testing -:::{toctree} -:caption: Geographical lift testing -:maxdepth: 1 +Geolift (geographical lift testing) is a method for measuring the causal impact of interventions in geographic regions. It combines synthetic control methods with difference-in-differences approaches to estimate treatment effects when interventions are applied to specific geographic areas. 
-geolift1.ipynb -multi_cell_geolift.ipynb +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Bayesian geolift with CausalPy +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/geolift1.png +:link: geolift1 +:link-type: doc +::: +:::{grid-item-card} Multi-cell geolift analysis +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/multi_cell_geolift.png +:link: multi_cell_geolift +:link-type: doc ::: +:::: + +## Instrumental Variables Regression + +A quasi-experimental design to estimate a treatment effect where there is a risk of confounding between the treatment and the outcome due to endogeneity. Instrumental variables help identify causal effects by using variables that affect treatment assignment but not the outcome directly. -:::{toctree} -:caption: Difference in Differences -:maxdepth: 1 +::::{grid} 1 2 3 3 +:gutter: 3 -did_skl.ipynb -did_pymc.ipynb -did_pymc_banks.ipynb +:::{grid-item-card} Instrumental Variable Modelling (IV) with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/iv_pymc.png +:link: iv_pymc +:link-type: doc ::: +:::{grid-item-card} Instrumental Regression and Justifying Instruments with `pymc` +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/iv_weak_instruments.png +:link: iv_weak_instruments +:link-type: doc +::: +:::: + +## Interrupted Time Series -:::{toctree} -:caption: Interrupted Time Series -:maxdepth: 1 +A quasi-experimental design that uses time series methods to generate counterfactuals and estimate treatment effects. A series of observations are collected before and after a treatment, and the pre-treatment trend (or any time-series model) is used to predict what would have happened in the absence of treatment. 
-its_skl.ipynb -its_pymc.ipynb -its_covid.ipynb +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Excess deaths due to COVID-19 +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/its_covid.png +:link: its_covid +:link-type: doc +::: +:::{grid-item-card} Bayesian Interrupted Time Series +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/its_pymc.png +:link: its_pymc +:link-type: doc ::: +:::{grid-item-card} Interrupted Time Series (ITS) with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/its_skl.png +:link: its_skl +:link-type: doc +::: +:::: + +## Inverse Propensity Score Weighting -:::{toctree} -:caption: Regression Discontinuity -:maxdepth: 1 +A method for estimating causal effects by weighting observations by the inverse of their probability of receiving treatment (propensity score). This helps adjust for confounding by creating a pseudo-population where treatment assignment is independent of observed covariates. -rd_skl.ipynb -rd_pymc.ipynb -rd_pymc_drinking.ipynb +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} The Paradox of Propensity Scores in Bayesian Inference +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/inv_prop_latent.png +:link: inv_prop_latent +:link-type: doc +::: +:::{grid-item-card} Inverse Propensity Score Weighting with `pymc` +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/inv_prop_pymc.png +:link: inv_prop_pymc +:link-type: doc ::: +:::: -:::{toctree} -:caption: Regression Kink Design -:maxdepth: 1 +## Regression Discontinuity -rkink_pymc.ipynb +A quasi-experimental design where treatment assignment is determined by a cutoff point along a running variable (e.g., test score, age, income). The treatment effect is estimated by comparing outcomes just above and below the cutoff, assuming units near the cutoff are similar except for treatment status. 
+ +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Sharp regression discontinuity with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_pymc.png +:link: rd_pymc +:link-type: doc +::: +:::{grid-item-card} Drinking age - Bayesian analysis +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_pymc_drinking.png +:link: rd_pymc_drinking +:link-type: doc +::: +:::{grid-item-card} Sharp regression discontinuity with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_skl.png +:link: rd_skl +:link-type: doc ::: +:::{grid-item-card} Drinking age with a scikit-learn model +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rd_skl_drinking.png +:link: rd_skl_drinking +:link-type: doc +::: +:::: + +## Regression Kink Design -:::{toctree} -:caption: Instrumental Variables Regression -:maxdepth: 1 +A variation of regression discontinuity where treatment affects the slope (rate of change) of the outcome with respect to the running variable, rather than causing a discrete jump. The treatment effect is identified by a change in the slope at the cutoff point. -iv_pymc.ipynb -iv_weak_instruments.ipynb +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Regression kink design with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/rkink_pymc.png +:link: rkink_pymc +:link-type: doc ::: +:::: + +## Synthetic Control -:::{toctree} -:caption: Inverse Propensity Score Weighting -:maxdepth: 1 +The synthetic control method is a statistical method used to evaluate the effect of an intervention in comparative case studies. It involves the construction of a weighted combination of groups used as controls, to which the treatment group is compared. 
-inv_prop_pymc.ipynb -inv_prop_latent.ipynb +::::{grid} 1 2 3 3 +:gutter: 3 + +:::{grid-item-card} Synthetic control with `pymc` models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/sc_pymc.png +:link: sc_pymc +:link-type: doc +::: +:::{grid-item-card} The effects of Brexit +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/sc_pymc_brexit.png +:link: sc_pymc_brexit +:link-type: doc +::: +:::{grid-item-card} Synthetic control with scikit-learn models +:class-card: sd-card-h-100 +:img-top: ../_static/thumbnails/sc_skl.png +:link: sc_skl +:link-type: doc ::: +:::: diff --git a/pyproject.toml b/pyproject.toml index c212f395..aef3bcb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,9 @@ docs = [ "ipywidgets", "sphinx-design", "sphinx-togglebutton", + "nbformat", + "nbconvert", + "Pillow", ] lint = ["interrogate", "pre-commit", "ruff", "mypy"] test = ["pytest", "pytest-cov", "codespell", "nbformat", "nbconvert"] diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py new file mode 100755 index 00000000..8ab7649d --- /dev/null +++ b/scripts/generate_gallery.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +Generate thumbnails for CausalPy documentation gallery. + +This script scans notebooks in docs/source/notebooks/ and generates +thumbnails from the first plot in each notebook. The index.md file +should be maintained manually. +""" + +import base64 +import io +import sys +from pathlib import Path + +try: + import nbformat + from nbconvert.preprocessors import ExecutePreprocessor +except ImportError: + print( + "Error: nbformat and nbconvert are required. Install with: pip install nbformat nbconvert" + ) + sys.exit(1) + +try: + from PIL import Image +except ImportError: + print("Warning: Pillow not found. 
Thumbnails will not be generated.") + Image = None # type: ignore[assignment,misc] + + +def _find_image_in_notebook(nb) -> str | None: + """Find first PNG image in notebook outputs.""" + for cell in nb.cells: + if cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs: + for output in cell.outputs: + if output.output_type in ("display_data", "execute_result"): + if image_data := output.get("data", {}).get("image/png"): + return image_data + return None + + +def extract_first_image(notebook_path: Path, output_dir: Path) -> str | None: + """Extract first image from notebook outputs (without executing if outputs exist).""" + if Image is None: + return None + + try: + nb = nbformat.reads(notebook_path.read_text(encoding="utf-8"), as_version=4) + + # Try to find images in existing outputs first + if image_data := _find_image_in_notebook(nb): + return _save_thumbnail(notebook_path, output_dir, image_data) + + # Execute if notebook has no outputs + if not any( + cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs + for cell in nb.cells + ): + print(f" Executing {notebook_path.name} to generate thumbnail...") + try: + ExecutePreprocessor(timeout=120, kernel_name="python3").preprocess( + nb, {"metadata": {"path": str(notebook_path.parent)}} + ) + if image_data := _find_image_in_notebook(nb): + return _save_thumbnail(notebook_path, output_dir, image_data) + except Exception as e: + print(f" Warning: Failed to execute {notebook_path.name}: {e}") + + return None + except Exception as e: + print(f"Warning: Could not generate thumbnail for {notebook_path.name}: {e}") + return None + + +def _save_thumbnail( + notebook_path: Path, output_dir: Path, image_data: str +) -> str | None: + """Save thumbnail image from base64 data.""" + try: + thumbnail_name = f"{notebook_path.stem}.png" + thumbnail_path = output_dir / thumbnail_name + + # Decode and process image in memory + img = Image.open(io.BytesIO(base64.b64decode(image_data))) + target_size = 
(400, 250) + img.thumbnail(target_size, Image.Resampling.LANCZOS) + + # Create padded image and save + new_img = Image.new("RGB", target_size, (255, 255, 255)) + new_img.paste( + img, + ((target_size[0] - img.size[0]) // 2, (target_size[1] - img.size[1]) // 2), + ) + new_img.save(thumbnail_path) + + # Path relative to document location (notebooks/) + # Need to go up one level to source/, then into _static/thumbnails/ + return f"../_static/thumbnails/{thumbnail_name}" + except Exception as e: + print(f"Warning: Could not save thumbnail for {notebook_path.name}: {e}") + return None + + +def main(): + """Main function to generate thumbnails only.""" + # Paths + repo_root = Path(__file__).parent.parent + notebooks_dir = repo_root / "docs" / "source" / "notebooks" + thumbnails_dir = repo_root / "docs" / "source" / "_static" / "thumbnails" + + # Create thumbnails directory + thumbnails_dir.mkdir(parents=True, exist_ok=True) + + # Find all notebooks + notebook_files = sorted(notebooks_dir.glob("*.ipynb")) + + if not notebook_files: + print("No notebooks found!") + sys.exit(1) + + print(f"Found {len(notebook_files)} notebooks") + + # Process each notebook to generate thumbnails + for nb_path in notebook_files: + print(f"Processing {nb_path.name}...") + extract_first_image(nb_path, thumbnails_dir) + + print(f"Thumbnails saved to {thumbnails_dir}") + + +if __name__ == "__main__": + main()