From 1f231436e65dfa490d8e328bd2cd97465daa1f76 Mon Sep 17 00:00:00 2001
From: Michael Waskom <mwaskom@users.noreply.github.com>
Date: Thu, 4 Jun 2020 17:24:09 -0400
Subject: [PATCH] Store code examples in API docs using notebooks (#2123)

* Add new infrastructure for generating docs from notebooks

* Move rugplot examples into notebook

* More improvements to new notebook processing infrastructure

* Add output stripping to doc notebook build

* Add script for auto-generating API doc notebooks

* Improve Makefile infra

* Convert kdeplot API docs to notebook

* Read notebook kernel from environment variable

* Add pandoc to GH doc build

* Consolidate workflow
---
 .github/workflows/ci.yaml     |   4 +-
 doc/.gitignore                |  12 +-
 doc/Makefile                  |  13 +-
 doc/docstrings/Makefile       |  10 +
 doc/docstrings/kdeplot.ipynb  | 337 ++++++++++++++++++++++++++++++++++
 doc/docstrings/rugplot.ipynb  | 137 ++++++++++++++
 doc/tools/extract_examples.py |  71 +++++++
 doc/tools/nb_to_doc.py        | 247 ++++++++++++++++++++++---
 doc/tutorial/Makefile         |   2 +-
 seaborn/_docstrings.py        |   6 +-
 seaborn/distributions.py      | 211 +--------------------
 11 files changed, 798 insertions(+), 252 deletions(-)
 create mode 100644 doc/docstrings/Makefile
 create mode 100644 doc/docstrings/kdeplot.ipynb
 create mode 100644 doc/docstrings/rugplot.ipynb
 create mode 100644 doc/tools/extract_examples.py

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 1b31609e41..74cd979301 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -30,7 +30,7 @@ jobs:
       - name: Install doc tools
         run: |
           pip install -r doc/requirements.txt
+          sudo apt-get install -y pandoc
 
       - name: Build docs
-        run: |
-          make -C doc notebooks html
+        run: make -C doc notebooks html
diff --git a/doc/.gitignore b/doc/.gitignore
index ea122c6ca9..5cb06a8e24 100644
--- a/doc/.gitignore
+++ b/doc/.gitignore
@@ -4,13 +4,5 @@ generated/
 examples/
 example_thumbs/
 introduction.rst
-aesthetics.rst
-relational.rst
-color_palettes.rst
-distributions.rst
-regression.rst
-categorical.rst
-plotting_distributions.rst
-dataset_exploration.rst
-timeseries_plots.rst
-axis_grids.rst
+tutorial/*.rst
+docstrings/*.rst
diff --git a/doc/Makefile b/doc/Makefile
index 466f184505..1dfec5de5c 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -46,17 +46,24 @@ clean:
 	-rm -rf example_thumbs/*
 	-rm -rf tutorial/*_files/
 	-rm -rf tutorial/*.rst
+	-rm -rf docstrings/*_files/
+	-rm -rf docstrings/*.rst
 	-rm -rf generated/*
 	-rm -rf introduction_files/*
 	-rm introduction.rst
 
+.PHONY: tutorials
 tutorials:
 	make -C tutorial
 
-introduction: introduction.ipynb
-	tools/nb_to_doc.py introduction
+.PHONY: docstrings
+docstrings:
+	$(MAKE) -C docstrings
 
-notebooks: tutorials introduction
+introduction.rst: introduction.ipynb
+	tools/nb_to_doc.py ./introduction.ipynb
+
+notebooks: tutorials docstrings introduction.rst
 
 html:
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
diff --git a/doc/docstrings/Makefile b/doc/docstrings/Makefile
new file mode 100644
index 0000000000..57045bfc0e
--- /dev/null
+++ b/doc/docstrings/Makefile
@@ -0,0 +1,10 @@
+rst_files := $(patsubst %.ipynb,%.rst,$(wildcard *.ipynb))
+
+.PHONY: docstrings
+docstrings: ${rst_files}
+
+# Run the notebook, publish its figures to ../generated/, and bump the API page's mtime (if present).
+%.rst: %.ipynb
+	@../tools/nb_to_doc.py $<
+	@mkdir -p ../generated && cp -r $*_files ../generated/
+	@[ ! -f ../generated/seaborn.$*.rst ] || touch ../generated/seaborn.$*.rst
diff --git a/doc/docstrings/kdeplot.ipynb b/doc/docstrings/kdeplot.ipynb
new file mode 100644
index 0000000000..42fca7f6d9
--- /dev/null
+++ b/doc/docstrings/kdeplot.ipynb
@@ -0,0 +1,337 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Plot a univariate distribution along the x axis:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import seaborn as sns; sns.set()\n",
+    "tips = sns.load_dataset(\"tips\")\n",
+    "sns.kdeplot(data=tips, x=\"total_bill\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Flip the plot by assigning the data variable to the y axis:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(data=tips, y=\"total_bill\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Plot distributions for each column of a wide-form dataset:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iris = sns.load_dataset(\"iris\")\n",
+    "sns.kdeplot(data=iris)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Use less smoothing:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(data=tips, x=\"total_bill\", bw_adjust=.2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Use more smoothing, but don't smooth past the extreme data points:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(data=tips, x=\"total_bill\", bw_adjust=5, cut=0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Plot conditional distributions with hue mapping of a second variable:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"time\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\"Stack\" the conditional distributions:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"time\", multiple=\"stack\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Normalize the stacked distribution at each value in the grid:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"time\", multiple=\"fill\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Estimate the cumulative distribution function(s), normalizing each subset:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(\n",
+    "    data=tips, x=\"total_bill\", hue=\"time\",\n",
+    "    cumulative=True, common_norm=False, common_grid=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Estimate distribution from aggregated data, using weights:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tips_agg = (tips\n",
+    "    .groupby(\"size\")\n",
+    "    .agg(total_bill=(\"total_bill\", \"mean\"), n=(\"total_bill\", \"count\"))\n",
+    ")\n",
+    "sns.kdeplot(data=tips_agg, x=\"total_bill\", weights=\"n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Map the data variable with log scaling:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "diamonds = sns.load_dataset(\"diamonds\")\n",
+    "sns.kdeplot(data=diamonds, x=\"price\", log_scale=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Use numeric hue mapping:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"size\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Modify the appearance of the plot:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(\n",
+    "   data=tips, x=\"total_bill\", hue=\"size\",\n",
+    "   fill=True, common_norm=False, palette=\"viridis\",\n",
+    "   alpha=.5, linewidth=0,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Plot a bivariate distribution:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "geyser = sns.load_dataset(\"geyser\")\n",
+    "sns.kdeplot(data=geyser, x=\"waiting\", y=\"duration\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Map a third variable with a hue semantic to show conditional distributions:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(data=geyser, x=\"waiting\", y=\"duration\", hue=\"kind\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Show filled contours:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(\n",
+    "    data=geyser, x=\"waiting\", y=\"duration\", hue=\"kind\", fill=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Show fewer contour levels, covering less of the distribution:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(\n",
+    "    data=geyser, x=\"waiting\", y=\"duration\", hue=\"kind\",\n",
+    "    levels=5, thresh=.2,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Fill the axes extent with a smooth distribution, using a different colormap:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.kdeplot(\n",
+    "    data=geyser, x=\"waiting\", y=\"duration\",\n",
+    "    fill=True, thresh=0, levels=100, cmap=\"mako\",\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "seaborn-refactor (py38)",
+   "language": "python",
+   "name": "seaborn-refactor"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/doc/docstrings/rugplot.ipynb b/doc/docstrings/rugplot.ipynb
new file mode 100644
index 0000000000..34dddeb9c0
--- /dev/null
+++ b/doc/docstrings/rugplot.ipynb
@@ -0,0 +1,137 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Add a rug along one of the axes:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import seaborn as sns; sns.set()\n",
+    "tips = sns.load_dataset(\"tips\")\n",
+    "sns.kdeplot(data=tips, x=\"total_bill\")\n",
+    "sns.rugplot(data=tips, x=\"total_bill\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Add a rug along both axes:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\")\n",
+    "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Represent a third variable with hue mapping:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"time\")\n",
+    "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"time\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Draw a taller rug:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\")\n",
+    "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\", height=.1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Put the rug outside the axes:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\")\n",
+    "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\", height=-.02, clip_on=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Show the density of a larger dataset using thinner lines and alpha blending:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "diamonds = sns.load_dataset(\"diamonds\")\n",
+    "sns.scatterplot(data=diamonds, x=\"carat\", y=\"price\", s=5)\n",
+    "sns.rugplot(data=diamonds, x=\"carat\", y=\"price\", lw=1, alpha=.005)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "seaborn-refactor (py38)",
+   "language": "python",
+   "name": "seaborn-refactor"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/doc/tools/extract_examples.py b/doc/tools/extract_examples.py
new file mode 100644
index 0000000000..272bb6c76b
--- /dev/null
+++ b/doc/tools/extract_examples.py
@@ -0,0 +1,71 @@
+"""Turn the examples section of a function docstring into a notebook."""
+import re
+import sys
+import pydoc
+import seaborn
+from seaborn.external.docscrape import NumpyDocString
+import nbformat
+
+
+def line_type(line):
+    """Classify a docstring line as "code" (4-space indent) or "markdown"."""
+    indented = line[:4] == "    "
+    if not indented:
+        return "markdown"
+    return "code"
+
+
+def add_cell(nb, lines, cell_type):
+    """Join ``lines`` with newlines and append them to ``nb`` as one cell."""
+    factories = dict(
+        code=nbformat.v4.new_code_cell,
+        markdown=nbformat.v4.new_markdown_cell,
+    )
+    source = "\n".join(lines)
+    new_cell = factories[cell_type](source)
+    nb["cells"].append(new_cell)
+
+
+if __name__ == "__main__":
+
+    _, func_name = sys.argv
+
+    # Parse the docstring and get the examples section
+    func_obj = getattr(seaborn, func_name)
+    lines = NumpyDocString(pydoc.getdoc(func_obj))["Examples"]
+
+    # Strip indentation, the prompt, and a leading `ax =`/`g =` (any spacing)
+    pat = re.compile(r"\s{4}[>\.]{3} (ax ?=(?!=) ?)?(g ?=(?!=) ?)?")
+
+    nb = nbformat.v4.new_notebook()
+
+    # We always start with at least one line of text
+    cell_type = "markdown"
+    cell = []
+
+    for line in lines:
+
+        # Ignore matplotlib plot directive
+        if ".. plot" in line or ":context:" in line:
+            continue
+
+        # Ignore blank lines
+        if not line:
+            continue
+
+        if line_type(line) != cell_type:
+            # We are on the first line of the next cell,
+            # so package up the last cell
+            add_cell(nb, cell, cell_type)
+            cell_type = line_type(line)
+            cell = []
+
+        if line_type(line) == "code":
+            line = re.sub(pat, "", line)
+
+        cell.append(line)
+
+    # Package the final cell
+    add_cell(nb, cell, cell_type)
+
+    nbformat.write(nb, f"docstrings/{func_name}.ipynb")
diff --git a/doc/tools/nb_to_doc.py b/doc/tools/nb_to_doc.py
index 46ab57af4c..3d3330e429 100755
--- a/doc/tools/nb_to_doc.py
+++ b/doc/tools/nb_to_doc.py
@@ -1,47 +1,236 @@
 #! /usr/bin/env python
-"""
-Convert empty IPython notebook to a sphinx doc page.
+"""Execute a .ipynb file, write out a processed .rst and clean .ipynb.
+
+The functions in this script were copied from the nbstripout tool:
+
+Copyright (c) 2015 Min RK, Florian Rathgeber, Michael McNeil Forbes
+2019 Casper da Costa-Luis
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
 
-TODO this should be rewritten to use the nbconvert Python API.
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 """
 import os
 import sys
-from subprocess import check_call as sh
+import nbformat
+from nbconvert import RSTExporter
+from nbconvert.preprocessors import (
+    ExecutePreprocessor,
+    TagRemovePreprocessor,
+    ExtractOutputPreprocessor
+)
+from traitlets.config import Config
+
+
+class MetadataError(Exception):
+    pass
+
+
+def pop_recursive(d, key, default=None):
+    """Pop a nested entry from `d`, addressed by the `.`-delimited `key`.
+
+    Returns `default` when part of the path is missing.
+    >>> d = {'a': {'b': 1, 'c': 2}}
+    >>> pop_recursive(d, 'a.c'), d
+    (2, {'a': {'b': 1}})
+    """
+    *parents, leaf = key.split('.')
+    node = d
+    # Walk down to the container that holds the final key
+    for name in parents:
+        if not hasattr(node, 'get'):
+            return default
+        node = node.get(name, {})
+    if hasattr(node, 'pop'):
+        return node.pop(leaf, default)
+    return default
+
+
+def _cells(nb):
+    """Iterate over every cell of `nb`, whatever its nbformat version."""
+    legacy = nb.nbformat < 4
+    if not legacy:
+        yield from nb.cells
+        return
+    # Pre-v4 notebooks keep their cells inside worksheets
+    for sheet in nb.worksheets:
+        yield from sheet.cells
+
+
+def determine_keep_output(cell, default):
+    """Given a cell, determine whether output should be kept
+    Based on whether the metadata has "init_cell": true,
+    "keep_output": true, or the tags contain "keep_output" """
+    if 'init_cell' in cell.metadata:
+        return bool(cell.metadata.init_cell)
+
+    has_keep_output_metadata = 'keep_output' in cell.metadata
+    keep_output_metadata = bool(cell.metadata.get('keep_output', False))
+
+    has_keep_output_tag = 'keep_output' in cell.metadata.get('tags', [])
+
+    # keep_output between metadata and tags should not contradict each other
+    if has_keep_output_metadata \
+       and has_keep_output_tag \
+       and not keep_output_metadata:
+        raise MetadataError(
+            "cell metadata contradicts tags: "
+            "\"keep_output\": false, but keep_output in tags"
+        )
 
+    if has_keep_output_metadata or has_keep_output_tag:
+        return keep_output_metadata or has_keep_output_tag
+    return default
 
-def convert_nb(nbname):
 
-    # Execute the notebook
-    exec_cmdline = [
-        "jupyter", "nbconvert",
-        "--to", "notebook",
-        "--execute",
-        "--inplace",
-        nbname,
-    ]
+def strip_output(nb, keep_output=False, keep_count=False, extra_keys=''):
+    """
+    Strip the outputs, execution count/prompt number and miscellaneous
+    metadata from a notebook object, unless specified to keep either the
+    outputs or counts.
+    `extra_keys` could be 'metadata.foo cell.metadata.bar metadata.baz'
+    """
+    if keep_output is None and 'keep_output' in nb.metadata:
+        keep_output = bool(nb.metadata['keep_output'])
 
-    kernel = os.environ.get("NB_KERNEL", "")
-    if kernel:
-        exec_cmdline.append(f"--ExecutePreprocessor.kernel_name={kernel}")
+    if hasattr(extra_keys, 'decode'):
+        extra_keys = extra_keys.decode()
+    extra_keys = extra_keys.split()
+    keys = {'metadata': [], 'cell': {'metadata': []}}
+    for key in extra_keys:
+        if key.startswith('metadata.'):
+            keys['metadata'].append(key[len('metadata.'):])
+        elif key.startswith('cell.metadata.'):
+            keys['cell']['metadata'].append(key[len('cell.metadata.'):])
+        else:
+            sys.stderr.write('ignoring extra key `%s`' % key)
 
-    sh(exec_cmdline)
+    nb.metadata.pop('signature', None)
+    nb.metadata.pop('widgets', None)
 
-    # Convert to .rst for Sphinx
-    sh(["jupyter", "nbconvert", "--to", "rst", nbname,
-        "--TagRemovePreprocessor.remove_cell_tags={'hide'}",
-        "--TagRemovePreprocessor.remove_input_tags={'hide-input'}",
-        "--TagRemovePreprocessor.remove_all_outputs_tags={'hide-output'}"])
+    for field in keys['metadata']:
+        pop_recursive(nb.metadata, field)
 
-    # Clear notebook output
-    sh(["jupyter", "nbconvert", "--to", "notebook", "--inplace",
-        "--ClearOutputPreprocessor.enabled=True", nbname])
+    for cell in _cells(nb):
+        keep_output_this_cell = determine_keep_output(cell, keep_output)
 
-    # Touch the .rst file so it has a later modify time than the source
-    sh(["touch", nbname + ".rst"])
+        # Remove the outputs, unless directed otherwise
+        if 'outputs' in cell:
+
+            # Default behavior strips outputs. With all outputs stripped,
+            # there are no counts to keep and keep_count is ignored.
+            if not keep_output_this_cell:
+                cell['outputs'] = []
+
+            # If keep_output_this_cell, but not keep_count, strip the counts
+            # from the output.
+            if keep_output_this_cell and not keep_count:
+                for output in cell['outputs']:
+                    if 'execution_count' in output:
+                        output['execution_count'] = None
+
+            # If keep_output_this_cell and keep_count, do nothing.
+
+        # Remove the prompt_number/execution_count, unless directed otherwise
+        if 'prompt_number' in cell and not keep_count:
+            cell['prompt_number'] = None
+        if 'execution_count' in cell and not keep_count:
+            cell['execution_count'] = None
+
+        # Always remove this metadata
+        for output_style in ['collapsed', 'scrolled']:
+            if output_style in cell.metadata:
+                cell.metadata[output_style] = False
+        if 'metadata' in cell:
+            for field in ['collapsed', 'scrolled', 'ExecuteTime']:
+                cell.metadata.pop(field, None)
+        for (extra, fields) in keys['cell'].items():
+            if extra in cell:
+                for field in fields:
+                    pop_recursive(getattr(cell, extra), field)
+    return nb
 
 
 if __name__ == "__main__":
 
-    for nbname in sys.argv[1:]:
-        convert_nb(nbname)
+    # Get the desired ipynb file path and parse into components
+    _, fpath = sys.argv
+    basedir, fname = os.path.split(fpath)
+    fstem = os.path.splitext(fname)[0]
+
+    # Read the notebook
+    print(f"Executing {fpath} ...", end=" ", flush=True)
+    with open(fpath, encoding="utf-8") as f:
+        nb = nbformat.read(f, as_version=4)
+
+    # Run the notebook
+    kernel = os.environ.get("NB_KERNEL", None)
+    if kernel is None:
+        kernel = nb["metadata"]["kernelspec"]["name"]
+    ep = ExecutePreprocessor(
+        timeout=600,
+        kernel_name=kernel,
+        extra_arguments=["--InlineBackend.rc={'figure.dpi': 96}"]
+    )
+    ep.preprocess(nb, {"metadata": {"path": basedir}})
+
+    # Remove the execution result outputs
+    for cell in nb.get("cells", {}):
+        fields = cell.get("outputs", [])
+        # Filter in place: calling remove() while iterating skips the
+        # element that follows each removed one.
+        fields[:] = [f for f in fields if f["output_type"] != "execute_result"]
+
+    # Convert to .rst formats
+    exp = RSTExporter()
+
+    c = Config()
+    c.TagRemovePreprocessor.remove_cell_tags = {"hide"}
+    c.TagRemovePreprocessor.remove_input_tags = {"hide-input"}
+    c.TagRemovePreprocessor.remove_all_outputs_tags = {"hide-output"}
+    c.ExtractOutputPreprocessor.output_filename_template = \
+        f"{fstem}_files/{fstem}_" + "{cell_index}_{index}{extension}"
+
+    exp.register_preprocessor(TagRemovePreprocessor(config=c), True)
+    exp.register_preprocessor(ExtractOutputPreprocessor(config=c), True)
+
+    body, resources = exp.from_notebook_node(nb)
+
+    # Clean the output on the notebook and save a .ipynb back to disk
+    print(f"Writing clean {fpath} ... ", end=" ", flush=True)
+    nb = strip_output(nb)
+    with open(fpath, "wt", encoding="utf-8") as f:
+        nbformat.write(nb, f)
+
+    # Write the .rst file
+    rst_path = os.path.join(basedir, f"{fstem}.rst")
+    print(f"Writing {rst_path}")
+    with open(rst_path, "w", encoding="utf-8") as f:
+        f.write(body)
+
+    # Write the individual image outputs
+    imdir = os.path.join(basedir, f"{fstem}_files")
+    # makedirs(exist_ok=True) replaces the separate exists()/mkdir() pair
+    os.makedirs(imdir, exist_ok=True)
+
+    for imname, imdata in resources["outputs"].items():
+        if imname.startswith(fstem):
+            impath = os.path.join(basedir, f"{imname}")
+            with open(impath, "wb") as f:
+                f.write(imdata)
diff --git a/doc/tutorial/Makefile b/doc/tutorial/Makefile
index a77fda5054..c8ca64d97d 100644
--- a/doc/tutorial/Makefile
+++ b/doc/tutorial/Makefile
@@ -3,4 +3,4 @@ rst_files := $(patsubst %.ipynb,%.rst,$(wildcard *.ipynb))
 tutorial: ${rst_files}
 
 %.rst: %.ipynb
-	../tools/nb_to_doc.py $*
+	../tools/nb_to_doc.py $*.ipynb
diff --git a/seaborn/_docstrings.py b/seaborn/_docstrings.py
index 6a50e7a9cc..52be7d6aa8 100644
--- a/seaborn/_docstrings.py
+++ b/seaborn/_docstrings.py
@@ -49,7 +49,11 @@ def from_function_params(cls, func):
 
 
 # TODO is "vector" the best term here? We mean to imply 1D data with a variety
-# of types, but vectors are actually 2D (row or columns...)
+# of types?
+
+# TODO now that we can parse numpydoc style strings, do we need to define dicts
+# of docstring components, or just write out a docstring?
+
 
 _core_params = dict(
     data="""
diff --git a/seaborn/distributions.py b/seaborn/distributions.py
index 1ff606ee68..6d68a58b23 100644
--- a/seaborn/distributions.py
+++ b/seaborn/distributions.py
@@ -867,160 +867,7 @@ def kdeplot(
 Examples
 --------
 
-Plot a univariate distribution along the x axis:
-
-.. plot::
-    :context: close-figs
-
-    >>> import seaborn as sns; sns.set()
-    >>> tips = sns.load_dataset("tips")
-    >>> ax = sns.kdeplot(data=tips, x="total_bill")
-
-Flip the plot by assigning the data variable to the y axis:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(data=tips, y="total_bill")
-
-Plot distributions for each column of a wide-form dataset:
-
-.. plot::
-    :context: close-figs
-
-    >>> iris = sns.load_dataset("iris")
-    >>> ax = sns.kdeplot(data=iris)
-
-Use less smoothing:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(data=tips, x="total_bill", bw_adjust=.2)
-
-Use more smoothing, but don't smooth past the extreme data points:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax= sns.kdeplot(data=tips, x="total_bill", bw_adjust=5, cut=0)
-
-Plot conditional distributions with hue mapping of a second variable:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(data=tips, x="total_bill", hue="time")
-
-"Stack" the conditional distributions:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(
-    ...     data=tips, x="total_bill", hue="time", multiple="stack"
-    ... )
-
-Normalize the stacked distribution at each value in the grid:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(
-    ...     data=tips, x="total_bill", hue="time", multiple="fill"
-    ... )
-
-Estimate the cumulative distribution function(s), normalizing each subset:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(
-    ...     data=tips, x="total_bill", hue="time",
-    ...     cumulative=True, common_norm=False, common_grid=True,
-    ... )
-
-Estimate distribution from aggregated data, using weights:
-
-.. plot::
-    :context: close-figs
-
-    >>> tips_agg = (tips
-    ...     .groupby("size")
-    ...     .agg(total_bill=("total_bill", "mean"), n=("total_bill", "count"))
-    ... )
-    >>> ax = sns.kdeplot(data=tips_agg, x="total_bill", weights="n")
-
-Map the data variable with log scaling:
-
-.. plot::
-    :context: close-figs
-
-    >>> diamonds = sns.load_dataset("diamonds")
-    >>> ax = sns.kdeplot(data=diamonds, x="price", log_scale=True)
-
-Use numeric hue mapping:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(data=tips, x="total_bill", hue="size")
-
-Modify the appearance of the plot:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(
-    ...    data=tips, x="total_bill", hue="size",
-    ...    fill=True, common_norm=False, palette="viridis",
-    ...    alpha=.5, linewidth=0,
-    ... )
-
-Plot a bivariate distribution:
-
-.. plot::
-    :context: close-figs
-
-    >>> geyser = sns.load_dataset("geyser")
-    >>> ax = sns.kdeplot(data=geyser, x="waiting", y="duration")
-
-Map a third variable with a hue semantic to show conditional distributions:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(data=geyser, x="waiting", y="duration", hue="kind")
-
-Show filled contours:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(
-    ...     data=geyser, x="waiting", y="duration", hue="kind", fill=True,
-    ... )
-
-Show fewer contour levels, covering less of the distribution:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(
-    ...     data=geyser, x="waiting", y="duration", hue="kind",
-    ...     levels=5, thresh=.2,
-    ... )
-
-Fill the axes extent with a smooth distribution, using a different colormap:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.kdeplot(
-    ...     data=geyser, x="waiting", y="duration",
-    ...     fill=True, thresh=0, levels=100, cmap="mako",
-    ... )
-
+.. include:: ../docstrings/kdeplot.rst
 
 """.format(
     params=_param_docs,
@@ -1199,6 +1046,9 @@ def rugplot(
     overlap with other elements.
 legend : bool
     If False, do not add a legend for semantic variables.
+kwargs
+    Other keyword arguments are passed to
+    :class:`matplotlib.collections.LineCollection`.
 
 Returns
 -------
@@ -1207,58 +1057,7 @@ def rugplot(
 Examples
 --------
 
-Add a rug along one of the axes:
-
-.. plot::
-    :context: close-figs
-
-    >>> import seaborn as sns; sns.set()
-    >>> tips = sns.load_dataset("tips")
-    >>> ax = sns.kdeplot(data=tips, x="total_bill")
-    >>> ax = sns.rugplot(data=tips, x="total_bill")
-
-Add a rug along both axes:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.scatterplot(data=tips, x="total_bill", y="tip")
-    >>> ax = sns.rugplot(data=tips, x="total_bill", y="tip")
-
-Represent a third variable with hue mapping:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.scatterplot(data=tips, x="total_bill", y="tip", hue="time")
-    >>> ax = sns.rugplot(data=tips, x="total_bill", y="tip", hue="time")
-
-Draw a taller rug:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.scatterplot(data=tips, x="total_bill", y="tip")
-    >>> ax = sns.rugplot(data=tips, x="total_bill", y="tip", height=.1)
-
-Put the rug outside the axes:
-
-.. plot::
-    :context: close-figs
-
-    >>> ax = sns.scatterplot(data=tips, x="total_bill", y="tip")
-    >>> ax = sns.rugplot(data=tips, x="total_bill", y="tip",
-    ...     height=-.02, clip_on=False,
-    ... )
-
-Show the density of a larger dataset using thinner lines and alpha blending:
-
-.. plot::
-    :context: close-figs
-
-    >>> diamonds = sns.load_dataset("diamonds")
-    >>> ax = sns.scatterplot(data=diamonds, x="carat", y="price", s=5)
-    >>> ax = sns.rugplot(data=diamonds, x="carat", y="price", lw=1, alpha=.005)
+.. include:: ../docstrings/rugplot.rst
 
 """.format(
     params=_param_docs,