From 1f231436e65dfa490d8e328bd2cd97465daa1f76 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Thu, 4 Jun 2020 17:24:09 -0400 Subject: [PATCH] Store code examples in API docs using notebooks (#2123) * Add new infrastructure for generating docs from notebooks * Move rugplot examples into notebook * More improvements to new notebook processing infrastructure * Add output stripping to doc notebook build * Add script for auto-generating API doc notebooks * Improve Makefile infra * Convert kdeplot API docs to notebook * Read notebook kernel from environment variable * Add pandoc to GH doc build * Consolidate workflow --- .github/workflows/ci.yaml | 4 +- doc/.gitignore | 12 +- doc/Makefile | 13 +- doc/docstrings/Makefile | 10 + doc/docstrings/kdeplot.ipynb | 337 ++++++++++++++++++++++++++++++++++ doc/docstrings/rugplot.ipynb | 137 ++++++++++++++ doc/tools/extract_examples.py | 71 +++++++ doc/tools/nb_to_doc.py | 247 ++++++++++++++++++++++--- doc/tutorial/Makefile | 2 +- seaborn/_docstrings.py | 6 +- seaborn/distributions.py | 211 +-------------------- 11 files changed, 798 insertions(+), 252 deletions(-) create mode 100644 doc/docstrings/Makefile create mode 100644 doc/docstrings/kdeplot.ipynb create mode 100644 doc/docstrings/rugplot.ipynb create mode 100644 doc/tools/extract_examples.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1b31609e41..74cd979301 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,7 +30,7 @@ jobs: - name: Install doc tools run: | pip install -r doc/requirements.txt + sudo apt-get install pandoc - name: Build docs - run: | - make -C doc notebooks html + run: make -C doc notebooks html diff --git a/doc/.gitignore b/doc/.gitignore index ea122c6ca9..5cb06a8e24 100644 --- a/doc/.gitignore +++ b/doc/.gitignore @@ -4,13 +4,5 @@ generated/ examples/ example_thumbs/ introduction.rst -aesthetics.rst -relational.rst -color_palettes.rst -distributions.rst -regression.rst -categorical.rst 
-plotting_distributions.rst -dataset_exploration.rst -timeseries_plots.rst -axis_grids.rst +tutorial/*.rst +docstrings/*.rst diff --git a/doc/Makefile b/doc/Makefile index 466f184505..1dfec5de5c 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -46,17 +46,24 @@ clean: -rm -rf example_thumbs/* -rm -rf tutorial/*_files/ -rm -rf tutorial/*.rst + -rm -rf docstrings/*_files/ + -rm -rf docstrings/*.rst -rm -rf generated/* -rm -rf introduction_files/* -rm introduction.rst +.PHONY: tutorials tutorials: make -C tutorial -introduction: introduction.ipynb - tools/nb_to_doc.py introduction +.PHONY: docstrings +docstrings: + make -C docstrings -notebooks: tutorials introduction +introduction.rst: introduction.ipynb + tools/nb_to_doc.py ./introduction.ipynb + +notebooks: tutorials docstrings introduction.rst html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html diff --git a/doc/docstrings/Makefile b/doc/docstrings/Makefile new file mode 100644 index 0000000000..57045bfc0e --- /dev/null +++ b/doc/docstrings/Makefile @@ -0,0 +1,10 @@ +rst_files := $(patsubst %.ipynb,%.rst,$(wildcard *.ipynb)) + +docstrings: ${rst_files} + +%.rst: %.ipynb + @../tools/nb_to_doc.py $*.ipynb + @cp -r $*_files ../generated/ + @if [ -f ../generated/seaborn.$*.rst ]; then \ + touch ../generated/seaborn.$*.rst; \ + fi diff --git a/doc/docstrings/kdeplot.ipynb b/doc/docstrings/kdeplot.ipynb new file mode 100644 index 0000000000..42fca7f6d9 --- /dev/null +++ b/doc/docstrings/kdeplot.ipynb @@ -0,0 +1,337 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot a univariate distribution along the x axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns; sns.set()\n", + "tips = sns.load_dataset(\"tips\")\n", + "sns.kdeplot(data=tips, x=\"total_bill\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Flip the plot by assigning the data variable to the y 
axis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, y=\"total_bill\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot distributions for each column of a wide-form dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris = sns.load_dataset(\"iris\")\n", + "sns.kdeplot(data=iris)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use less smoothing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", bw_adjust=.2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use more smoothing, but don't smooth past the extreme data points:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ax= sns.kdeplot(data=tips, x=\"total_bill\", bw_adjust=5, cut=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot conditional distributions with hue mapping of a second variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Stack\" the conditional distributions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"time\", multiple=\"stack\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Normalize the stacked distribution at each value in the grid:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", 
hue=\"time\", multiple=\"fill\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Estimate the cumulative distribution function(s), normalizing each subset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=tips, x=\"total_bill\", hue=\"time\",\n", + " cumulative=True, common_norm=False, common_grid=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Estimate distribution from aggregated data, using weights:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tips_agg = (tips\n", + " .groupby(\"size\")\n", + " .agg(total_bill=(\"total_bill\", \"mean\"), n=(\"total_bill\", \"count\"))\n", + ")\n", + "sns.kdeplot(data=tips_agg, x=\"total_bill\", weights=\"n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Map the data variable with log scaling:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "diamonds = sns.load_dataset(\"diamonds\")\n", + "sns.kdeplot(data=diamonds, x=\"price\", log_scale=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use numeric hue mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=tips, x=\"total_bill\", hue=\"size\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Modify the appearance of the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=tips, x=\"total_bill\", hue=\"size\",\n", + " fill=True, common_norm=False, palette=\"viridis\",\n", + " alpha=.5, linewidth=0,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot a bivariate distribution:" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "geyser = sns.load_dataset(\"geyser\")\n", + "sns.kdeplot(data=geyser, x=\"waiting\", y=\"duration\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Map a third variable with a hue semantic to show conditional distributions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(data=geyser, x=\"waiting\", y=\"duration\", hue=\"kind\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show filled contours:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=geyser, x=\"waiting\", y=\"duration\", hue=\"kind\", fill=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show fewer contour levels, covering less of the distribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=geyser, x=\"waiting\", y=\"duration\", hue=\"kind\",\n", + " levels=5, thresh=.2,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fill the axes extent with a smooth distribution, using a different colormap:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.kdeplot(\n", + " data=geyser, x=\"waiting\", y=\"duration\",\n", + " fill=True, thresh=0, levels=100, cmap=\"mako\",\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "seaborn-refactor (py38)", + "language": "python", + "name": "seaborn-refactor" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/docstrings/rugplot.ipynb b/doc/docstrings/rugplot.ipynb new file mode 100644 index 0000000000..34dddeb9c0 --- /dev/null +++ b/doc/docstrings/rugplot.ipynb @@ -0,0 +1,137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add a rug along one of the axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns; sns.set()\n", + "tips = sns.load_dataset(\"tips\")\n", + "sns.kdeplot(data=tips, x=\"total_bill\")\n", + "sns.rugplot(data=tips, x=\"total_bill\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Add a rug along both axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\")\n", + "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Represent a third variable with hue mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"time\")\n", + "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\", hue=\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Draw a taller rug:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, x=\"total_bill\", y=\"tip\")\n", + "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\", height=.1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Put the rug outside the axes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sns.scatterplot(data=tips, 
x=\"total_bill\", y=\"tip\")\n", + "sns.rugplot(data=tips, x=\"total_bill\", y=\"tip\", height=-.02, clip_on=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show the density of a larger dataset using thinner lines and alpha blending:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "diamonds = sns.load_dataset(\"diamonds\")\n", + "sns.scatterplot(data=diamonds, x=\"carat\", y=\"price\", s=5)\n", + "sns.rugplot(data=diamonds, x=\"carat\", y=\"price\", lw=1, alpha=.005)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "seaborn-refactor (py38)", + "language": "python", + "name": "seaborn-refactor" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/tools/extract_examples.py b/doc/tools/extract_examples.py new file mode 100644 index 0000000000..272bb6c76b --- /dev/null +++ b/doc/tools/extract_examples.py @@ -0,0 +1,71 @@ +"""Turn the examples section of a function docstring into a notebook.""" +import re +import sys +import pydoc +import seaborn +from seaborn.external.docscrape import NumpyDocString +import nbformat + + +def line_type(line): + + if line.startswith(" "): + return "code" + else: + return "markdown" + + +def add_cell(nb, lines, cell_type): + + cell_objs = { + "code": nbformat.v4.new_code_cell, + "markdown": nbformat.v4.new_markdown_cell, + } + text = "\n".join(lines) + cell = cell_objs[cell_type](text) + nb["cells"].append(cell) + + +if __name__ == "__main__": + + _, func_name = sys.argv + + # Parse the docstring and get the examples section + func_obj = 
getattr(seaborn, func_name) + lines = NumpyDocString(pydoc.getdoc(func_obj))["Examples"] + + # Remove code indentation, the prompt, and mpl return variable + pat = re.compile(r"\s{4}[>\.]{3} (ax = ){0,1}(g = ){0,1}") + + nb = nbformat.v4.new_notebook() + + # We always start with at least one line of text + cell_type = "markdown" + cell = [] + + for line in lines: + + # Ignore matplotlib plot directive + if ".. plot" in line or ":context:" in line: + continue + + # Ignore blank lines + if not line: + continue + + if line_type(line) != cell_type: + # We are on the first line of the next cell, + # so package up the last cell + add_cell(nb, cell, cell_type) + cell_type = line_type(line) + cell = [] + + if line_type(line) == "code": + line = re.sub(pat, "", line) + + cell.append(line) + + # Package the final cell + add_cell(nb, cell, cell_type) + + nbformat.write(nb, f"docstrings/{func_name}.ipynb") diff --git a/doc/tools/nb_to_doc.py b/doc/tools/nb_to_doc.py index 46ab57af4c..3d3330e429 100755 --- a/doc/tools/nb_to_doc.py +++ b/doc/tools/nb_to_doc.py @@ -1,47 +1,236 @@ #! /usr/bin/env python -""" -Convert empty IPython notebook to a sphinx doc page. +"""Execute a .ipynb file, write out a processed .rst and clean .ipynb. + +The functions in this script were copied from the nbstripout tool: + +Copyright (c) 2015 Min RK, Florian Rathgeber, Michael McNeil Forbes +2019 Casper da Costa-Luis + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: -TODO this should be rewritten to use the nbconvert Python API. 
+The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import os import sys -from subprocess import check_call as sh +import nbformat +from nbconvert import RSTExporter +from nbconvert.preprocessors import ( + ExecutePreprocessor, + TagRemovePreprocessor, + ExtractOutputPreprocessor +) +from traitlets.config import Config + + +class MetadataError(Exception): + pass + + +def pop_recursive(d, key, default=None): + """dict.pop(key) where `key` is a `.`-delimited list of nested keys. 
+ >>> d = {'a': {'b': 1, 'c': 2}} + >>> pop_recursive(d, 'a.c') + 2 + >>> d + {'a': {'b': 1}} + """ + nested = key.split('.') + current = d + for k in nested[:-1]: + if hasattr(current, 'get'): + current = current.get(k, {}) + else: + return default + if not hasattr(current, 'pop'): + return default + return current.pop(nested[-1], default) + + +def _cells(nb): + """Yield all cells in an nbformat-insensitive manner""" + if nb.nbformat < 4: + for ws in nb.worksheets: + for cell in ws.cells: + yield cell + else: + for cell in nb.cells: + yield cell + + +def determine_keep_output(cell, default): + """Given a cell, determine whether output should be kept + Based on whether the metadata has "init_cell": true, + "keep_output": true, or the tags contain "keep_output" """ + if 'init_cell' in cell.metadata: + return bool(cell.metadata.init_cell) + + has_keep_output_metadata = 'keep_output' in cell.metadata + keep_output_metadata = bool(cell.metadata.get('keep_output', False)) + + has_keep_output_tag = 'keep_output' in cell.metadata.get('tags', []) + + # keep_output between metadata and tags should not contradict each other + if has_keep_output_metadata \ + and has_keep_output_tag \ + and not keep_output_metadata: + raise MetadataError( + "cell metadata contradicts tags: " + "\"keep_output\": false, but keep_output in tags" + ) + if has_keep_output_metadata or has_keep_output_tag: + return keep_output_metadata or has_keep_output_tag + return default -def convert_nb(nbname): - # Execute the notebook - exec_cmdline = [ - "jupyter", "nbconvert", - "--to", "notebook", - "--execute", - "--inplace", - nbname, - ] +def strip_output(nb, keep_output=False, keep_count=False, extra_keys=''): + """ + Strip the outputs, execution count/prompt number and miscellaneous + metadata from a notebook object, unless specified to keep either the + outputs or counts. 
+ `extra_keys` could be 'metadata.foo cell.metadata.bar metadata.baz' + """ + if keep_output is None and 'keep_output' in nb.metadata: + keep_output = bool(nb.metadata['keep_output']) - kernel = os.environ.get("NB_KERNEL", "") - if kernel: - exec_cmdline.append(f"--ExecutePreprocessor.kernel_name={kernel}") + if hasattr(extra_keys, 'decode'): + extra_keys = extra_keys.decode() + extra_keys = extra_keys.split() + keys = {'metadata': [], 'cell': {'metadata': []}} + for key in extra_keys: + if key.startswith('metadata.'): + keys['metadata'].append(key[len('metadata.'):]) + elif key.startswith('cell.metadata.'): + keys['cell']['metadata'].append(key[len('cell.metadata.'):]) + else: + sys.stderr.write('ignoring extra key `%s`' % key) - sh(exec_cmdline) + nb.metadata.pop('signature', None) + nb.metadata.pop('widgets', None) - # Convert to .rst for Sphinx - sh(["jupyter", "nbconvert", "--to", "rst", nbname, - "--TagRemovePreprocessor.remove_cell_tags={'hide'}", - "--TagRemovePreprocessor.remove_input_tags={'hide-input'}", - "--TagRemovePreprocessor.remove_all_outputs_tags={'hide-output'}"]) + for field in keys['metadata']: + pop_recursive(nb.metadata, field) - # Clear notebook output - sh(["jupyter", "nbconvert", "--to", "notebook", "--inplace", - "--ClearOutputPreprocessor.enabled=True", nbname]) + for cell in _cells(nb): + keep_output_this_cell = determine_keep_output(cell, keep_output) - # Touch the .rst file so it has a later modify time than the source - sh(["touch", nbname + ".rst"]) + # Remove the outputs, unless directed otherwise + if 'outputs' in cell: + + # Default behavior strips outputs. With all outputs stripped, + # there are no counts to keep and keep_count is ignored. + if not keep_output_this_cell: + cell['outputs'] = [] + + # If keep_output_this_cell, but not keep_count, strip the counts + # from the output. 
+ if keep_output_this_cell and not keep_count: + for output in cell['outputs']: + if 'execution_count' in output: + output['execution_count'] = None + + # If keep_output_this_cell and keep_count, do nothing. + + # Remove the prompt_number/execution_count, unless directed otherwise + if 'prompt_number' in cell and not keep_count: + cell['prompt_number'] = None + if 'execution_count' in cell and not keep_count: + cell['execution_count'] = None + + # Always remove this metadata + for output_style in ['collapsed', 'scrolled']: + if output_style in cell.metadata: + cell.metadata[output_style] = False + if 'metadata' in cell: + for field in ['collapsed', 'scrolled', 'ExecuteTime']: + cell.metadata.pop(field, None) + for (extra, fields) in keys['cell'].items(): + if extra in cell: + for field in fields: + pop_recursive(getattr(cell, extra), field) + return nb if __name__ == "__main__": - for nbname in sys.argv[1:]: - convert_nb(nbname) + # Get the desired ipynb file path and parse into components + _, fpath = sys.argv + basedir, fname = os.path.split(fpath) + fstem = fname[:-6] + + # Read the notebook + print(f"Executing {fpath} ...", end=" ", flush=True) + with open(fpath) as f: + nb = nbformat.read(f, as_version=4) + + # Run the notebook + kernel = os.environ.get("NB_KERNEL", None) + if kernel is None: + kernel = nb["metadata"]["kernelspec"]["name"] + ep = ExecutePreprocessor( + timeout=600, + kernel_name=kernel, + extra_arguments=["--InlineBackend.rc={'figure.dpi': 96}"] + ) + ep.preprocess(nb, {"metadata": {"path": basedir}}) + + # Remove the execution result outputs + for cell in nb.get("cells", {}): + fields = cell.get("outputs", []) + for field in fields: + if field["output_type"] == "execute_result": + fields.remove(field) + + # Convert to .rst formats + exp = RSTExporter() + + c = Config() + c.TagRemovePreprocessor.remove_cell_tags = {"hide"} + c.TagRemovePreprocessor.remove_input_tags = {"hide-input"} + c.TagRemovePreprocessor.remove_all_outputs_tags = 
{"hide-output"} + c.ExtractOutputPreprocessor.output_filename_template = \ + f"{fstem}_files/{fstem}_" + "{cell_index}_{index}{extension}" + + exp.register_preprocessor(TagRemovePreprocessor(config=c), True) + exp.register_preprocessor(ExtractOutputPreprocessor(config=c), True) + + body, resources = exp.from_notebook_node(nb) + + # Clean the output on the notebook and save a .ipynb back to disk + print(f"Writing clean {fpath} ... ", end=" ", flush=True) + nb = strip_output(nb) + with open(fpath, "wt") as f: + nbformat.write(nb, f) + + # Write the .rst file + rst_path = os.path.join(basedir, f"{fstem}.rst") + print(f"Writing {rst_path}") + with open(rst_path, "w") as f: + f.write(body) + + # Write the individual image outputs + imdir = os.path.join(basedir, f"{fstem}_files") + if not os.path.exists(imdir): + os.mkdir(imdir) + + for imname, imdata in resources["outputs"].items(): + if imname.startswith(fstem): + impath = os.path.join(basedir, f"{imname}") + with open(impath, "wb") as f: + f.write(imdata) diff --git a/doc/tutorial/Makefile b/doc/tutorial/Makefile index a77fda5054..c8ca64d97d 100644 --- a/doc/tutorial/Makefile +++ b/doc/tutorial/Makefile @@ -3,4 +3,4 @@ rst_files := $(patsubst %.ipynb,%.rst,$(wildcard *.ipynb)) tutorial: ${rst_files} %.rst: %.ipynb - ../tools/nb_to_doc.py $* + ../tools/nb_to_doc.py $*.ipynb diff --git a/seaborn/_docstrings.py b/seaborn/_docstrings.py index 6a50e7a9cc..52be7d6aa8 100644 --- a/seaborn/_docstrings.py +++ b/seaborn/_docstrings.py @@ -49,7 +49,11 @@ def from_function_params(cls, func): # TODO is "vector" the best term here? We mean to imply 1D data with a variety -# of types, but vectors are actually 2D (row or columns...) +# of types? + +# TODO now that we can parse numpydoc style strings, do we need to define dicts +# of docstring components, or just write out a docstring? 
+ _core_params = dict( data=""" diff --git a/seaborn/distributions.py b/seaborn/distributions.py index 1ff606ee68..6d68a58b23 100644 --- a/seaborn/distributions.py +++ b/seaborn/distributions.py @@ -867,160 +867,7 @@ def kdeplot( Examples -------- -Plot a univariate distribution along the x axis: - -.. plot:: - :context: close-figs - - >>> import seaborn as sns; sns.set() - >>> tips = sns.load_dataset("tips") - >>> ax = sns.kdeplot(data=tips, x="total_bill") - -Flip the plot by assigning the data variable to the y axis: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot(data=tips, y="total_bill") - -Plot distributions for each column of a wide-form dataset: - -.. plot:: - :context: close-figs - - >>> iris = sns.load_dataset("iris") - >>> ax = sns.kdeplot(data=iris) - -Use less smoothing: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot(data=tips, x="total_bill", bw_adjust=.2) - -Use more smoothing, but don't smooth past the extreme data points: - -.. plot:: - :context: close-figs - - >>> ax= sns.kdeplot(data=tips, x="total_bill", bw_adjust=5, cut=0) - -Plot conditional distributions with hue mapping of a second variable: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot(data=tips, x="total_bill", hue="time") - -"Stack" the conditional distributions: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot( - ... data=tips, x="total_bill", hue="time", multiple="stack" - ... ) - -Normalize the stacked distribution at each value in the grid: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot( - ... data=tips, x="total_bill", hue="time", multiple="fill" - ... ) - -Estimate the cumulative distribution function(s), normalizing each subset: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot( - ... data=tips, x="total_bill", hue="time", - ... cumulative=True, common_norm=False, common_grid=True, - ... ) - -Estimate distribution from aggregated data, using weights: - -.. 
plot:: - :context: close-figs - - >>> tips_agg = (tips - ... .groupby("size") - ... .agg(total_bill=("total_bill", "mean"), n=("total_bill", "count")) - ... ) - >>> ax = sns.kdeplot(data=tips_agg, x="total_bill", weights="n") - -Map the data variable with log scaling: - -.. plot:: - :context: close-figs - - >>> diamonds = sns.load_dataset("diamonds") - >>> ax = sns.kdeplot(data=diamonds, x="price", log_scale=True) - -Use numeric hue mapping: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot(data=tips, x="total_bill", hue="size") - -Modify the appearance of the plot: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot( - ... data=tips, x="total_bill", hue="size", - ... fill=True, common_norm=False, palette="viridis", - ... alpha=.5, linewidth=0, - ... ) - -Plot a bivariate distribution: - -.. plot:: - :context: close-figs - - >>> geyser = sns.load_dataset("geyser") - >>> ax = sns.kdeplot(data=geyser, x="waiting", y="duration") - -Map a third variable with a hue semantic to show conditional distributions: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot(data=geyser, x="waiting", y="duration", hue="kind") - -Show filled contours: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot( - ... data=geyser, x="waiting", y="duration", hue="kind", fill=True, - ... ) - -Show fewer contour levels, covering less of the distribution: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot( - ... data=geyser, x="waiting", y="duration", hue="kind", - ... levels=5, thresh=.2, - ... ) - -Fill the axes extent with a smooth distribution, using a different colormap: - -.. plot:: - :context: close-figs - - >>> ax = sns.kdeplot( - ... data=geyser, x="waiting", y="duration", - ... fill=True, thresh=0, levels=100, cmap="mako", - ... ) - +.. include:: ../docstrings/kdeplot.rst """.format( params=_param_docs, @@ -1199,6 +1046,9 @@ def rugplot( overlap with other elements. legend : bool If False, do not add a legend for semantic variables. 
+kwargs + Other keyword arguments are passed to + :class:`matplotlib.collections.LineCollection`. Returns ------- @@ -1207,58 +1057,7 @@ def rugplot( Examples -------- -Add a rug along one of the axes: - -.. plot:: - :context: close-figs - - >>> import seaborn as sns; sns.set() - >>> tips = sns.load_dataset("tips") - >>> ax = sns.kdeplot(data=tips, x="total_bill") - >>> ax = sns.rugplot(data=tips, x="total_bill") - -Add a rug along both axes: - -.. plot:: - :context: close-figs - - >>> ax = sns.scatterplot(data=tips, x="total_bill", y="tip") - >>> ax = sns.rugplot(data=tips, x="total_bill", y="tip") - -Represent a third variable with hue mapping: - -.. plot:: - :context: close-figs - - >>> ax = sns.scatterplot(data=tips, x="total_bill", y="tip", hue="time") - >>> ax = sns.rugplot(data=tips, x="total_bill", y="tip", hue="time") - -Draw a taller rug: - -.. plot:: - :context: close-figs - - >>> ax = sns.scatterplot(data=tips, x="total_bill", y="tip") - >>> ax = sns.rugplot(data=tips, x="total_bill", y="tip", height=.1) - -Put the rug outside the axes: - -.. plot:: - :context: close-figs - - >>> ax = sns.scatterplot(data=tips, x="total_bill", y="tip") - >>> ax = sns.rugplot(data=tips, x="total_bill", y="tip", - ... height=-.02, clip_on=False, - ... ) - -Show the density of a larger dataset using thinner lines and alpha blending: - -.. plot:: - :context: close-figs - - >>> diamonds = sns.load_dataset("diamonds") - >>> ax = sns.scatterplot(data=diamonds, x="carat", y="price", s=5) - >>> ax = sns.rugplot(data=diamonds, x="carat", y="price", lw=1, alpha=.005) +.. include:: ../docstrings/rugplot.rst """.format( params=_param_docs,