DOC: Use nbsphinx for notebook doc build (#15581)
Adds a new doc dependency, nbsphinx, for converting Jupyter notebooks
to reST, which works better with the Sphinx conversion process.
Removes the hacky notebook -> HTML -> raw include we had before.
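
For context, the Sphinx side of this change is small. The sketch below mirrors the conf.py hunk further down in this diff (extension registration, checkpoint exclusion, error tolerance); it is an illustrative excerpt, not the full pandas conf.py.

# Illustrative conf.py excerpt; the real hunk appears in the diff below.
# nbsphinx renders .ipynb files directly during the Sphinx build, so the
# old execute-notebook -> HTML -> raw-include step in doc/make.py goes away.
extensions = [
    'sphinx.ext.intersphinx',
    'sphinx.ext.mathjax',
    'nbsphinx',
]

# Ignore editor checkpoint copies of the notebooks.
exclude_patterns = ['**.ipynb_checkpoints']

# Keep the doc build going even if a notebook cell raises.
nbsphinx_allow_errors = True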
TomAugspurger committed Apr 8, 2017
1 parent 84de51c commit 88f5851
Showing 10 changed files with 118 additions and 174 deletions.
1 change: 1 addition & 0 deletions ci/requirements-3.5_DOC.run
@@ -5,6 +5,7 @@ nbconvert
nbformat
notebook
matplotlib
+seaborn
scipy
lxml
beautifulsoup4
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-3.5_DOC.sh
@@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]"

pip install pandas-gbq

-conda install -n pandas -c conda-forge feather-format
+conda install -n pandas -c conda-forge feather-format nbsphinx pandoc

conda install -n pandas -c r r rpy2 --yes
2 changes: 2 additions & 0 deletions ci/requirements_all.txt
@@ -3,6 +3,7 @@ pytest-cov
pytest-xdist
flake8
sphinx
+nbsphinx
ipython
python-dateutil
pytz
@@ -19,6 +20,7 @@ scipy
numexpr
pytables
matplotlib
+seaborn
lxml
sqlalchemy
bottleneck
4 changes: 3 additions & 1 deletion doc/README.rst
@@ -81,7 +81,9 @@ have ``sphinx`` and ``ipython`` installed. `numpydoc
<https://github.com/numpy/numpydoc>`_ is used to parse the docstrings that
follow the Numpy Docstring Standard (see above), but you don't need to install
this because a local copy of ``numpydoc`` is included in the pandas source
-code.
+code. `nbsphinx <https://nbsphinx.readthedocs.io/>`_ is used to convert
+Jupyter notebooks. You will need to install it if you intend to modify any of
+the notebooks included in the documentation.

Furthermore, it is recommended to have all `optional dependencies
<http://pandas.pydata.org/pandas-docs/dev/install.html#optional-dependencies>`_
122 changes: 29 additions & 93 deletions doc/make.py
@@ -106,106 +106,42 @@ def clean():


@contextmanager
-def cleanup_nb(nb):
-    try:
-        yield
-    finally:
-        try:
-            os.remove(nb + '.executed')
-        except OSError:
-            pass
-
-
-def get_kernel():
-    """Find the kernel name for your python version"""
-    return 'python%s' % sys.version_info.major
-
-
-def execute_nb(src, dst, allow_errors=False, timeout=1000, kernel_name=''):
-    """
-    Execute notebook in `src` and write the output to `dst`
-
-    Parameters
-    ----------
-    src, dst: str
-        path to notebook
-    allow_errors: bool
-    timeout: int
-    kernel_name: str
-        defaults to value set in notebook metadata
-
-    Returns
-    -------
-    dst: str
-    """
-    import nbformat
-    from nbconvert.preprocessors import ExecutePreprocessor
-
-    with io.open(src, encoding='utf-8') as f:
-        nb = nbformat.read(f, as_version=4)
-
-    ep = ExecutePreprocessor(allow_errors=allow_errors,
-                             timeout=timeout,
-                             kernel_name=kernel_name)
-    ep.preprocess(nb, resources={})
-
-    with io.open(dst, 'wt', encoding='utf-8') as f:
-        nbformat.write(nb, f)
-    return dst
-
-
-def convert_nb(src, dst, to='html', template_file='basic'):
+def maybe_exclude_notebooks():
    """
-    Convert a notebook `src`.
-
-    Parameters
-    ----------
-    src, dst: str
-        filepaths
-    to: {'rst', 'html'}
-        format to export to
-    template_file: str
-        name of template file to use. Default 'basic'
+    Skip building the notebooks if pandoc is not installed.
+    This assumes that nbsphinx is installed.
    """
-    from nbconvert import HTMLExporter, RSTExporter
-
-    dispatch = {'rst': RSTExporter, 'html': HTMLExporter}
-    exporter = dispatch[to.lower()](template_file=template_file)
-
-    (body, resources) = exporter.from_filename(src)
-    with io.open(dst, 'wt', encoding='utf-8') as f:
-        f.write(body)
-    return dst
+    base = os.path.dirname(__file__)
+    notebooks = [os.path.join(base, 'source', nb)
+                 for nb in ['style.ipynb']]
+    contents = {}
+    try:
+        import nbconvert
+        nbconvert.utils.pandoc.get_pandoc_version()
+    except (ImportError, nbconvert.utils.pandoc.PandocMissing):
+        print("Warning: Pandoc is not installed. Skipping Notebooks.")
+        for nb in notebooks:
+            with open(nb, 'rt') as f:
+                contents[nb] = f.read()
+            os.remove(nb)
+    yield
+    for nb, content in contents.items():
+        with open(nb, 'wt') as f:
+            f.write(content)


def html():
    check_build()

-    notebooks = [
-        'source/html-styling.ipynb',
-    ]
-
-    for nb in notebooks:
-        with cleanup_nb(nb):
-            try:
-                print("Converting %s" % nb)
-                kernel_name = get_kernel()
-                executed = execute_nb(nb, nb + '.executed', allow_errors=True,
-                                      kernel_name=kernel_name)
-                convert_nb(executed, nb.rstrip('.ipynb') + '.html')
-            except (ImportError, IndexError) as e:
-                print(e)
-                print("Failed to convert %s" % nb)
-
-    if os.system('sphinx-build -P -b html -d build/doctrees '
-                 'source build/html'):
-        raise SystemExit("Building HTML failed.")
-    try:
-        # remove stale file
-        os.remove('source/html-styling.html')
-        os.remove('build/html/pandas.zip')
-    except:
-        pass
+    with maybe_exclude_notebooks():
+        if os.system('sphinx-build -P -b html -d build/doctrees '
+                     'source build/html'):
+            raise SystemExit("Building HTML failed.")
+        try:
+            # remove stale file
+            os.remove('build/html/pandas.zip')
+        except:
+            pass


def zip_html():
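As a rough standalone illustration of the pandoc guard used by maybe_exclude_notebooks above (not part of this commit), detecting pandoc through nbconvert looks like this; the helper name is hypothetical:

# Hypothetical helper mirroring the pandoc check in maybe_exclude_notebooks();
# nbsphinx relies on pandoc (installed from conda-forge above) to convert
# notebook markdown cells, so the build skips notebooks when it is missing.
from nbconvert.utils.pandoc import get_pandoc_version, PandocMissing


def pandoc_available():
    """Return True if nbconvert can locate a pandoc executable."""
    try:
        get_pandoc_version()
    except PandocMissing:
        return False
    return True


if __name__ == '__main__':
    print("pandoc available:", pandoc_available())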
14 changes: 10 additions & 4 deletions doc/source/conf.py
@@ -52,14 +52,16 @@
'numpydoc', # used to parse numpy-style docstrings for autodoc
'ipython_sphinxext.ipython_directive',
'ipython_sphinxext.ipython_console_highlighting',
'IPython.sphinxext.ipython_console_highlighting', # lowercase didn't work
'sphinx.ext.intersphinx',
'sphinx.ext.coverage',
'sphinx.ext.mathjax',
'sphinx.ext.ifconfig',
'sphinx.ext.linkcode',
+'nbsphinx',
]


+exclude_patterns = ['**.ipynb_checkpoints']

with open("index.rst") as f:
    index_rst_lines = f.readlines()
@@ -70,15 +72,16 @@
# JP: added from sphinxdocs
autosummary_generate = False

-if any([re.match("\s*api\s*",l) for l in index_rst_lines]):
+if any([re.match("\s*api\s*", l) for l in index_rst_lines]):
    autosummary_generate = True

files_to_delete = []
for f in os.listdir(os.path.dirname(__file__)):
-    if not f.endswith('.rst') or f.startswith('.') or os.path.basename(f) == 'index.rst':
+    if (not f.endswith(('.ipynb', '.rst')) or
+            f.startswith('.') or os.path.basename(f) == 'index.rst'):
        continue

-    _file_basename = f.split('.rst')[0]
+    _file_basename = os.path.splitext(f)[0]
    _regex_to_match = "\s*{}\s*$".format(_file_basename)
    if not any([re.match(_regex_to_match, line) for line in index_rst_lines]):
        files_to_delete.append(f)
@@ -261,6 +264,9 @@
# Output file base name for HTML help builder.
htmlhelp_basename = 'pandas'

+# -- Options for nbsphinx ------------------------------------------------
+
+nbsphinx_allow_errors = True

# -- Options for LaTeX output --------------------------------------------

5 changes: 2 additions & 3 deletions doc/source/contributing.rst
@@ -347,15 +347,14 @@ have ``sphinx`` and ``ipython`` installed. `numpydoc
<https://github.com/numpy/numpydoc>`_ is used to parse the docstrings that
follow the Numpy Docstring Standard (see above), but you don't need to install
this because a local copy of numpydoc is included in the *pandas* source
-code.
-`nbconvert <https://nbconvert.readthedocs.io/en/latest/>`_ and
-`nbformat <https://nbformat.readthedocs.io/en/latest/>`_ are required to build
+code. `nbsphinx <https://nbsphinx.readthedocs.io/>`_ is required to build
the Jupyter notebooks included in the documentation.

If you have a conda environment named ``pandas_dev``, you can install the extra
requirements with::

conda install -n pandas_dev sphinx ipython nbconvert nbformat
+conda install -n pandas_dev -c conda-forge nbsphinx

Furthermore, it is recommended to have all :ref:`optional dependencies <install.optional_dependencies>`.
installed. This is not strictly necessary, but be aware that you will see some error