Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Using a notebook + nbconvert to execute code blocks #422

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion continuous_integration/travis/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ if [ "$DISTRIB" == "conda" ]; then
conda update -y conda

# Force conda to think about other dependencies that can break
export CONDA_PKGS="python=$PYTHON_VERSION pip numpy scipy setuptools matplotlib pillow pytest pytest-cov coverage seaborn sphinx_rtd_theme memory_profiler"
export CONDA_PKGS="python=$PYTHON_VERSION pip numpy scipy setuptools matplotlib pillow pytest pytest-cov coverage seaborn sphinx_rtd_theme memory_profiler nbconvert nbformat"
if [ "$INSTALL_MAYAVI" == "true" ]; then
conda create -yn testenv $CONDA_PKGS mayavi
else
Expand Down
2 changes: 2 additions & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
'sphinx.ext.coverage',
'sphinx.ext.mathjax',
'sphinx_gallery.gen_gallery',
'jupyter_sphinx.embed_widgets', # this puts the require js on the page for the widgets
]

# Add any paths that contain templates here, relative to this directory.
Expand Down Expand Up @@ -338,6 +339,7 @@ def setup(app):
'reference_url': {
'sphinx_gallery': None,
},
'executor': 'notebook',
'examples_dirs': examples_dirs,
'gallery_dirs': gallery_dirs,
'image_scrapers': image_scrapers,
Expand Down
13 changes: 13 additions & 0 deletions examples/plot_ipyvolume_scatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""
Scatter
=======

This example doesn't do much, it just makes a simple plot
"""
import ipyvolume as ipv
import numpy as np

# Number of points in the scatter plot.
N = 1000
# Draw a (3, N) array of standard-normal samples and unpack its rows into the
# x, y and z coordinates.
x, y, z = np.random.normal(0, 1, (3, N))
# NOTE(review): presumably figure() makes a new current figure that scatter()
# draws into and show() displays -- confirm against the ipyvolume docs.
fig = ipv.figure()
ipv.scatter(x, y, z)
ipv.show()
12 changes: 12 additions & 0 deletions examples/plot_pandas_df.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""
Pandas DataFrame
================

This example doesn't do much, it just shows a dataframe
"""
import numpy as np
import pandas as pd

# Tabulate the integers 0..99 next to their squares.
values = np.arange(100)
df = pd.DataFrame({'x': values, 'y': values ** 2})
# The bare expression below is the last statement of the example, so an
# interactive/notebook run shows the frame as the cell's result.
df
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ matplotlib
pillow
sphinx
pytest
nbconvert
nbformat
199 changes: 199 additions & 0 deletions sphinx_gallery/executenb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import json
import os

import nbformat
from jupyter_client.kernelspec import get_kernel_spec
from nbconvert.preprocessors.execute import executenb
from nbconvert.exporters.exporter import Exporter

from .gen_rst import indent, CODE_OUTPUT

# Bookkeeping snippet that reports the current memory usage and wall-clock
# time as a single ``application/json`` display output.
#
# Everything runs inside a throw-away function that is deleted afterwards, so
# no helper names (``time``, ``memory_usage``, ...) leak into, or get
# overwritten in, the namespace the example code runs in.
#
# NOTE: this text is also embedded in CODE_PRE, which is formatted with ``%``;
# it must therefore not contain any percent sign.
_CODE_REPORT_STATS = """

def _sg_report_stats():
    import time
    memory = -1
    try:
        from memory_profiler import memory_usage
        memory, _ = memory_usage(lambda: None, max_usage=True, retval=True,
                                 multiprocess=True)
        # older memory_profiler releases return a one-element list here
        if isinstance(memory, (list, tuple)):
            memory = memory[0]
    except Exception:
        # best effort: without a working memory_profiler we report -1
        memory = -1
    display({'application/json': {'memory_usage': memory,
                                  'time': time.time()}}, raw=True)


_sg_report_stats()
del _sg_report_stats
"""

# The first cell of the notebook selects the inline matplotlib backend, makes
# ``sys.argv`` look as if the example was run as a script, and outputs
# information on memory usage and the current time.
# It is formatted with ``CODE_PRE % src_file`` (hence the doubled ``%%``).
CODE_PRE = r"""
%%matplotlib inline
import sys
src_file = %r
sys.argv = [src_file]
""" + _CODE_REPORT_STATS

# After each cell, we again output the memory and current time for statistics.
CODE_POST_CELL = _CODE_REPORT_STATS

# Wrapper for the code of one example block; formatted with ``CODE_CELL % code``.
CODE_CELL = """
%s
"""

# In the last cell, we collect a possible widget state, and again the time.
# Two display outputs are produced, in this order: the widget manager state
# (``None`` when ipywidgets is unavailable) and ``{'time': ...}``.
CODE_POST = """

def _sg_report_final():
    import time
    state = None
    try:
        import ipywidgets as widgets
        state = widgets.Widget.get_manager_state()
    except Exception:
        # best effort: no ipywidgets (or no widgets) means no state to embed
        state = None
    display({'application/json': state}, raw=True)
    display({'application/json': {'time': time.time()}}, raw=True)


_sg_report_final()
del _sg_report_final
"""

# NOTE(review): nothing visible in this module reads or fills this dict; the
# renderers are registered in the ``_mime_renderers`` list instead -- confirm
# whether this is a leftover that can be removed.
_mime_renderer = {}
def mime_renderer_jupyter_widgets(json_data):
    """Render a widget-view payload as an RST ``raw:: html`` block.

    ``json_data`` is serialised with ``json.dumps`` and substituted as
    ``view_spec`` into ``ipywidgets.embed.widget_view_template``; the
    resulting HTML is wrapped by :func:`mime_renderer_text_html`.
    """
    # imported here so that ipywidgets is only needed when a widget is rendered
    from ipywidgets.embed import widget_view_template
    view_spec = json.dumps(json_data)
    return mime_renderer_text_html(
        widget_view_template.format(view_spec=view_spec))

def mime_renderer_image_png(json_data):
    """Render base64 PNG data as an inline ``<img>`` in a ``raw:: html`` block.

    ``json_data`` is inserted verbatim into a ``data:image/png;base64,`` URI
    and the tag is wrapped by :func:`mime_renderer_text_html`.
    """
    return mime_renderer_text_html(
        '<img src="data:image/png;base64,%s"/>\n' % json_data)

def mime_renderer_text_plain(json_data):
    """Render a ``text/plain`` payload as an RST literal code block.

    Parameters
    ----------
    json_data : str or list of str
        The plain-text representation of a cell output. A list of strings
        (the multi-line form notebook files may use on disk) is joined first.

    Returns
    -------
    rst : str
        A ``.. code-block:: none`` directive containing the text indented by
        four spaces, or an empty string when there is nothing to show.
    """
    text = json_data
    if isinstance(text, (list, tuple)):
        text = ''.join(text)
    text = text.strip('\n').expandtabs()
    if not text.strip():
        return ''
    # blank lines stay empty so that no trailing whitespace ends up in the rst
    body = '\n'.join('    ' + line if line.strip() else ''
                     for line in text.splitlines())
    return '\n.. code-block:: none\n\n' + body + '\n'


def mime_renderer_text_html(json_data):
    """Wrap an HTML snippet in an RST ``raw:: html`` directive.

    The snippet is indented by four spaces with ``indent`` and substituted
    into the directive template; the resulting rst text is returned.
    """
    indented_snippet = indent(json_data, u' ' * 4)
    return """
.. raw:: html

{0}
""".format(indented_snippet)

from base64 import b64decode


# ordered by priority, maybe we could use nbconvert for this
# Each entry is a ``(mime type, renderer)`` pair. For every rich output,
# execute_script_notebook walks this list in order and uses the first renderer
# whose mime type is present in the output's data.
_mime_renderers = [
    ('application/vnd.jupyter.widget-view+json', mime_renderer_jupyter_widgets),
    ('text/html', mime_renderer_text_html),
    ('image/png', mime_renderer_image_png),
    ('text/plain', mime_renderer_text_plain)
]

def _build_notebook(script_blocks, script_vars, gallery_conf):
    """Assemble the not-yet-executed notebook for one example script.

    Layout: one setup cell, then for every script block a code cell followed
    by a statistics cell, and finally one closing cell.
    """
    kernel_name = gallery_conf.get('jupyter_kernel', 'python3')
    kernel_spec = get_kernel_spec(kernel_name)

    nb = nbformat.v4.new_notebook(metadata={'kernelspec': {
        'display_name': kernel_spec.display_name,
        'language': kernel_spec.language,
        'name': kernel_name,
    }})
    new_code_cell = nbformat.v4.new_code_cell
    cells = nb['cells']
    cells.append(new_code_cell(CODE_PRE % script_vars['src_file']))

    for blabel, bcontent, _lineno in script_blocks:
        if not script_vars['execute_script'] or blabel == 'text':
            # keeps the "two cells per block" layout for non-executed blocks
            source = '# placeholder for text block'
        else:
            source = CODE_CELL % bcontent
        cells.append(new_code_cell(source))
        cells.append(new_code_cell(CODE_POST_CELL))

    cells.append(new_code_cell(CODE_POST))
    return nb


def _json_payloads(cell):
    """Return the ``application/json`` payloads displayed by a cell, in order.

    Outputs without such a payload (for instance stderr streams) are skipped.
    """
    return [output['data']['application/json']
            for output in cell.outputs
            if 'application/json' in output.get('data', {})]


def _render_code_cell(code_cell, script_vars):
    """Convert the outputs of one executed code cell into an rst snippet."""
    stdout_parts = []
    extra_output_parts = []
    for output in code_cell.outputs:
        if output['output_type'] == 'stream':
            if output['name'] == 'stdout':
                stdout_parts.append(output['text'])
            elif output['name'] == 'stderr':
                print('warning, stderr detected', output['text'])
        if 'data' not in output:
            continue

        data = output['data']
        # the first matching entry of the priority list renders the output
        for mime_type, renderer in _mime_renderers:
            if mime_type in data:
                extra_output_parts.append(renderer(data[mime_type]))
                break
        else:
            raise ValueError('cell not rendered: no renderer for mime types '
                             '%s' % sorted(data))

        # png data is additionally written to the next image path; here we
        # could support other images, and make a screenshot of a widget or
        # HTML/pdf even
        if 'image/png' in data:
            path = next(script_vars['image_path_iterator'])
            print('save to', path)
            with open(path, 'wb') as f:
                f.write(b64decode(data['image/png']))

    my_stdout = ''.join(stdout_parts).strip().expandtabs()
    if my_stdout:
        stdout = CODE_OUTPUT.format(indent(my_stdout, u' ' * 4))
    else:
        stdout = ''

    images_rst = ''
    code_output = u"\n{0}\n\n{1}\n\n".format(images_rst, stdout)
    code_output += '\n\n'.join(extra_output_parts)
    return code_output


def execute_script_notebook(script_blocks, script_vars, gallery_conf):
    """Execute the script blocks in a Jupyter kernel and capture the output.

    The blocks are turned into a notebook that is executed with nbconvert
    while the working directory is the directory of the source file.

    Parameters
    ----------
    script_blocks : list
        (label, content, line_number) tuples; blocks labelled ``'text'`` are
        replaced by a placeholder cell.
    script_vars : dict
        Reads ``'src_file'``, ``'execute_script'`` and
        ``'image_path_iterator'``; ``'memory_delta'`` is written on return.
    gallery_conf : dict
        Reads the optional keys ``'jupyter_kernel'`` (default ``'python3'``)
        and ``'nbconvert'`` -> ``'snapshot'`` (keyword arguments for the
        snapshot preprocessor).

    Returns
    -------
    output_blocks : list
        One rst snippet per script block.
    time_elapsed : float
        Difference between the timestamps reported by the last and the first
        notebook cell.

    Raises
    ------
    ValueError
        If a rich output has no mime type any renderer supports.
    RuntimeError
        If a bookkeeping cell did not report the expected statistics.
    """
    nb = _build_notebook(script_blocks, script_vars, gallery_conf)

    # this will execute the notebook and populate the cells
    from nbconvert.preprocessors import (ExecutePreprocessor,
                                         CSSHTMLHeaderPreprocessor)
    from nbconvert.preprocessors.snapshot import SnapshotPreProcessor
    nbconvert_snapshot_config = gallery_conf.get('nbconvert', {}).get(
        'snapshot', {})

    src_file = script_vars['src_file']
    cwd = os.getcwd()
    # dirname() is '' for a bare file name, and chdir('') would fail
    os.chdir(os.path.dirname(src_file) or os.curdir)
    try:
        preprocessors = [
            ExecutePreprocessor(enabled=True, allow_errors=False),
            CSSHTMLHeaderPreprocessor(enabled=True),
            SnapshotPreProcessor(enabled=True, **nbconvert_snapshot_config),
        ]
        exporter = Exporter(preprocessors=preprocessors,
                            default_preprocessors=[])
        nb, resources = exporter.from_notebook_node(nb, {})
    finally:
        os.chdir(cwd)

    cell_pre = nb.cells[0]
    cell_post = nb.cells[-1]

    pre_stats = _json_payloads(cell_pre)
    if not pre_stats:
        raise RuntimeError('notebook executor: the setup cell reported no '
                           'statistics')
    memory_start = pre_stats[0]['memory_usage']
    time_start = pre_stats[0]['time']

    output_blocks = []
    memory_usage = []

    # we have a pre cell, for each code block 2 cells, and a post cell
    for code_cell, info_cell in zip(nb.cells[1:-1:2], nb.cells[2:-1:2]):
        output_blocks.append(_render_code_cell(code_cell, script_vars))

        info = _json_payloads(info_cell)
        if len(info) != 1:
            raise RuntimeError('notebook executor: expected exactly one '
                               'statistics output per block, got %d'
                               % len(info))
        memory_usage.append(info[0]['memory_usage'])

    post_stats = _json_payloads(cell_post)
    if len(post_stats) != 2:
        raise RuntimeError('notebook executor: expected widget state and '
                           'time from the closing cell, got %d outputs'
                           % len(post_stats))
    widget_state, time_info = post_stats
    if output_blocks and widget_state:
        import ipywidgets.embed
        widget_state_html_snippet = ipywidgets.embed.snippet_template.format(
            load='', widget_views='', json_data=json.dumps(widget_state))
        output_blocks[-1] = (output_blocks[-1] + '\n\n' +
                             mime_renderer_text_html(widget_state_html_snippet))

    time_end = time_info['time']
    # a script without blocks has no samples; report a zero delta then
    memory_peak = max(memory_usage) if memory_usage else memory_start
    script_vars['memory_delta'] = memory_peak - memory_start
    return output_blocks, time_end - time_start
12 changes: 11 additions & 1 deletion sphinx_gallery/gen_rst.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ def executable_script(src_file, gallery_conf):
return execute


def execute_script(script_blocks, script_vars, gallery_conf):
def execute_script_classic(script_blocks, script_vars, gallery_conf):
"""Execute and capture output from python script already in block structure

Parameters
Expand Down Expand Up @@ -574,6 +574,12 @@ def execute_script(script_blocks, script_vars, gallery_conf):

return output_blocks, time_elapsed

# Registry of the available script executors, keyed by the name that can be
# given as ``gallery_conf['executor']``.
_executors = {'classic': execute_script_classic}
try:
    from .executenb import execute_script_notebook
except ImportError:
    # The notebook executor is optional: without its dependencies importable
    # it is simply not registered. Only ImportError is ignored so that genuine
    # bugs in the executenb module are not silently hidden.
    pass
else:
    _executors['notebook'] = execute_script_notebook

def generate_file_rst(fname, target_dir, src_dir, gallery_conf):
"""Generate the rst file for a given example.
Expand Down Expand Up @@ -621,6 +627,10 @@ def generate_file_rst(fname, target_dir, src_dir, gallery_conf):
'target_file': target_file}

file_conf, script_blocks = split_code_and_text_blocks(src_file)
# executor can be a callable, or 'classic'/'notebook'
execute_script = gallery_conf.get('executor', execute_script_classic)
if not callable(execute_script):
execute_script = _executors[execute_script]
output_blocks, time_elapsed = execute_script(script_blocks,
script_vars,
gallery_conf)
Expand Down
9 changes: 9 additions & 0 deletions sphinx_gallery/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import sphinx_gallery.gen_gallery
import sphinx_gallery.gen_rst
from sphinx_gallery import sphinx_compatibility
from sphinx_gallery.gen_gallery import _complete_gallery_conf
from sphinx_gallery.utils import _TempDir


Params = collections.namedtuple('Params', 'args kwargs')
Expand Down Expand Up @@ -70,6 +72,13 @@ def log_collector():
sphinx_gallery.gen_rst.logger = orig_gr_logger


@pytest.fixture
def gallery_conf(tmpdir):
    """Return a completed sphinx-gallery configuration for tests.

    The configuration is built with ``_complete_gallery_conf`` for the
    pytest ``tmpdir``; ``examples_dir`` is a fresh ``_TempDir`` and
    ``gallery_dir`` is the ``tmpdir`` itself.
    """
    conf = _complete_gallery_conf({}, str(tmpdir), True, False)
    overrides = dict(examples_dir=_TempDir(), gallery_dir=str(tmpdir))
    conf.update(**overrides)
    return conf

@pytest.fixture
def unicode_sample(tmpdir):
"""Return temporary python source file with Unicode in various places"""
Expand Down
38 changes: 38 additions & 0 deletions sphinx_gallery/tests/test_executenb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import codecs
import os
import re

from sphinx_gallery.gen_rst import MixedEncodingStringIO
import sphinx_gallery.gen_rst as sg
from sphinx_gallery.tests.test_gen_rst import CONTENT


def test_executenb(gallery_conf):
    """Check that the notebook executor captures the output of examples.

    Three example files with the same CONTENT are written and converted with
    the ``'notebook'`` executor. Only ``plot_0`` matches ``filename_pattern``,
    so only its generated rst file must contain the expected code output.
    """
    gallery_conf.update(filename_pattern=re.escape(os.sep) + 'plot_0')
    gallery_conf.update(executor='notebook')

    # NOTE(review): the leading whitespace inside these literals has to match
    # the indentation of the generated output block exactly -- confirm it
    # against the expectation used in test_gen_rst.
    code_output = ('\n Out:\n\n .. code-block:: none\n'
                   '\n'
                   ' Óscar output\n'
                   ' log:Óscar\n'
                   ' $\\langle n_\\uparrow n_\\downarrow \\rangle$\n\n'
                   )
    # create three files in tempdir (only one matches the pattern)
    fnames = ['plot_0.py', 'plot_1.py', 'plot_2.py']
    for fname in fnames:
        with codecs.open(os.path.join(gallery_conf['examples_dir'], fname),
                         mode='w', encoding='utf-8') as f:
            f.write('\n'.join(CONTENT))
        # generate rst file
        sg.generate_file_rst(fname, gallery_conf['gallery_dir'],
                             gallery_conf['examples_dir'], gallery_conf)
        # read rst file and check if it contains code output
        rst_fname = os.path.splitext(fname)[0] + '.rst'
        with codecs.open(os.path.join(gallery_conf['gallery_dir'], rst_fname),
                         mode='r', encoding='utf-8') as f:
            rst = f.read()
        # the pattern is matched against the path of the generated rst file
        if re.search(gallery_conf['filename_pattern'],
                     os.path.join(gallery_conf['gallery_dir'], rst_fname)):
            assert code_output in rst
        else:
            assert code_output not in rst
9 changes: 1 addition & 8 deletions sphinx_gallery/tests/test_gen_rst.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import sphinx_gallery.gen_rst as sg
from sphinx_gallery import downloads
from sphinx_gallery.gen_gallery import generate_dir_rst, _complete_gallery_conf
from sphinx_gallery.gen_gallery import generate_dir_rst
from sphinx_gallery.utils import _TempDir

CONTENT = [
Expand Down Expand Up @@ -174,13 +174,6 @@ def test_md5sums():
os.remove(f.name)


@pytest.fixture
def gallery_conf(tmpdir):
"""Sets up a test sphinx-gallery configuration"""
gallery_conf = _complete_gallery_conf({}, str(tmpdir), True, False)
gallery_conf.update(examples_dir=_TempDir(), gallery_dir=str(tmpdir))
return gallery_conf


def test_fail_example(gallery_conf, log_collector):
"""Test that failing examples are only executed until failing block"""
Expand Down
1 change: 0 additions & 1 deletion sphinx_gallery/tests/test_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import sphinx_gallery.gen_rst as sg
from sphinx_gallery.notebook import (rst2md, jupyter_notebook, save_notebook,
python_to_jupyter_cli)
from sphinx_gallery.tests.test_gen_rst import gallery_conf

try:
FileNotFoundError
Expand Down