Skip to content

Commit

Permalink
downloading chromium if using webpdf during exporter init
Browse files Browse the repository at this point in the history
  • Loading branch information
edublancas committed Apr 11, 2022
1 parent 4ae4507 commit 843acb1
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 37 deletions.
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ def read(name):
# optional dependencies for @serializer and @unserializer
'joblib',
'cloudpickle',

# for testing the webpdf converter
'nbconvert[webpdf]',
]

# packages needed for development
Expand Down
56 changes: 26 additions & 30 deletions src/ploomber/tasks/notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,25 @@
import subprocess
from subprocess import PIPE
from pathlib import Path
import importlib
from importlib.util import find_spec
import warnings

import jupytext
import nbformat
import nbconvert
from nbconvert import ExporterNameError
from nbconvert.exporters.webpdf import WebPDFExporter

try:
# papermill is importing a deprecated module from pyarrow
with warnings.catch_warnings():
warnings.simplefilter('ignore', FutureWarning)
import papermill as pm
except ImportError:
pm = None

try:
import jupytext
except ImportError:
jupytext = None

try:
import nbformat
except ImportError:
nbformat = None
# papermill is importing a deprecated module from pyarrow
with warnings.catch_warnings():
warnings.simplefilter('ignore', FutureWarning)
import papermill as pm

try:
import nbconvert
# this is an optional dependency
from pyppeteer import chromium_downloader
except ImportError:
nbconvert = None
chromium_downloader = None

from ploomber.exceptions import TaskBuildError, TaskInitializationError
from ploomber.sources import NotebookSource
Expand Down Expand Up @@ -91,22 +82,30 @@ def _suggest_passing_product_dictionary():
'and any other output paths in other keys)')


def _check_can_use_exporter(exporter):
def _check_exporter(exporter, path_to_output):
"""
Validate if the user can use the selected exporter
"""

if isinstance(exporter, WebPDFExporter):
pyppeteer_installed = importlib.util.find_spec('pyppeteer') is not None
pyppeteer_installed = find_spec('pyppeteer') is not None

if not pyppeteer_installed:
raise TaskInitializationError(
'pyppeteer is required to use '
'webpdf, install it '
'with:\npip install "nbconvert[webpdf]"')
else:
from pyppeteer import chromium_downloader

if not chromium_downloader.check_chromium():
chromium_downloader.download_chromium()

if Path(path_to_output).suffix != '.pdf':
raise TaskInitializationError(
'Expected output to have '
'extension .pdf when using the webpdf '
f'exporter, got: {path_to_output}. Change the extension '
'and try again')


class NotebookConverter:
"""
Expand Down Expand Up @@ -169,8 +168,6 @@ def __init__(self,

self._exporter = self._get_exporter(exporter_name, path_to_output)

_check_can_use_exporter(self._exporter)

self._path_to_output = path_to_output
self._nbconvert_export_kwargs = nbconvert_export_kwargs or {}

Expand Down Expand Up @@ -201,7 +198,6 @@ def _get_exporter(exporter_name, path_to_output):
if an exporter can't be located
"""
extension2exporter_name = {'md': 'markdown'}
exporter_params = {'webpdf': {'allow_chromium_download': True}}

# sometimes extension does not match with the exporter name, fix
# if needed
Expand All @@ -213,9 +209,7 @@ def _get_exporter(exporter_name, path_to_output):
else:
try:
exporter = nbconvert.get_exporter(exporter_name)
params = exporter_params.get(exporter_name)
if params:
exporter = exporter(**params)

# nbconvert 5.6.1 raises ValueError, beginning in version 6,
# it raises ExporterNameError. However the exception is defined
# since 5.6.1 so we can safely import it
Expand All @@ -232,6 +226,8 @@ def _get_exporter(exporter_name, path_to_output):
"Choose one from: "
f'{pretty_print.iterable(names)}')

_check_exporter(exporter, path_to_output)

return exporter

@staticmethod
Expand Down
35 changes: 28 additions & 7 deletions tests/tasks/test_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,27 @@ def fake_from_notebook_node(self, nb, resources):
return bytes(42), None


def test_checks_exporter(monkeypatch):
# simulate pyppeteer not installed
monkeypatch.setattr(notebook, 'find_spec', lambda _: None)

with pytest.raises(TaskInitializationError) as excinfo:
notebook.NotebookConverter('out.pdf', 'webpdf')

expected = 'pip install "nbconvert[webpdf]"'
assert expected in str(excinfo.value)


def test_downloads_chromium_if_needed(monkeypatch):
mock = Mock()
mock.check_chromium.return_value = False
monkeypatch.setattr(notebook, 'chromium_downloader', mock)

notebook.NotebookConverter('out.pdf', 'webpdf')

mock.download_chromium.assert_called_once_with()


def test_error_when_path_has_no_extension():
with pytest.raises(TaskInitializationError) as excinfo:
notebook.NotebookConverter('a')
Expand Down Expand Up @@ -52,12 +73,13 @@ def test_notebook_converter_get_exporter_from_name(exporter_name, exporter):
assert converter._exporter == exporter


def test_notebook_convertor_get_exporter_webpdf():
exporter_name = 'webpdf'
exporter = nbconvert.exporters.webpdf.WebPDFExporter
converter = notebook.NotebookConverter('file.ext', exporter_name)
assert isinstance(converter._exporter, exporter)
assert converter._exporter.allow_chromium_download is True
def test_notebook_converter_validates_extension():
with pytest.raises(TaskInitializationError) as excinfo:
notebook.NotebookConverter('file.not_a_pdf', 'webpdf')

expected = ('Expected output to have extension .pdf when using the '
'webpdf exporter, got: file.not_a_pdf')
assert expected in str(excinfo.value)


@pytest.mark.parametrize('output', [
Expand Down Expand Up @@ -620,7 +642,6 @@ def tmp_dag(tmp_directory):


def test_develop_saves_changes(tmp_dag, monkeypatch):

def mock_jupyter_notebook(args, check):
nb = jupytext.reads('2 + 2', fmt='py')
# args: "jupyter" {app} {path} {other args, ...}
Expand Down

0 comments on commit 843acb1

Please sign in to comment.