From 843acb1bf636f962e21bca899255458e22a3d9e7 Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Reyes Date: Sun, 10 Apr 2022 22:14:26 -0500 Subject: [PATCH] downloading chromium if using webpdf during exporter init --- setup.py | 3 ++ src/ploomber/tasks/notebook.py | 56 ++++++++++++++++------------------ tests/tasks/test_notebook.py | 35 ++++++++++++++++----- 3 files changed, 57 insertions(+), 37 deletions(-) diff --git a/setup.py b/setup.py index 14c65914c..d23837681 100644 --- a/setup.py +++ b/setup.py @@ -95,6 +95,9 @@ def read(name): # optional dependencies for @serializer and @unserializer 'joblib', 'cloudpickle', + + # for testing the webpdf converter + 'nbconvert[webpdf]', ] # packages needed for development diff --git a/src/ploomber/tasks/notebook.py b/src/ploomber/tasks/notebook.py index d9264a16b..f990a6886 100644 --- a/src/ploomber/tasks/notebook.py +++ b/src/ploomber/tasks/notebook.py @@ -5,34 +5,25 @@ import subprocess from subprocess import PIPE from pathlib import Path -import importlib +from importlib.util import find_spec import warnings +import jupytext +import nbformat +import nbconvert from nbconvert import ExporterNameError from nbconvert.exporters.webpdf import WebPDFExporter -try: - # papermill is importing a deprecated module from pyarrow - with warnings.catch_warnings(): - warnings.simplefilter('ignore', FutureWarning) - import papermill as pm -except ImportError: - pm = None - -try: - import jupytext -except ImportError: - jupytext = None - -try: - import nbformat -except ImportError: - nbformat = None +# papermill is importing a deprecated module from pyarrow +with warnings.catch_warnings(): + warnings.simplefilter('ignore', FutureWarning) + import papermill as pm try: - import nbconvert + # this is an optional dependency + from pyppeteer import chromium_downloader except ImportError: - nbconvert = None + chromium_downloader = None from ploomber.exceptions import TaskBuildError, TaskInitializationError from ploomber.sources import NotebookSource @@ -91,10 +82,13 @@ def _suggest_passing_product_dictionary(): 'and any other output paths in other keys)') -def _check_can_use_exporter(exporter): +def _check_exporter(exporter, path_to_output): + """ + Validate if the user can use the selected exporter + """ if isinstance(exporter, WebPDFExporter): - pyppeteer_installed = importlib.util.find_spec('pyppeteer') is not None + pyppeteer_installed = find_spec('pyppeteer') is not None if not pyppeteer_installed: raise TaskInitializationError( @@ -102,11 +96,16 @@ def _check_can_use_exporter(exporter): 'webpdf, install it ' 'with:\npip install "nbconvert[webpdf]"') else: - from pyppeteer import chromium_downloader - if not chromium_downloader.check_chromium(): chromium_downloader.download_chromium() + if Path(path_to_output).suffix != '.pdf': + raise TaskInitializationError( + 'Expected output to have ' + 'extension .pdf when using the webpdf ' + f'exporter, got: {path_to_output}. Change the extension ' + 'and try again') + class NotebookConverter: """ @@ -169,8 +168,6 @@ def __init__(self, self._exporter = self._get_exporter(exporter_name, path_to_output) - _check_can_use_exporter(self._exporter) - self._path_to_output = path_to_output self._nbconvert_export_kwargs = nbconvert_export_kwargs or {} @@ -201,7 +198,6 @@ def _get_exporter(exporter_name, path_to_output): if an exporter can't be located """ extension2exporter_name = {'md': 'markdown'} - exporter_params = {'webpdf': {'allow_chromium_download': True}} # sometimes extension does not match with the exporter name, fix # if needed @@ -213,9 +209,7 @@ def _get_exporter(exporter_name, path_to_output): else: try: exporter = nbconvert.get_exporter(exporter_name) - params = exporter_params.get(exporter_name) - if params: - exporter = exporter(**params) + # nbconvert 5.6.1 raises ValueError, beginning in version 6, # it raises ExporterNameError. However the exception is defined # since 5.6.1 so we can safely import it @@ -232,6 +226,8 @@ def _get_exporter(exporter_name, path_to_output): "Choose one from: " f'{pretty_print.iterable(names)}') + _check_exporter(exporter, path_to_output) + return exporter @staticmethod diff --git a/tests/tasks/test_notebook.py b/tests/tasks/test_notebook.py index 1973c5a47..3b243f8f4 100644 --- a/tests/tasks/test_notebook.py +++ b/tests/tasks/test_notebook.py @@ -20,6 +20,27 @@ def fake_from_notebook_node(self, nb, resources): return bytes(42), None +def test_checks_exporter(monkeypatch): + # simulate pyppeteer not installed + monkeypatch.setattr(notebook, 'find_spec', lambda _: None) + + with pytest.raises(TaskInitializationError) as excinfo: + notebook.NotebookConverter('out.pdf', 'webpdf') + + expected = 'pip install "nbconvert[webpdf]"' + assert expected in str(excinfo.value) + + +def test_downloads_chromium_if_needed(monkeypatch): + mock = Mock() + mock.check_chromium.return_value = False + monkeypatch.setattr(notebook, 'chromium_downloader', mock) + + notebook.NotebookConverter('out.pdf', 'webpdf') + + mock.download_chromium.assert_called_once_with() + + def test_error_when_path_has_no_extension(): with pytest.raises(TaskInitializationError) as excinfo: notebook.NotebookConverter('a') @@ -52,12 +73,13 @@ def test_notebook_converter_get_exporter_from_name(exporter_name, exporter): assert converter._exporter == exporter -def test_notebook_convertor_get_exporter_webpdf(): - exporter_name = 'webpdf' - exporter = nbconvert.exporters.webpdf.WebPDFExporter - converter = notebook.NotebookConverter('file.ext', exporter_name) - assert isinstance(converter._exporter, exporter) - assert converter._exporter.allow_chromium_download is True +def test_notebook_converter_validates_extension(): + with pytest.raises(TaskInitializationError) as excinfo: + notebook.NotebookConverter('file.not_a_pdf', 'webpdf') + + expected = ('Expected output to have extension .pdf when using the ' + 'webpdf exporter, got: file.not_a_pdf') + assert expected in str(excinfo.value) @pytest.mark.parametrize('output', [ @@ -620,7 +642,6 @@ def tmp_dag(tmp_directory): def test_develop_saves_changes(tmp_dag, monkeypatch): - def mock_jupyter_notebook(args, check): nb = jupytext.reads('2 + 2', fmt='py') # args: "jupyter" {app} {path} {other args, ...}