From d2200db79e13efa5004f1273d5a8f96009716d88 Mon Sep 17 00:00:00 2001 From: jbannister Date: Thu, 14 Apr 2022 12:41:21 +0100 Subject: [PATCH] Attempt a fix for py37 build. Unpinning nbconvert & adding ipython_genutils --- .circleci/config.yml | 2 +- CONTRIBUTING.md | 2 +- notebooker/constants.py | 2 +- notebooker/execute_notebook.py | 2 +- notebooker/serialization/mongo.py | 28 ++++--- notebooker/utils/conversion.py | 1 + setup.py | 31 ++++---- tests/conftest.py | 44 ++++++++++- tests/integration/conftest.py | 3 +- tests/integration/test_e2e.py | 24 +++--- tests/integration/test_execute_notebook.py | 80 +++++++++++++------- tests/integration/test_report_hunter.py | 2 +- tests/integration/utils/test_conversion.py | 85 ++++++++++++++++++++++ tests/unit/utils/test_conversion.py | 71 ------------------ 14 files changed, 235 insertions(+), 142 deletions(-) create mode 100644 tests/integration/utils/test_conversion.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 9b6d92d4..7ac2e2a7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -81,7 +81,7 @@ defaults: &defaults virtualenv ci . ci/bin/activate pip install six lxml flake8 tox pytest black .[test] - python setup.py develop + pip install --editable . # Save dependency cache - save_cache: key: v1-dep-{{ .Branch }}-{{ epoch }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f166e1f8..d2fbdaf2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,7 +1,7 @@ # Dev environment setup Dev environment setup is largely the same as setup in the tutorial, but instead of pip installing the version -in pypi, you can set up using `python setup.py develop`. +in pypi, you can set up using `pip install --editable .`. # Contributing diff --git a/notebooker/constants.py b/notebooker/constants.py index 280f3d36..dfca47c7 100644 --- a/notebooker/constants.py +++ b/notebooker/constants.py @@ -148,7 +148,7 @@ class NotebookResultComplete(NotebookResultBase): raw_html = attr.ib(default="") email_html = attr.ib(default="") update_time = attr.ib(default=datetime.datetime.now()) - pdf = attr.ib(default="") + pdf = attr.ib(default=b"") report_title = attr.ib(default="") overrides = attr.ib(default=attr.Factory(dict)) mailto = attr.ib(default="") diff --git a/notebooker/execute_notebook.py b/notebooker/execute_notebook.py index 3af8418a..750ba0b4 100644 --- a/notebooker/execute_notebook.py +++ b/notebooker/execute_notebook.py @@ -110,7 +110,7 @@ def _run_checks( logger.info("Saving output notebook as HTML from {}".format(ipynb_executed_path)) html, resources = ipython_to_html(ipynb_executed_path, job_id) - email_html, resources = ipython_to_html(ipynb_executed_path, job_id, hide_code=hide_code) + email_html, _ = ipython_to_html(ipynb_executed_path, job_id, hide_code=hide_code) pdf = ipython_to_pdf(raw_executed_ipynb, report_title, hide_code=hide_code) if generate_pdf_output else "" notebook_result = NotebookResultComplete( diff --git a/notebooker/serialization/mongo.py b/notebooker/serialization/mongo.py index e7db30c6..b181bdc5 100644 --- a/notebooker/serialization/mongo.py +++ b/notebooker/serialization/mongo.py @@ -199,16 +199,26 @@ def _convert_result( if cls is None: return None + def ignore_missing_files(f): + def _ignore_missing_files(path, *args, **kwargs): + try: + return f(path, *args, **kwargs) + except NoFile: + logger.error("Could not find file %s in %s", path, self.result_data_store) + return "" + return _ignore_missing_files + + @ignore_missing_files def read_file(path, is_json=False): + r = self.result_data_store.get_last_version(path).read() try: - r = self.result_data_store.get_last_version(path).read() - try: - return "" if not r else json.loads(r) if is_json else r.decode("utf8") - except UnicodeDecodeError: - return r - except NoFile: - logger.error("Could not find file %s in %s", path, self.result_data_store) - return "" + return "" if not r else json.loads(r) if is_json else r.decode("utf8") + except UnicodeDecodeError: + return r + + @ignore_missing_files + def read_bytes_file(path): + return self.result_data_store.get_last_version(path).read() if not load_payload: result.pop("stdout", None) @@ -219,7 +229,7 @@ def read_file(path, is_json=False): result["raw_html_resources"]["outputs"] = outputs if result.get("generate_pdf_output"): pdf_filename = _pdf_filename(result["job_id"]) - result["pdf"] = read_file(pdf_filename) + result["pdf"] = read_bytes_file(pdf_filename) if not result.get("raw_ipynb_json"): result["raw_ipynb_json"] = read_file(_raw_json_filename(result["job_id"]), is_json=True) if not result.get("raw_html"): diff --git a/notebooker/utils/conversion.py b/notebooker/utils/conversion.py index d4f14156..82b643d5 100644 --- a/notebooker/utils/conversion.py +++ b/notebooker/utils/conversion.py @@ -33,6 +33,7 @@ def ipython_to_html(ipynb_path: str, job_id: str, hide_code: bool = False) -> (n nb = nbformat.reads(nb_file.read(), as_version=nbformat.v4.nbformat) resources_dir = get_resources_dir(job_id) html, resources = html_exporter_with_figs.from_notebook_node(nb, resources={"output_files_dir": resources_dir}) + resources = {k: v for (k, v) in resources.items() if not callable(v)} return html, resources diff --git a/setup.py b/setup.py index 30d49191..ccb5194e 100644 --- a/setup.py +++ b/setup.py @@ -51,28 +51,29 @@ def get_long_description(): zip_safe=False, include_package_data=True, install_requires=[ + "apscheduler", + "babel", + "cachelib", + "click>7.1.0", + "dataclasses", + "flask", "gevent", + "gitpython", + "inflection", + "ipykernel", "ipython", - "pandas", + "ipython_genutils", + "jupytext>=1.2.0", "matplotlib", - "pymongo", - "papermill", - "dataclasses", - "nbconvert<6.0.0", # Pin this because new template locations do not seem to work on OSX + "nbconvert", "nbformat", - "jupytext>=1.2.0", - "ipykernel", - "stashy", - "click>7.1.0", + "pandas", + "papermill", + "pymongo", "python-dateutil", - "apscheduler", - "flask", "requests", "retrying", - "gitpython", - "cachelib", - "inflection", - "babel", + "stashy", ], extras_require={ "prometheus": ["prometheus_client"], diff --git a/tests/conftest.py b/tests/conftest.py index 862ede4e..6ac5c046 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,9 @@ +import os + import pytest +from pytest_server_fixtures import CONFIG +from pytest_server_fixtures.mongo import MongoTestServer +from pytest_fixture_config import yield_requires_config from notebooker.constants import ( DEFAULT_DATABASE_NAME, @@ -11,6 +16,43 @@ from notebooker.web.app import create_app, setup_app +class MongoTestServerWithPath(MongoTestServer): + @property + def env(self): + return {"PATH": os.environ["PATH"]} + + +def _mongo_server(): + """This does the actual work - there are several versions of this used + with different scopes. + """ + test_server = MongoTestServerWithPath() + try: + test_server.start() + yield test_server + finally: + test_server.teardown() + + +@pytest.yield_fixture(scope="function") +@yield_requires_config(CONFIG, ["mongo_bin"]) +def mongo_server(): + """Function-scoped MongoDB server started in a local thread. + This also provides a temp workspace. + We tear down, and cleanup mongos at the end of the test. + + For completeness, we tidy up any outstanding mongo temp directories + at the start and end of each test session + + Attributes + ---------- + api (`pymongo.MongoClient`) : PyMongo Client API connected to this server + .. also inherits all attributes from the `workspace` fixture + """ + for server in _mongo_server(): + yield server + + @pytest.fixture def bson_library(mongo_server, test_db_name, test_lib_name): return mongo_server.api[test_db_name][test_lib_name] @@ -98,7 +140,7 @@ def flask_app(webapp_config): @pytest.fixture -def setup_and_cleanup_notebooker_filesystem(webapp_config): +def setup_and_cleanup_notebooker_filesystem(webapp_config, setup_workspace): try: initialise_base_dirs(webapp_config=webapp_config) yield diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 88affd03..e83817b4 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -117,6 +117,7 @@ } """ + @pytest.fixture def setup_workspace(workspace): (workspace.workspace + "/templates").mkdir() @@ -130,4 +131,4 @@ def setup_workspace(workspace): ipynb_report_to_run.write_lines(DUMMY_REPORT_IPYNB.split("\n")) report_to_run_failing = workspace.workspace + "/templates/fake/report_failing.py" - report_to_run_failing.write_lines(DUMMY_FAILING_REPORT.split("\n")) + report_to_run_failing.write_lines(DUMMY_FAILING_REPORT.split("\n")) \ No newline at end of file diff --git a/tests/integration/test_e2e.py b/tests/integration/test_e2e.py index ff977ace..8406bd6a 100644 --- a/tests/integration/test_e2e.py +++ b/tests/integration/test_e2e.py @@ -32,10 +32,7 @@ def _check_report_output(job_id, serialiser, **kwargs): @pytest.mark.parametrize( "report_name", - [ - "fake/py_report", - "fake/ipynb_report" - ], + ["fake/py_report", "fake/ipynb_report"], ) @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name): @@ -66,6 +63,7 @@ def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesy assert job_id == serialiser.get_latest_successful_job_id_for_name_and_params(report_name, overrides) assert job_id == serialiser.get_latest_successful_job_id_for_name_and_params(report_name, None) + @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) def test_run_failing_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace): with flask_app.app_context(): @@ -91,13 +89,12 @@ def test_run_failing_report(bson_library, flask_app, setup_and_cleanup_notebooke @pytest.mark.parametrize( "report_name", - [ - "fake/py_report", - "fake/ipynb_report" - ], + ["fake/py_report", "fake/ipynb_report"], ) @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) -def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name): +def test_run_report_and_rerun( + bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name +): with flask_app.app_context(): serialiser = get_serializer() overrides = {"n_points": 5} @@ -140,13 +137,12 @@ def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_noteboo @pytest.mark.parametrize( "report_name", - [ - "fake/py_report", - "fake/ipynb_report" - ], + ["fake/py_report", "fake/ipynb_report"], ) @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) -def test_run_report_hide_code(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name): +def test_run_report_hide_code( + bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name +): with flask_app.app_context(): serialiser = get_serializer() overrides = {"n_points": 5} diff --git a/tests/integration/test_execute_notebook.py b/tests/integration/test_execute_notebook.py index 58d01530..cb095349 100644 --- a/tests/integration/test_execute_notebook.py +++ b/tests/integration/test_execute_notebook.py @@ -16,18 +16,15 @@ def mock_nb_execute(input_path, output_path, **kw): f.write('{"cells": [], "metadata": {}}') -def test_main(mongo_host): - with mock.patch("notebooker.execute_notebook.pm.execute_notebook") as exec_nb, mock.patch( - "notebooker.utils.conversion.jupytext.read" - ) as read_nb, mock.patch("notebooker.utils.conversion.PDFExporter") as pdf_exporter: +def test_main(mongo_host, setup_and_cleanup_notebooker_filesystem, webapp_config): + with mock.patch("notebooker.utils.conversion.PDFExporter") as pdf_exporter: pdf_contents = b"This is a PDF." pdf_exporter().from_notebook_node.return_value = (pdf_contents, None) versions = nbv.split(".") major, minor = int(versions[0]), int(versions[1]) if major >= 5: major, minor = 4, 4 - read_nb.return_value = NotebookNode({"cells": [], "metadata": {}, "nbformat": major, "nbformat_minor": minor}) - exec_nb.side_effect = mock_nb_execute + # read_nb.return_value = NotebookNode({"cells": [], "metadata": {}, "nbformat": major, "nbformat_minor": minor}) job_id = "ttttteeeesssstttt" runner = CliRunner() cli_result = runner.invoke( @@ -35,15 +32,31 @@ def test_main(mongo_host): [ "--serializer-cls", DEFAULT_SERIALIZER, + "--py-template-base-dir", + webapp_config.PY_TEMPLATE_BASE_DIR, + "--py-template-subdir", + webapp_config.PY_TEMPLATE_SUBDIR, + "--output-base-dir", + webapp_config.OUTPUT_DIR, + "--template-base-dir", + webapp_config.TEMPLATE_DIR, + "--serializer-cls", + webapp_config.SERIALIZER_CLS, "--mongo-host", - mongo_host, + webapp_config.SERIALIZER_CONFIG["mongo_host"], + "--database-name", + webapp_config.SERIALIZER_CONFIG["database_name"], + "--result-collection-name", + webapp_config.SERIALIZER_CONFIG["result_collection_name"], "execute-notebook", "--report-name", - "test_report", + "fake/py_report", "--job-id", job_id, ], ) + if cli_result.exception: + raise cli_result.exception assert not cli_result.exception, cli_result.output assert cli_result.exit_code == 0 serializer = PyMongoResultSerializer( @@ -56,25 +69,41 @@ def test_main(mongo_host): assert result.raw_ipynb_json assert result.pdf == pdf_contents + @pytest.mark.parametrize( - ('cli_args', 'expected_mailto', 'expected_from',), + ( + "cli_args", + "expected_mailto", + "expected_from", + ), [ ( - ['--report-name', 'crashyreport', '--mailto', 'happy@email', '--mailfrom', 'notebooker@example.com'], - 'happy@email', - 'notebooker@example.com', - ), ( - ['--report-name', 'crashyreport', '--mailto', 'happy@email', '--error-mailto', 'sad@email', '--mailfrom', 'notebooker@example.com'], - 'sad@email', - 'notebooker@example.com', - ), ( - ['--report-name', 'crashyreport', '--error-mailto', 'sad@email', '--mailfrom', 'notebooker@example.com'], - 'sad@email', - 'notebooker@example.com', - ) - ] + ["--report-name", "crashyreport", "--mailto", "happy@email", "--mailfrom", "notebooker@example.com"], + "happy@email", + "notebooker@example.com", + ), + ( + [ + "--report-name", + "crashyreport", + "--mailto", + "happy@email", + "--error-mailto", + "sad@email", + "--mailfrom", + "notebooker@example.com", + ], + "sad@email", + "notebooker@example.com", + ), + ( + ["--report-name", "crashyreport", "--error-mailto", "sad@email", "--mailfrom", "notebooker@example.com"], + "sad@email", + "notebooker@example.com", + ), + ], ) -def test_main(mongo_host, cli_args, expected_mailto, expected_from): +def test_erroring_notebook_with_emails(mongo_host, cli_args, expected_mailto, expected_from): with mock.patch("notebooker.execute_notebook.pm.execute_notebook") as exec_nb, mock.patch( "notebooker.utils.conversion.jupytext.read" ) as read_nb, mock.patch("notebooker.execute_notebook.send_result_email") as send_email: @@ -96,11 +125,10 @@ def test_main(mongo_host, cli_args, expected_mailto, expected_from): "execute-notebook", "--job-id", job_id, - ] + cli_args, + ] + + cli_args, ) - mailto = send_email.call_args_list[0][0][0].mailto mailfrom = send_email.call_args_list[0][0][0].mailfrom assert mailto == expected_mailto assert mailfrom == expected_from - diff --git a/tests/integration/test_report_hunter.py b/tests/integration/test_report_hunter.py index e1a00fd8..f83f4b4e 100644 --- a/tests/integration/test_report_hunter.py +++ b/tests/integration/test_report_hunter.py @@ -164,7 +164,7 @@ def test_report_hunter_pending_to_done(bson_library, webapp_config): update_time=datetime.datetime(2018, 1, 12, 2, 37), job_start_time=datetime.datetime(2018, 1, 12, 2, 30), job_finish_time=datetime.datetime(2018, 1, 12, 2, 37), - pdf="abc", + pdf=b"abc", raw_html="rawstuff", email_html="emailstuff", raw_html_resources={"outputs": {}, "inlining": []}, diff --git a/tests/integration/utils/test_conversion.py b/tests/integration/utils/test_conversion.py new file mode 100644 index 00000000..3efb0c54 --- /dev/null +++ b/tests/integration/utils/test_conversion.py @@ -0,0 +1,85 @@ +import json +import os +import shutil +import tempfile +import uuid + +import mock +import pytest +from click.testing import CliRunner + +from notebooker import convert_to_py +from notebooker.utils import conversion +from notebooker.utils.caching import set_cache +from notebooker.utils.conversion import _output_ipynb_name + + +@pytest.mark.parametrize( + "file_type", + [ + "py", + "ipynb", + ], +) +def test_generate_ipynb_from_py(file_type, setup_and_cleanup_notebooker_filesystem, webapp_config, flask_app): + python_dir = webapp_config.PY_TEMPLATE_BASE_DIR + with flask_app.app_context(): + set_cache("latest_sha", "fake_sha_early") + + os.mkdir(python_dir + "/extra_path") + with open(os.path.join(python_dir, "extra_path", f"test_report.{file_type}"), "w") as f: + if file_type == "py": + f.write("#hello world\n") + elif file_type == "ipynb": + f.write( + json.dumps( + { + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": ["import datetime"], + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2, + } + ) + ) + report_path = os.sep.join(["extra_path", "test_report"]) + with mock.patch("notebooker.utils.conversion.git.repo.Repo") as repo: + repo().commit().hexsha = "fake_sha_early" + conversion.generate_ipynb_from_py(python_dir, report_path, False, python_dir) + repo().commit().hexsha = "fake_sha_later" + conversion.generate_ipynb_from_py(python_dir, report_path, False, python_dir) + conversion.generate_ipynb_from_py(python_dir, report_path, False, python_dir) + + expected_filename = _output_ipynb_name(report_path) + expected_ipynb_path = os.path.join(python_dir, "fake_sha_early", expected_filename) + assert os.path.exists(expected_ipynb_path), f".ipynb at {expected_ipynb_path} was not generated as expected!" + expected_ipynb_path = os.path.join(python_dir, "fake_sha_later", expected_filename) + assert os.path.exists(expected_ipynb_path), ".ipynb was not generated as expected!" + + uuid_1 = uuid.UUID(int=12345) + uuid_2 = uuid.UUID(int=67890) + with mock.patch("notebooker.utils.conversion.uuid.uuid4") as uuid4: + with mock.patch("notebooker.utils.conversion._template") as template: + conversion.python_template_dir = lambda *a, **kw: None + uuid4.return_value = uuid_1 + template.return_value = python_dir + f"/extra_path/test_report.{file_type}" + conversion.generate_ipynb_from_py(python_dir, "extra_path/test_report", False, py_template_dir="") + + expected_ipynb_path = os.path.join(python_dir, str(uuid_1), expected_filename) + assert os.path.exists(expected_ipynb_path), f".ipynb at {expected_ipynb_path} was not generated as expected!" + + with mock.patch("notebooker.utils.conversion.uuid.uuid4") as uuid4: + conversion.python_template_dir = lambda *a, **kw: python_dir + conversion.NOTEBOOKER_DISABLE_GIT = True + uuid4.return_value = uuid_2 + conversion.generate_ipynb_from_py(python_dir, "extra_path/test_report", True, py_template_dir=python_dir) + + expected_ipynb_path = os.path.join(python_dir, str(uuid_2), expected_filename) + assert os.path.exists(expected_ipynb_path), ".ipynb was not generated as expected!" diff --git a/tests/unit/utils/test_conversion.py b/tests/unit/utils/test_conversion.py index 8061c398..7feae994 100644 --- a/tests/unit/utils/test_conversion.py +++ b/tests/unit/utils/test_conversion.py @@ -14,77 +14,6 @@ from notebooker.utils.conversion import _output_ipynb_name -@pytest.mark.parametrize( - "file_type", - [ - "py", - "ipynb", - ], -) -def test_generate_ipynb_from_py(file_type, setup_and_cleanup_notebooker_filesystem, webapp_config, flask_app): - python_dir = webapp_config.PY_TEMPLATE_BASE_DIR - with flask_app.app_context(): - set_cache("latest_sha", "fake_sha_early") - - os.mkdir(python_dir + "/extra_path") - with open(os.path.join(python_dir, "extra_path", f"test_report.{file_type}"), "w") as f: - if file_type == "py": - f.write("#hello world\n") - elif file_type == "ipynb": - f.write( - json.dumps( - { - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": ["import datetime"], - } - ], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2, - } - ) - ) - report_path = os.sep.join(["extra_path", "test_report"]) - with mock.patch("notebooker.utils.conversion.git.repo.Repo") as repo: - repo().commit().hexsha = "fake_sha_early" - conversion.generate_ipynb_from_py(python_dir, report_path, False, python_dir) - repo().commit().hexsha = "fake_sha_later" - conversion.generate_ipynb_from_py(python_dir, report_path, False, python_dir) - conversion.generate_ipynb_from_py(python_dir, report_path, False, python_dir) - - expected_filename = _output_ipynb_name(report_path) - expected_ipynb_path = os.path.join(python_dir, "fake_sha_early", expected_filename) - assert os.path.exists(expected_ipynb_path), f".ipynb at {expected_ipynb_path} was not generated as expected!" - expected_ipynb_path = os.path.join(python_dir, "fake_sha_later", expected_filename) - assert os.path.exists(expected_ipynb_path), ".ipynb was not generated as expected!" - - uuid_1 = uuid.UUID(int=12345) - uuid_2 = uuid.UUID(int=67890) - with mock.patch("notebooker.utils.conversion.uuid.uuid4") as uuid4: - with mock.patch("notebooker.utils.conversion._template") as template: - conversion.python_template_dir = lambda *a, **kw: None - uuid4.return_value = uuid_1 - template.return_value = python_dir + f"/extra_path/test_report.{file_type}" - conversion.generate_ipynb_from_py(python_dir, "extra_path/test_report", False, py_template_dir="") - - expected_ipynb_path = os.path.join(python_dir, str(uuid_1), expected_filename) - assert os.path.exists(expected_ipynb_path), f".ipynb at {expected_ipynb_path} was not generated as expected!" - - with mock.patch("notebooker.utils.conversion.uuid.uuid4") as uuid4: - conversion.python_template_dir = lambda *a, **kw: python_dir - conversion.NOTEBOOKER_DISABLE_GIT = True - uuid4.return_value = uuid_2 - conversion.generate_ipynb_from_py(python_dir, "extra_path/test_report", True, py_template_dir=python_dir) - - expected_ipynb_path = os.path.join(python_dir, str(uuid_2), expected_filename) - assert os.path.exists(expected_ipynb_path), ".ipynb was not generated as expected!" - - def test_generate_py_from_ipynb(): ipynb_dir = tempfile.mkdtemp() py_dir = tempfile.mkdtemp()