Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
0.4.6 (2022-??-??)
0.5.0 (2023-??-??)
------------------

* Feature: Added support for [Reveal.js](https://revealjs.com/) notebook outputs
* Bugfix: Small bugfix for synchronous report execution
* Improvement: Delete functionality in mongo now also deletes files from GridFS


0.4.5 (2022-09-29)
------------------

Expand Down
17 changes: 10 additions & 7 deletions notebooker/_entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,10 @@ def filesystem_default_value(dirname):
help="If selected, notebooker will not try to pull the latest version of python templates from git.",
)
@click.option(
"--default-mailfrom",
default=DEFAULT_MAILFROM_ADDRESS,
help="Set a new value for the default mailfrom setting."
"--default-mailfrom", default=DEFAULT_MAILFROM_ADDRESS, help="Set a new value for the default mailfrom setting."
)
@click.option(
"--running-timeout",
default=DEFAULT_RUNNING_TIMEOUT,
help="Timeout in minutes for report execution",
type=int
"--running-timeout", default=DEFAULT_RUNNING_TIMEOUT, help="Timeout in minutes for report execution", type=int
)
@click.option(
"--serializer-cls",
Expand Down Expand Up @@ -207,6 +202,12 @@ def start_webapp(
default=None,
help="Use this email in the From header of any sent email. If not passed, --default-mailfrom will be used",
)
@click.option(
"--is-slideshow",
default=False,
is_flag=True,
help="If specified, the notebook template's output will be treated as a Reveal.js slideshow.",
)
@pass_config
def execute_notebook(
config: BaseConfig,
Expand All @@ -224,6 +225,7 @@ def execute_notebook(
prepare_notebook_only,
scheduler_job_id,
mailfrom,
is_slideshow,
):
if report_name is None:
raise ValueError("Error! Please provide a --report-name.")
Expand All @@ -243,6 +245,7 @@ def execute_notebook(
prepare_notebook_only,
scheduler_job_id,
mailfrom,
is_slideshow=is_slideshow,
)


Expand Down
8 changes: 7 additions & 1 deletion notebooker/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class NotebookResultBase(object):
stdout = attr.ib(default=attr.Factory(list))
scheduler_job_id = attr.ib(default=None)
mailfrom = attr.ib(default=None)
is_slideshow = attr.ib(default=False)

def saveable_output(self):
out = attr.asdict(self)
Expand All @@ -103,6 +104,7 @@ class NotebookResultPending(NotebookResultBase):
hide_code = attr.ib(default=False)
scheduler_job_id = attr.ib(default=None)
mailfrom = attr.ib(default=None)
is_slideshow = attr.ib(default=False)


@attr.s()
Expand All @@ -117,6 +119,7 @@ class NotebookResultError(NotebookResultBase):
hide_code = attr.ib(default=False)
scheduler_job_id = attr.ib(default=None)
mailfrom = attr.ib(default=None)
is_slideshow = attr.ib(default=False)

@property
def email_subject(self):
Expand Down Expand Up @@ -158,6 +161,7 @@ class NotebookResultComplete(NotebookResultBase):
stdout = attr.ib(default=attr.Factory(list))
scheduler_job_id = attr.ib(default=None)
mailfrom = attr.ib(default=None)
is_slideshow = attr.ib(default=False)

def html_resources(self):
"""We have to save the raw images using Mongo GridFS - figure out where they will go here"""
Expand Down Expand Up @@ -189,6 +193,7 @@ def saveable_output(self):
"scheduler_job_id": self.scheduler_job_id,
"raw_html": "", # backwards compatibility for versions<0.3.1
"mailfrom": self.mailfrom,
"is_slideshow": self.is_slideshow,
}

def __repr__(self):
Expand All @@ -197,7 +202,7 @@ def __repr__(self):
"job_start_time={job_start_time}, job_finish_time={job_finish_time}, update_time={update_time}, "
"report_title={report_title}, overrides={overrides}, mailto={mailto}, mailfrom={mailfrom}"
"email_subject={email_subject}, generate_pdf_output={generate_pdf_output}, hide_code={hide_code}, "
"scheduler_job_id={scheduler_job_id})".format(
"scheduler_job_id={scheduler_job_id}, is_slideshow={is_slideshow})".format(
job_id=self.job_id,
status=self.status,
report_name=self.report_name,
Expand All @@ -212,5 +217,6 @@ def __repr__(self):
generate_pdf_output=self.generate_pdf_output,
hide_code=self.hide_code,
scheduler_job_id=self.scheduler_job_id,
is_slideshow=self.is_slideshow,
)
)
33 changes: 27 additions & 6 deletions notebooker/execute_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def _run_checks(
py_template_subdir: str = "",
scheduler_job_id: Optional[str] = None,
mailfrom: Optional[str] = None,
is_slideshow: bool = False,
) -> NotebookResultComplete:
"""
This is the actual method which executes a notebook, whether running in the webapp or via the entrypoint.
Expand Down Expand Up @@ -77,7 +78,8 @@ def _run_checks(
If available, it will be part of the Error or Completed run report.
mailfrom : `Optional[str]`
If available, this will be the email used in the From header.

is_slideshow : `bool`
Whether the executed notebook should be converted to a Reveal.js slideshow (the equivalent of nbconvert --to slides).

Returns
-------
Expand All @@ -102,14 +104,18 @@ def _run_checks(

logger.info("Executing notebook at {} using parameters {} --> {}".format(ipynb_raw_path, overrides, output_ipynb))
pm.execute_notebook(
ipynb_raw_path, ipynb_executed_path, parameters=overrides, log_output=True, prepare_only=prepare_only
ipynb_raw_path,
ipynb_executed_path,
parameters=overrides,
log_output=True,
prepare_only=prepare_only,
)
with open(ipynb_executed_path, "r") as f:
raw_executed_ipynb = f.read()

logger.info("Saving output notebook as HTML from {}".format(ipynb_executed_path))
html, resources = ipython_to_html(ipynb_executed_path, job_id)
email_html, _ = ipython_to_html(ipynb_executed_path, job_id, hide_code=hide_code)
html, resources = ipython_to_html(ipynb_executed_path, job_id, is_slideshow=is_slideshow)
email_html, _ = ipython_to_html(ipynb_executed_path, job_id, hide_code=hide_code, is_slideshow=is_slideshow)
pdf = ipython_to_pdf(raw_executed_ipynb, report_title, hide_code=hide_code) if generate_pdf_output else ""

notebook_result = NotebookResultComplete(
Expand All @@ -129,6 +135,7 @@ def _run_checks(
overrides=overrides,
scheduler_job_id=scheduler_job_id,
mailfrom=mailfrom,
is_slideshow=is_slideshow,
)
return notebook_result

Expand All @@ -154,12 +161,17 @@ def run_report(
py_template_subdir="",
scheduler_job_id=None,
mailfrom=None,
is_slideshow=False,
):

job_id = job_id or str(uuid.uuid4())
stop_execution = os.getenv("NOTEBOOKER_APP_STOPPING")
if stop_execution:
logger.info("Aborting attempt to run %s, jobid=%s as app is shutting down.", report_name, job_id)
logger.info(
"Aborting attempt to run %s, jobid=%s as app is shutting down.",
report_name,
job_id,
)
result_serializer.update_check_status(job_id, JobStatus.CANCELLED, error_info=CANCEL_MESSAGE)
return
try:
Expand All @@ -170,7 +182,10 @@ def run_report(
attempts_remaining,
)
result_serializer.update_check_status(
job_id, report_name=report_name, job_start_time=job_submit_time, status=JobStatus.PENDING
job_id,
report_name=report_name,
job_start_time=job_submit_time,
status=JobStatus.PENDING,
)
result = _run_checks(
job_id,
Expand All @@ -190,6 +205,7 @@ def run_report(
py_template_subdir=py_template_subdir,
scheduler_job_id=scheduler_job_id,
mailfrom=mailfrom,
is_slideshow=is_slideshow,
)
logger.info("Successfully got result.")
result_serializer.save_check_result(result)
Expand All @@ -208,6 +224,7 @@ def run_report(
generate_pdf_output=generate_pdf_output,
scheduler_job_id=scheduler_job_id,
mailfrom=mailfrom,
is_slideshow=is_slideshow,
)
logger.error(
"Report run failed. Saving error result to mongo library %s@%s...",
Expand Down Expand Up @@ -239,6 +256,7 @@ def run_report(
py_template_subdir=py_template_subdir,
scheduler_job_id=scheduler_job_id,
mailfrom=mailfrom,
is_slideshow=is_slideshow,
)
else:
logger.info("Abandoning attempt to run report. It failed too many times.")
Expand Down Expand Up @@ -327,6 +345,7 @@ def execute_notebook_entrypoint(
prepare_notebook_only: bool,
scheduler_job_id: Optional[str],
mailfrom: Optional[str],
is_slideshow: bool,
):
report_title = report_title or report_name
output_dir, template_dir, _ = initialise_base_dirs(output_dir=config.OUTPUT_DIR, template_dir=config.TEMPLATE_DIR)
Expand All @@ -351,6 +370,7 @@ def execute_notebook_entrypoint(
logger.info("mailfrom = %s" % mailfrom)
logger.info("pdf_output = %s", pdf_output)
logger.info("hide_code = %s", hide_code)
logger.info("is_slideshow = %s", is_slideshow)
logger.info("prepare_notebook_only = %s", prepare_notebook_only)
logger.info("scheduler job id = %s", scheduler_job_id)
logger.info("notebooker_disable_git = %s", notebooker_disable_git)
Expand Down Expand Up @@ -384,6 +404,7 @@ def execute_notebook_entrypoint(
py_template_subdir=py_template_subdir,
scheduler_job_id=scheduler_job_id,
mailfrom=mailfrom,
is_slideshow=is_slideshow,
)
if result.mailto:
send_result_email(result, config.DEFAULT_MAILFROM)
Expand Down
33 changes: 33 additions & 0 deletions notebooker/notebook_templates_example/sample/slideshow_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#       jupytext_version: 1.3.0
#   kernelspec:
#     display_name: py3-local-pegasus
#     language: python
#     name: env
# ---

# + tags=["parameters"]
# Exclusive upper bound of the range listed in the final code cell.
# NOTE(review): the "parameters" tag presumably lets this value be overridden
# at execution time - confirm against the notebook runner.
the_range = 10
# -

# + [markdown] slideshow={"slide_type": "slide"}
# # This is slide number one

# + [markdown] slideshow={"slide_type": "subslide"}
# ## This is slide two
#
# - I have some
# - things to talk
# - about.

# + [markdown] slideshow={"slide_type": "slide"}
# # Okay onto the good stuff!
# -

# Bare expression: the integers from 0 up to (but excluding) the_range.
list(range(the_range))
12 changes: 7 additions & 5 deletions notebooker/serialization/mongo.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import datetime
import json
from collections import Counter, defaultdict

from abc import ABC
from collections import defaultdict
from logging import getLogger
from typing import Any, AnyStr, Dict, List, Optional, Tuple, Union, Iterator

import click
import gridfs
import pymongo
from abc import ABC
from gridfs import NoFile

from notebooker.constants import JobStatus, NotebookResultComplete, NotebookResultError, NotebookResultPending
Expand Down Expand Up @@ -53,7 +52,6 @@ def read_bytes_file(result_data_store, path):


def load_files_from_gridfs(result_data_store: gridfs.GridFS, result: Dict, do_read=True) -> List[str]:

gridfs_filenames = []
all_html_output_paths = result.get("raw_html_resources", {}).get("outputs", [])
gridfs_filenames.extend(all_html_output_paths)
Expand Down Expand Up @@ -89,7 +87,6 @@ def load_files_from_gridfs(result_data_store: gridfs.GridFS, result: Dict, do_re

class MongoResultSerializer(ABC):
# This class is the interface between Mongo and the rest of the application

def __init__(self, database_name="notebooker", mongo_host="localhost", result_collection_name="NOTEBOOK_OUTPUT"):
self.database_name = database_name
self.mongo_host = mongo_host
Expand Down Expand Up @@ -193,6 +190,7 @@ def save_check_stub(
generate_pdf_output: bool = True,
hide_code: bool = False,
scheduler_job_id: Optional[str] = None,
is_slideshow: bool = False,
) -> None:
"""Call this when we are just starting a check. Saves a "pending" job into storage."""
job_start_time = job_start_time or datetime.datetime.now()
Expand All @@ -208,6 +206,7 @@ def save_check_stub(
overrides=overrides or {},
hide_code=hide_code,
scheduler_job_id=scheduler_job_id,
is_slideshow=is_slideshow,
)
self._save_to_db(pending_result)

Expand Down Expand Up @@ -303,6 +302,7 @@ def _convert_result(
hide_code=result.get("hide_code", False),
stdout=result.get("stdout", []),
scheduler_job_id=result.get("scheduler_job_id", None),
is_slideshow=result.get("is_slideshow", False),
)
elif cls == NotebookResultPending:
return NotebookResultPending(
Expand All @@ -318,6 +318,7 @@ def _convert_result(
hide_code=result.get("hide_code", False),
stdout=result.get("stdout", []),
scheduler_job_id=result.get("scheduler_job_id", None),
is_slideshow=result.get("is_slideshow", False),
)

elif cls == NotebookResultError:
Expand All @@ -340,6 +341,7 @@ def _convert_result(
hide_code=result.get("hide_code", False),
stdout=result.get("stdout", []),
scheduler_job_id=result.get("scheduler_job_id", False),
is_slideshow=result.get("is_slideshow", False),
)
else:
raise ValueError("Could not deserialise {} into result object.".format(result))
Expand Down
Loading