Skip to content

Commit

Permalink
Merge pull request #44 from openforcefield/skip-failing-notebooks
Browse files Browse the repository at this point in the history
Skip failing notebooks
  • Loading branch information
Yoshanuikabundi committed May 20, 2024
2 parents bece688 + 3af7121 commit 1768bbb
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 26 deletions.
52 changes: 49 additions & 3 deletions .github/workflows/cookbook_preproc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,13 @@ jobs:
- name: Pre-process and execute notebooks
shell: bash -l {0}
run: |
python source/_ext/proc_examples.py --prefix=deploy/ --cache-branch=${DEPLOY_BRANCH}
set -e
python source/_ext/proc_examples.py --prefix=deploy/ --cache-branch=${DEPLOY_BRANCH} --log-failures=notebooks_log.json
- name: Read notebooks log
if: always()
shell: bash -l {0}
run: echo "NOTEBOOKS_LOG=$(cat 'notebooks_log.json')" >> "$GITHUB_ENV"

- name: Deploy cache
shell: bash -l {0}
Expand Down Expand Up @@ -121,6 +127,7 @@ jobs:
curl -X POST -d "branches=$GITHUB_REF_NAME" -d "token=${{ secrets.RTD_WEBHOOK_TOKEN }}" https://readthedocs.org/api/v2/webhook/openff-docs/243876/
- name: Report status to PR
id: reportStatusToPr
if: always() && github.event_name == 'workflow_dispatch' && inputs.pr_number != ''
uses: thollander/actions-comment-pull-request@v2
with:
Expand All @@ -137,10 +144,49 @@ jobs:
- Deployment branch: ${{ env.DEPLOY_BRANCH }}
- Status: **${{ job.status }}**
- Job status: **${{ job.status }}**
- Notebooks status: ${{fromJSON(env.NOTEBOOKS_LOG).n_successful}} / ${{fromJSON(env.NOTEBOOKS_LOG).n_total}} notebooks successfully executed (${{fromJSON(env.NOTEBOOKS_LOG).n_ignored}} failures ignored)
${{(fromJSON(env.NOTEBOOKS_LOG).failed || fromJSON(env.NOTEBOOKS_LOG).ignored) && '- Failing notebooks:
- ' || ''}}${{join(fromJSON(env.NOTEBOOKS_LOG).failed, '
- ')}}${{fromJSON(env.NOTEBOOKS_LOG).ignored && '
- [ignored] ' || ''}}${{join(fromJSON(env.NOTEBOOKS_LOG).ignored, '
- [ignored] ')}}
Changes will only be visible in the ReadTheDocs
preview after it has been [rebuilt].
[${{ github.run_id }}]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
[rebuilt]: https://readthedocs.org/projects/openff-docs/builds/
- name: Report status to PR on templating failure
if: always() && steps.reportStatusToPr.outcome == 'failure'
uses: thollander/actions-comment-pull-request@v2
with:
pr_number: ${{ inputs.pr_number }}
message: >
A workflow dispatched to regenerate the cookbook cache for this PR has just finished.
- Run ID: [${{ github.run_id }}]
- Triggering actor: ${{ github.triggering_actor }}
- Target branch: ${{ github.ref_name }}
- Deployment branch: ${{ env.DEPLOY_BRANCH }}
- Job status: **${{ job.status }}**
- Notebooks status: N/A
If the workflow was successful, changes will only be visible in the ReadTheDocs
preview after it has been [rebuilt].
Expand Down
4 changes: 2 additions & 2 deletions devtools/conda-envs/examples_env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ dependencies:
- requests
- packaging
# Examples
- openff-toolkit-examples>=0.15.2
- openff-interchange >=0.3.25
- openff-toolkit-examples>=0.16.0
- openff-interchange>=0.3.26
- openff-nagl
# - openff-fragmenter
# - openff-qcsubmit
Expand Down
25 changes: 19 additions & 6 deletions source/_ext/cookbook/globals_.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,30 @@
DEFAULT_CACHE_BRANCH = "_cookbook_data_main"
"""Branch of the openff-docs repository where cached notebooks are stored."""

SKIP_NOTEBOOKS: set[str] = {
"openforcefield/openff-interchange/experimental/openmmforcefields/gaff.ipynb",
}
# NOTE: must be set(), not {} — a bare {} literal is an empty *dict*, which
# would contradict the annotation and break set operations like .add()
SKIP_NOTEBOOKS: set[str] = set()
"""
Notebooks that will not be processed.

This is intended to be used as a way of temporarily disabling broken notebooks
without taking down an entire source repository or the examples page itself.

Specified as a path relative to a notebook search path, eg ``SRC_IPYNB_ROOT``.
This is something like ``{repo_owner}/{repo_name}/{path_from_examples_dir}``.

These notebooks are skipped at the proc_examples stage, but they will still
be rendered if they're in a cache.
"""

OPTIONAL_NOTEBOOKS: list[str] = [
    # ".*/experimental/.*",
    "openforcefield/openff-interchange/experimental/openmmforcefields/gaff.ipynb",
]
"""
Notebooks whose execution failure will not cause notebook processing to fail.

This is intended to be used as a way of temporarily disabling broken notebooks
without taking down an entire source repository or the examples page itself.

They will be executed and included in the examples page if successful, but if
they fail they will be removed.

Specified as a regex matching a path relative to a notebook search path, eg
``SRC_IPYNB_ROOT``. This is something like ``{repo_owner}/{repo_name}/
{path_from_examples_dir}``. Unlike ``SKIP_NOTEBOOKS``, these notebooks are
still executed at the proc_examples stage; only their failures are ignored.
"""
43 changes: 35 additions & 8 deletions source/_ext/cookbook/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
from functools import partial
from types import FunctionType
from typing import Callable, Iterable, Iterator, Optional, TypeVar, Generator
from typing import (
Callable,
Generic,
Iterable,
Iterator,
Optional,
TypeVar,
Generator,
ParamSpec,
Union,
)
import contextlib
import os
import re

T = TypeVar("T")
E = TypeVar("E")
P = ParamSpec("P")


def flatten(iterable: Iterable[Iterable[T]]) -> Generator[T, None, None]:
Expand All @@ -20,14 +34,20 @@ def next_or_none(iterator: Iterator[T]) -> Optional[T]:
return ret


def result(fn, exception=Exception):
def ret(*args, **kwargs):
try:
return fn(*args, **kwargs)
except exception as e:
return e
def result(
fn: Callable[P, T], exception: type[E], *args: P.args, **kwargs: P.kwargs
) -> Union[T, E]:
try:
return fn(*args, **kwargs)
except exception as e:
return e

return ret

def to_result(
fn: Callable[P, T], exception: type[E] = Exception
) -> Callable[P, Union[T, E]]:
# partial's type stub is not precise enough to work here
return partial(result, fn, exception) # type: ignore [reportReturnType]


@contextlib.contextmanager
Expand All @@ -54,3 +74,10 @@ def set_env(**environ):
finally:
os.environ.clear()
os.environ.update(old_environ)


def in_regexes(s: str, regexes: Iterable[str]) -> bool:
    """Return True if ``s`` matches any pattern in ``regexes``.

    Patterns are matched with :func:`re.match`, i.e. anchored at the start
    of ``s`` but not at the end. An empty ``regexes`` yields False.
    """
    return any(re.match(pattern, s) is not None for pattern in regexes)
102 changes: 95 additions & 7 deletions source/_ext/proc_examples.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Script to execute and pre-process example notebooks"""

import re
from typing import Tuple, List, Final
from zipfile import ZIP_DEFLATED, ZipFile
from pathlib import Path
Expand All @@ -10,6 +11,7 @@
import sys
import tarfile
from functools import partial
import traceback

import nbformat
from nbconvert.preprocessors.execute import ExecutePreprocessor
Expand All @@ -30,7 +32,14 @@
)
from cookbook.github import download_dir, get_tag_matching_installed_version
from cookbook.globals_ import *
from cookbook.utils import set_env
from cookbook.utils import set_env, to_result, in_regexes


class NotebookExceptionError(ValueError):
def __init__(self, src: str, exc: Exception):
self.src: str = str(src)
self.exc: Exception = exc
self.tb: str = "".join(traceback.format_exception(exc, chain=False))


def needed_files(notebook_path: Path) -> List[Tuple[Path, Path]]:
Expand Down Expand Up @@ -208,7 +217,8 @@ def execute_notebook(
try:
executor.preprocess(nb, {"metadata": {"path": src.parent}})
except Exception as e:
raise ValueError(f"Exception encountered while executing {src_rel}")
print("Failed to execute", src.relative_to(SRC_IPYNB_ROOT))
raise NotebookExceptionError(str(src_rel), e)

# Store the tag used to execute the notebook in metadata
set_metadata(nb, "src_repo_tag", tag)
Expand All @@ -230,7 +240,7 @@ def execute_notebook(
EXEC_IPYNB_ROOT / thumbnail_path.relative_to(SRC_IPYNB_ROOT),
)

print("Done executing", src.relative_to(SRC_IPYNB_ROOT))
print("Successfully executed", src.relative_to(SRC_IPYNB_ROOT))


def delay_iterator(iterator, seconds=1.0):
Expand Down Expand Up @@ -260,6 +270,8 @@ def main(
do_exec=True,
prefix: Path | None = None,
processes: int | None = None,
failed_notebooks_log: Path | None = None,
allow_failures: bool = False,
):
print("Working in", Path().resolve())

Expand Down Expand Up @@ -287,7 +299,6 @@ def main(
for notebook in find_notebooks(dst_path)
if str(notebook.relative_to(SRC_IPYNB_ROOT)) not in SKIP_NOTEBOOKS
)
print(notebooks, SKIP_NOTEBOOKS)

# Create Colab and downloadable versions of the notebooks
if do_proc:
Expand All @@ -299,20 +310,63 @@ def main(
create_download(notebook)

# Execute notebooks in parallel for rendering as HTML
execution_failed = False
if do_exec:
shutil.rmtree(EXEC_IPYNB_ROOT, ignore_errors=True)
# Context manager ensures the pool is correctly terminated if there's
# an exception
with Pool(processes=processes) as pool:
# Wait a second between launching subprocesses
# Workaround https://github.com/jupyter/nbconvert/issues/1066
_ = [
*pool.imap_unordered(
partial(execute_notebook, cache_branch=cache_branch),
exec_results = [
*pool.imap(
to_result(
partial(execute_notebook, cache_branch=cache_branch),
NotebookExceptionError,
),
delay_iterator(notebooks),
)
]

exceptions: list[NotebookExceptionError] = [
result for result in exec_results if isinstance(result, Exception)
]
ignored_exceptions = [
exc for exc in exceptions if in_regexes(exc.src, OPTIONAL_NOTEBOOKS)
]

if exceptions:
for exception in exceptions:
print(
"-" * 80
+ "\n"
+ f"{exception.src} failed. Traceback:\n\n{exception.tb}"
)
if not in_regexes(exception.src, OPTIONAL_NOTEBOOKS):
execution_failed = True
print(f"The following {len(exceptions)}/{len(notebooks)} notebooks failed:")
for exception in exceptions:
print(" ", exception.src)
print("For tracebacks, see above.")

if failed_notebooks_log is not None:
print(f"Writing log to {failed_notebooks_log.absolute()}")
failed_notebooks_log.write_text(
json.dumps(
{
"n_successful": len(notebooks) - len(exceptions),
"n_total": len(notebooks),
"n_ignored": len(ignored_exceptions),
"failed": [
exc.src
for exc in exceptions
if exc not in ignored_exceptions
],
"ignored": [exc.src for exc in ignored_exceptions],
}
)
)

if isinstance(prefix, Path):
prefix.mkdir(parents=True, exist_ok=True)

Expand All @@ -327,6 +381,9 @@ def main(
prefix / directory.relative_to(OPENFF_DOCS_ROOT),
)

if execution_failed:
exit(1)


if __name__ == "__main__":
import sys, os
Expand Down Expand Up @@ -365,10 +422,41 @@ def main(
"Specify cache branch in a single argument: `--cache-branch=<branch>`"
)

# --log-failures is the path to store a list of failing notebooks in
failed_notebooks_log = None
_log_flag = "--log-failures="
for arg in sys.argv:
    if arg.startswith(_log_flag):
        failed_notebooks_log = Path(arg[len(_log_flag) :])
if "--log-failures" in sys.argv:
    raise ValueError(
        "Specify path to log file in a single argument: `--log-failures=<path>`"
    )

# if --allow-failures is True, do not exit with error code 1 if a
# notebook fails
allow_failures = "false"
for arg in sys.argv:
    if arg.startswith("--allow-failures="):
        allow_failures = arg[len("--allow-failures=") :].lower()
if allow_failures in ("true", "1", "y", "yes", "t"):
    allow_failures = True
elif allow_failures in ("false", "0", "n", "no", "f"):
    # was ["false", "0", "n", "no", "false"] — "false" listed twice, "f" missing
    allow_failures = False
else:
    raise ValueError(
        f"Didn't understand value of --allow-failures {allow_failures}; try `true` or `false`"
    )
# Reject a bare flag with no value (was mistakenly checking "--log-failures")
if "--allow-failures" in sys.argv:
    raise ValueError(
        "Specify value in a single argument: `--allow-failures=true` or `--allow-failures=false`"
    )

main(
cache_branch=cache_branch,
do_proc=not "--skip-proc" in sys.argv,
do_exec=not "--skip-exec" in sys.argv,
prefix=prefix,
processes=processes,
failed_notebooks_log=failed_notebooks_log,
allow_failures=allow_failures,
)

0 comments on commit 1768bbb

Please sign in to comment.