Skip to content

Commit

Permalink
Only fix cell ID validation issues if asked (#244)
Browse files Browse the repository at this point in the history
* Only fix cell ID validation issues if asked

Adds a new kwarg to validate called `repair` which will fix well-known validation
errors during the validation process, but only when set to `True`.

Updates existing functionality to not mutate the notebook contents, but instead
fail validation.

* Remove python 3.5 from ci test matrix

* explicitly exclude jupyter-trust script from manifest

* Ensure that validate default mutates notebook content to repair cell ids

* Correct kwarg name for clarity
  • Loading branch information
nicholaswold committed Jan 21, 2022
1 parent dee3e4f commit d073357
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
fail-fast: false
matrix:
OS: ['ubuntu-latest', 'windows-latest']
PYTHON_VERSION: ['3.5', '3.6', '3.7', '3.8', '3.9']
PYTHON_VERSION: ['3.6', '3.7', '3.8', '3.9']
steps:
- uses: actions/checkout@v2
- name: Set up Python
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ __pycache__
.#*
.coverage
.cache
.python-version
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include MANIFEST.in
recursive-include nbformat *.txt
recursive-include nbformat *.json
recursive-exclude nbformat/tests *.*
exclude scripts/jupyter-trust

# Javascript
include package.json
Expand Down
50 changes: 42 additions & 8 deletions nbformat/tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.

import json
import os
import re

Expand Down Expand Up @@ -212,17 +213,48 @@ def test_fallback_validator_with_iter_errors_using_ref():

def test_non_unique_cell_ids():
"""Test than a non-unique cell id does not pass validation"""
import nbformat
with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
nb = read(f, as_version=4)
# The read call corrects the error and only logs the validation issue, so we reapply the issue for the test after
nb.cells[1].id = nb.cells[0].id
# Avoids validate call from `.read`
nb = nbformat.from_dict(json.load(f))
with pytest.raises(ValidationError):
validate(nb)
# The validate call should have corrected the duplicate id entry
validate(nb, repair_duplicate_cell_ids=False)
# try again to verify that we didn't modify the content
with pytest.raises(ValidationError):
validate(nb, repair_duplicate_cell_ids=False)


def test_repair_non_unique_cell_ids():
"""Test that we will repair non-unique cell ids if asked during validation"""
import nbformat
with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
# Avoids validate call from `.read`
nb = nbformat.from_dict(json.load(f))
validate(nb)
assert isvalid(nb)


def test_no_cell_ids():
"""Test that a cell without a cell ID does not pass validation"""
import nbformat
with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
# Avoids validate call from `.read`
nb = nbformat.from_dict(json.load(f))
with pytest.raises(ValidationError):
validate(nb, repair_duplicate_cell_ids=False)
# try again to verify that we didn't modify the content
with pytest.raises(ValidationError):
validate(nb, repair_duplicate_cell_ids=False)


def test_repair_no_cell_ids():
"""Test that we will repair cells without ids if asked during validation"""
import nbformat
with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
# Avoids validate call from `.read`
nb = nbformat.from_dict(json.load(f))
validate(nb)
assert isvalid(nb)
# Reapply to id duplication issue
nb.cells[1].id = nb.cells[0].id
assert not isvalid(nb)


def test_invalid_cell_id():
Expand All @@ -233,11 +265,13 @@ def test_invalid_cell_id():
validate(nb)
assert not isvalid(nb)


def test_notebook_invalid_without_min_version():
with TestsBase.fopen(u'no_min_version.ipynb', u'r') as f:
nb = read(f, as_version=4)
with pytest.raises(ValidationError):
validate(nb)


def test_notebook_invalid_without_main_version():
pass
34 changes: 34 additions & 0 deletions nbformat/tests/v4_5_no_cell_id.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"foo\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
21 changes: 14 additions & 7 deletions nbformat/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
from .reader import get_version, reads
from .corpus.words import generate_corpus_id

from traitlets.log import get_logger


validators = {}

def _relax_additional_properties(obj):
Expand Down Expand Up @@ -230,7 +233,7 @@ def better_validation_error(error, version, version_minor):


def validate(nbdict=None, ref=None, version=None, version_minor=None,
relax_add_props=False, nbjson=None):
relax_add_props=False, nbjson=None, repair_duplicate_cell_ids=True):
"""Checks whether the given notebook dict-like object
conforms to the relevant notebook format schema.
Expand Down Expand Up @@ -258,8 +261,9 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
if version is None:
version, version_minor = 1, 0

if ref is None and version >= 4 and version_minor >= 5:
# if we support cell ids ensure default ids are provided
notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5
if notebook_supports_cell_ids and repair_duplicate_cell_ids:
# Auto-generate cell ids for cells that are missing them.
for cell in nbdict['cells']:
if 'id' not in cell:
# Generate cell ids if any are missing
Expand All @@ -270,15 +274,18 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
relax_add_props=relax_add_props):
raise error

if ref is None and version >= 4 and version_minor >= 5:
if notebook_supports_cell_ids:
# if we support cell ids check for uniqueness when validating the whole notebook
seen_ids = set()
for cell in nbdict['cells']:
cell_id = cell['id']
if cell_id in seen_ids:
cell['id'] = generate_corpus_id()
# Best effort to repair if we find a duplicate id in case the ValidationError isn't blocking
raise ValidationError("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))
if repair_duplicate_cell_ids:
# Best effort to repair if we find a duplicate id
cell['id'] = generate_corpus_id()
get_logger().warn("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))
else:
raise ValidationError("Non-unique cell id '{}' detected.".format(cell_id))
seen_ids.add(cell_id)


Expand Down

0 comments on commit d073357

Please sign in to comment.