Only fix cell ID validation issues if asked (#244)

* Only fix cell ID validation issues if asked Adds a new kwarg to validate called `repair` which will fix well-known validation errors during the validation process, but only when set to `True`. Updates existing functionality to not mutate the notebook contents, but instead fail validation. * Remove python 3.5 from ci test matrix * explicitly exclude jupyter-trust script from manifest * Ensure that validate default mutates notebook content to repair cell ids * Correct kwarg name for clarity
jupyter · Jan 21, 2022 · d073357 · d073357
1 parent dee3e4f
commit d073357
Show file tree

Hide file tree

Showing 6 changed files with 93 additions and 16 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -12,7 +12,7 @@ jobs:
       fail-fast: false 
       matrix:
         OS: ['ubuntu-latest', 'windows-latest']
-        PYTHON_VERSION: ['3.5', '3.6', '3.7', '3.8', '3.9']
+        PYTHON_VERSION: ['3.6', '3.7', '3.8', '3.9']
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python

diff --git a/.gitignore b/.gitignore
@@ -16,3 +16,4 @@ __pycache__
 .#*
 .coverage
 .cache
+.python-version
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -8,6 +8,7 @@ include MANIFEST.in
 recursive-include nbformat *.txt
 recursive-include nbformat *.json
 recursive-exclude nbformat/tests *.*
+exclude scripts/jupyter-trust
 
 # Javascript
 include package.json

diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py
@@ -3,6 +3,7 @@
 # Copyright (c) IPython Development Team.
 # Distributed under the terms of the Modified BSD License.
 
+import json
 import os
 import re
 
@@ -212,17 +213,48 @@ def test_fallback_validator_with_iter_errors_using_ref():
 
 def test_non_unique_cell_ids():
     """Test than a non-unique cell id does not pass validation"""
+    import nbformat
     with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
-        nb = read(f, as_version=4)
-    # The read call corrects the error and only logs the validation issue, so we reapply the issue for the test after
-    nb.cells[1].id = nb.cells[0].id
+        # Avoids validate call from `.read`
+        nb = nbformat.from_dict(json.load(f))
     with pytest.raises(ValidationError):
-        validate(nb)
-    # The validate call should have corrected the duplicate id entry
+        validate(nb, repair_duplicate_cell_ids=False)
+    # try again to verify that we didn't modify the content
+    with pytest.raises(ValidationError):
+        validate(nb, repair_duplicate_cell_ids=False)
+
+
+def test_repair_non_unique_cell_ids():
+    """Test that we will repair non-unique cell ids if asked during validation"""
+    import nbformat
+    with TestsBase.fopen(u'invalid_unique_cell_id.ipynb', u'r') as f:
+        # Avoids validate call from `.read`
+        nb = nbformat.from_dict(json.load(f))
+    validate(nb)
+    assert isvalid(nb)
+
+
+def test_no_cell_ids():
+    """Test that a cell without a cell ID does not pass validation"""
+    import nbformat
+    with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
+        # Avoids validate call from `.read`
+        nb = nbformat.from_dict(json.load(f))
+    with pytest.raises(ValidationError):
+        validate(nb, repair_duplicate_cell_ids=False)
+    # try again to verify that we didn't modify the content
+    with pytest.raises(ValidationError):
+        validate(nb, repair_duplicate_cell_ids=False)
+
+
+def test_repair_no_cell_ids():
+    """Test that we will repair cells without ids if asked during validation"""
+    import nbformat
+    with TestsBase.fopen(u'v4_5_no_cell_id.ipynb', u'r') as f:
+        # Avoids validate call from `.read`
+        nb = nbformat.from_dict(json.load(f))
+    validate(nb)
     assert isvalid(nb)
-    # Reapply to id duplication issue
-    nb.cells[1].id = nb.cells[0].id
-    assert not isvalid(nb)
 
 
 def test_invalid_cell_id():
@@ -233,11 +265,13 @@ def test_invalid_cell_id():
         validate(nb)
     assert not isvalid(nb)
 
+
 def test_notebook_invalid_without_min_version():
     with TestsBase.fopen(u'no_min_version.ipynb', u'r') as f:
         nb = read(f, as_version=4)
     with pytest.raises(ValidationError):
         validate(nb)
 
+
 def test_notebook_invalid_without_main_version():
     pass
diff --git a/nbformat/tests/v4_5_no_cell_id.ipynb b/nbformat/tests/v4_5_no_cell_id.ipynb
@@ -0,0 +1,34 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"foo\""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/nbformat/validator.py b/nbformat/validator.py
@@ -13,6 +13,9 @@
 from .reader import get_version, reads
 from .corpus.words import generate_corpus_id
 
+from traitlets.log import get_logger
+
+
 validators = {}
 
 def _relax_additional_properties(obj):
@@ -230,7 +233,7 @@ def better_validation_error(error, version, version_minor):
 
 
 def validate(nbdict=None, ref=None, version=None, version_minor=None,
-             relax_add_props=False, nbjson=None):
+             relax_add_props=False, nbjson=None, repair_duplicate_cell_ids=True):
     """Checks whether the given notebook dict-like object
     conforms to the relevant notebook format schema.
 
@@ -258,8 +261,9 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
         if version is None:
             version, version_minor = 1, 0
 
-    if ref is None and version >= 4 and version_minor >= 5:
-        # if we support cell ids ensure default ids are provided
+    notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5
+    if notebook_supports_cell_ids and repair_duplicate_cell_ids:
+        # Auto-generate cell ids for cells that are missing them.
         for cell in nbdict['cells']:
             if 'id' not in cell:
                 # Generate cell ids if any are missing
@@ -270,15 +274,18 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
                                relax_add_props=relax_add_props):
         raise error
 
-    if ref is None and version >= 4 and version_minor >= 5:
+    if notebook_supports_cell_ids:
         # if we support cell ids check for uniqueness when validating the whole notebook
         seen_ids = set()
         for cell in nbdict['cells']:
             cell_id = cell['id']
             if cell_id in seen_ids:
-                cell['id'] = generate_corpus_id()
-                # Best effort to repair if we find a duplicate id in case the ValidationError isn't blocking
-                raise ValidationError("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))
+                if repair_duplicate_cell_ids:
+                    # Best effort to repair if we find a duplicate id
+                    cell['id'] = generate_corpus_id()
+                    get_logger().warn("Non-unique cell id '{}' detected. Corrected to '{}'.".format(cell_id, cell['id']))
+                else:
+                    raise ValidationError("Non-unique cell id '{}' detected.".format(cell_id))
             seen_ids.add(cell_id)