Merge pull request #65 from minrk/split/join/attachments

exercise attachments, json outputs in v4 tests
jupyter · Nov 21, 2016 · 1ab21fa
2 parents 63ba4c1 + 0686b4b
commit 1ab21fa
Show file tree

Hide file tree

Showing 6 changed files with 103 additions and 14 deletions.
diff --git a/nbformat/v4/convert.py b/nbformat/v4/convert.py
@@ -128,6 +128,7 @@ def downgrade_cell(cell):
             cell.cell_type = 'heading'
             cell.source = text
             cell.level = len(prefix)
+    cell.pop('attachments', None)
     return cell
 
 _mime_map = {
@@ -150,10 +151,11 @@ def to_mime_key(d):
 
 def from_mime_key(d):
     """convert dict with mime-type keys to v3 aliases"""
+    d2 = {}
     for alias, mime in _mime_map.items():
         if mime in d:
-            d[alias] = d.pop(mime)
-    return d
+            d2[alias] = d[mime]
+    return d2
 
 def upgrade_output(output):
     """upgrade a single code cell output from v3 to v4
@@ -209,7 +211,7 @@ def downgrade_output(output):
         data = output.pop('data', {})
         if 'application/json' in data:
             data['application/json'] = json.dumps(data['application/json'])
-        from_mime_key(data)
+        data = from_mime_key(data)
         output.update(data)
         from_mime_key(output.get('metadata', {}))
     elif output['output_type'] == 'error':

diff --git a/nbformat/v4/nbbase.py b/nbformat/v4/nbbase.py
@@ -13,7 +13,7 @@
 
 # Change this when incrementing the nbformat version
 nbformat = 4
-nbformat_minor = 1
+nbformat_minor = 2
 nbformat_schema = 'nbformat.v4.schema.json'
 
 

diff --git a/nbformat/v4/nbformat.v4.schema.json b/nbformat/v4/nbformat.v4.schema.json
@@ -1,6 +1,6 @@
 {
     "$schema": "http://json-schema.org/draft-04/schema#",
-    "description": "Jupyter Notebook v4.0 JSON schema.",
+    "description": "Jupyter Notebook v4.2 JSON schema.",
     "type": "object",
     "additionalProperties": false,
     "required": ["metadata", "nbformat_minor", "nbformat", "cells"],

diff --git a/nbformat/v4/rwbase.py b/nbformat/v4/rwbase.py
@@ -5,6 +5,19 @@
 
 from ipython_genutils.py3compat import string_types, cast_unicode_py2
 
+def _is_json_mime(mime):
+    """Is a key a JSON mime-type that should be left alone?"""
+    return mime == 'application/json' or \
+        (mime.startswith('application/') and mime.endswith('+json'))
+
+def _rejoin_mimebundle(data):
+    """Rejoin the multi-line string fields in a mimebundle (in-place)"""
+    for key, value in list(data.items()):
+        if not _is_json_mime(key) \
+        and isinstance(value, list) \
+        and all(isinstance(line, string_types) for line in value):
+            data[key] = ''.join(value)
+    return data
 
 def rejoin_lines(nb):
     """rejoin multiline text into strings
@@ -19,13 +32,16 @@ def rejoin_lines(nb):
     for cell in nb.cells:
         if 'source' in cell and isinstance(cell.source, list):
             cell.source = ''.join(cell.source)
+
+        attachments = cell.get('attachments', {})
+        for key, attachment in attachments.items():
+            _rejoin_mimebundle(attachment)
+
         if cell.get('cell_type', None) == 'code':
             for output in cell.get('outputs', []):
                 output_type = output.get('output_type', '')
                 if output_type in {'execute_result', 'display_data'}:
-                    for key, value in output.get('data', {}).items():
-                        if key != 'application/json' and isinstance(value, list):
-                            output.data[key] = ''.join(value)
+                    _rejoin_mimebundle(output.get('data', {}))
                 elif output_type:
                     if isinstance(output.get('text', ''), list):
                         output.text = ''.join(output.text)
@@ -36,6 +52,15 @@ def rejoin_lines(nb):
     'image/svg+xml',
 }
 
+def _split_mimebundle(data):
+    """Split multi-line string fields in a mimebundle (in-place)"""
+    for key, value in list(data.items()):
+        if isinstance(value, string_types) and (
+            key.startswith('text/') or key in _non_text_split_mimes
+        ):
+            data[key] = value.splitlines(True)
+    return data
+
 def split_lines(nb):
     """split likely multiline text into lists of strings
 
@@ -49,14 +74,14 @@ def split_lines(nb):
         if isinstance(source, string_types):
             cell['source'] = source.splitlines(True)
 
+        attachments = cell.get('attachments', {})
+        for key, attachment in attachments.items():
+            _split_mimebundle(attachment)
+
         if cell.cell_type == 'code':
             for output in cell.outputs:
                 if output.output_type in {'execute_result', 'display_data'}:
-                    for key, value in output.data.items():
-                        if isinstance(value, string_types) and (
-                            key.startswith('text/') or key in _non_text_split_mimes
-                        ):
-                            output.data[key] = value.splitlines(True)
+                    _split_mimebundle(output.get('data', {}))
                 elif output.output_type == 'stream':
                     if isinstance(output.text, string_types):
                         output.text = output.text.splitlines(True)

diff --git a/nbformat/v4/tests/nbexamples.py b/nbformat/v4/tests/nbexamples.py
@@ -24,7 +24,13 @@
 ))
 
 cells.append(new_markdown_cell(
-    source='A random array',
+    source='Cell with attachments',
+    attachments={
+        'attachment1': {
+            'text/plain': '\n'.join(['a', 'b', 'c']),
+            'application/vnd.stuff+json': ['a', 1, 'x'],
+        }
+    }
 ))
 
 cells.append(new_raw_cell(
@@ -48,6 +54,31 @@
     execution_count=4,
 ))
 
+cells.append(new_code_cell(
+    source=u'json_outputs()',
+    execution_count=12,
+    outputs=[new_output(
+        output_type=u'display_data',
+        data={
+            'text/plain': u'<json outputs>',
+            'application/json': {
+                'key': 'value',
+                'x': 5,
+                'lis': [1, 2, 'x']
+            },
+            'application/vnd.listofstr+json': ['a', 'b', 'c'],
+            'application/vnd.numbers+json': [1, 2, 3],
+            'application/vnd.number+json': 42,
+            'application/vnd.object+json': {
+                'number': 5,
+                'array': [1,2],
+                'str': 'x'
+            },
+            'application/vnd.string+json': 'ok',
+        },
+    )]
+))
+
 cells.append(new_code_cell(
     source=u'print "ünîcødé"',
     execution_count=3,

diff --git a/nbformat/v4/tests/test_json.py b/nbformat/v4/tests/test_json.py
@@ -1,4 +1,5 @@
 from base64 import decodestring
+import json
 from unittest import TestCase
 
 from ipython_genutils.py3compat import unicode_type
@@ -27,6 +28,36 @@ def test_roundtrip_split(self):
         # This won't differ from test_roundtrip unless the default changes
         s = writes(nb0, split_lines=True)
         self.assertEqual(nbjson.reads(s),nb0)
+
+    def test_splitlines(self):
+        """Test splitlines in mime-bundles"""
+        s = writes(nb0, split_lines=True)
+        raw_nb = json.loads(s)
+
+        for i, ref_cell in enumerate(nb0.cells):
+            if ref_cell.source.strip() == 'Cell with attachments':
+                attach_ref = ref_cell['attachments']['attachment1']
+                attach_json = raw_nb['cells'][i]['attachments']['attachment1']
+            if ref_cell.source.strip() == 'json_outputs()':
+                output_ref = ref_cell['outputs'][0]['data']
+                output_json = raw_nb['cells'][i]['outputs'][0]['data']
+
+        for key, json_value in attach_json.items():
+            if key == 'text/plain':
+                # text should be split
+                assert json_value == attach_ref['text/plain'].splitlines(True)
+            else:
+                # JSON attachments
+                assert json_value == attach_ref[key]
+
+        # check that JSON outputs are left alone:
+        for key, json_value in output_json.items():
+            if key == 'text/plain':
+                # text should be split
+                assert json_value == output_ref['text/plain'].splitlines(True)
+            else:
+                # JSON outputs should be left alone
+                assert json_value == output_ref[key]
 
     def test_read_png(self):
         """PNG output data is b64 unicode"""