Split likely-multiline strings into lists of lines when writing JSON, and rejoin them when reading

minrk · Nov 9, 2011 · 5d9a42c · 5d9a42c
1 parent 7c0ffa5
commit 5d9a42c
Show file tree

Hide file tree

Showing 4 changed files with 84 additions and 5 deletions.
diff --git a/IPython/frontend/html/notebook/notebookmanager.py b/IPython/frontend/html/notebook/notebookmanager.py
@@ -118,7 +118,12 @@ def get_notebook(self, notebook_id, format=u'json'):
         if format not in self.allowed_formats:
             raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
         last_modified, nb = self.get_notebook_object(notebook_id)
-        data = current.writes(nb, format)
+        kwargs = {}
+        if format == 'json':
+            # don't split lines for sending over the wire, because it
+            # should match the Python in-memory format.
+            kwargs['split_lines'] = False
+        data = current.writes(nb, format, **kwargs)
         name = nb.get('name','notebook')
         return last_modified, name, data
 

diff --git a/IPython/nbformat/v2/nbjson.py b/IPython/nbformat/v2/nbjson.py
@@ -16,10 +16,14 @@
 # Imports
 #-----------------------------------------------------------------------------
 
-from .nbbase import from_dict
+import copy
-from .rwbase import NotebookReader, NotebookWriter, restore_bytes
 import json
 
+from .nbbase import from_dict
+from .rwbase import (
+    NotebookReader, NotebookWriter, restore_bytes, rejoin_lines, split_lines
+)
+
 #-----------------------------------------------------------------------------
 # Code
 #-----------------------------------------------------------------------------
@@ -40,7 +44,7 @@ def reads(self, s, **kwargs):
         return nb
 
     def to_notebook(self, d, **kwargs):
-        return restore_bytes(from_dict(d))
+        return restore_bytes(rejoin_lines(from_dict(d)))  # merge split line-lists back into strings before restoring bytes
 
 
 class JSONWriter(NotebookWriter):
@@ -49,8 +53,10 @@ def writes(self, nb, **kwargs):
         kwargs['cls'] = BytesEncoder
         kwargs['indent'] = 4
         kwargs['sort_keys'] = True
+        if kwargs.pop('split_lines', True):
+            nb = split_lines(copy.deepcopy(nb))
         return json.dumps(nb, **kwargs)
-
+    
 
 _reader = JSONReader()
 _writer = JSONWriter()

diff --git a/IPython/nbformat/v2/rwbase.py b/IPython/nbformat/v2/rwbase.py
@@ -41,6 +41,61 @@ def restore_bytes(nb):
                         output.jpeg = str_to_bytes(output.jpeg, 'ascii')
     return nb
 
+# output keys that are likely to have multiline values
+_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
+
+def _join_lines(lines):
+    """join a list of lines written by ``split_lines`` into one string
+
+    Entries that kept their line endings (``splitlines(True)``) are simply
+    concatenated; entries without endings are joined with newlines.
+    """
+    if lines and lines[0].splitlines(True) != lines[0].splitlines():
+        return u''.join(lines)  # endings were kept: lossless concatenation
+    return u'\n'.join(lines)  # older files: endings were dropped on write
+
+def _convert_multiline(nb, kind, convert):
+    """replace each likely multiline value of type ``kind`` by ``convert(value)``
+
+    Visits ``input`` and the ``_multiline_outputs`` keys of code cells, and
+    ``source``/``rendered`` of every other cell.  Edits ``nb`` in place.
+    """
+    for ws in nb.worksheets:
+        for cell in ws.cells:
+            if cell.cell_type == 'code':
+                if 'input' in cell and isinstance(cell.input, kind):
+                    cell.input = convert(cell.input)
+                for output in cell.outputs:
+                    for key in _multiline_outputs:
+                        item = output.get(key, None)
+                        if isinstance(item, kind):
+                            output[key] = convert(item)
+            else: # text cell
+                for key in ['source', 'rendered']:
+                    item = cell.get(key, None)
+                    if isinstance(item, kind):
+                        cell[key] = convert(item)
+    return nb
+
+
+def rejoin_lines(nb):
+    """rejoin multiline text into strings (in place)
+
+    For reversing effects of ``split_lines(nb)``.  Only values stored as lists
+    are rejoined, so unsplit text passes through unchanged.  Returns ``nb``.
+    Used when reading JSON files that may have been passed through split_lines.
+    """
+    return _convert_multiline(nb, list, _join_lines)
+
+
+def split_lines(nb):
+    """split likely multiline text into lists of strings (in place)
+
+    For file output more friendly to line-based VCS.  Line endings are kept on
+    every entry so that ``rejoin_lines(nb)`` restores the text exactly.
+    Used when writing JSON files.  Returns the same ``nb``.
+    """
+    return _convert_multiline(nb, basestring, lambda s: s.splitlines(True))
 
 # b64 encode/decode are never actually used, because all bytes objects in
 # the notebook are already b64-encoded, and we don't need/want to double-encode

diff --git a/IPython/nbformat/v2/tests/test_json.py b/IPython/nbformat/v2/tests/test_json.py
@@ -16,6 +16,19 @@ def test_roundtrip(self):
 #        print
 #        print s
         self.assertEquals(reads(s),nb0)
+
+    def test_roundtrip_nosplit(self):
+        """Ensure that multiline blobs are still readable"""
+        # Notebooks written before the split_lines change store multiline
+        # text as single strings; reading them back must still work.
+        s = writes(nb0, split_lines=False)
+        self.assertEqual(reads(s), nb0)
+
+    def test_roundtrip_split(self):
+        """Ensure that splitting multiline blocks is safe"""
+        # This won't differ from test_roundtrip unless the default changes
+        s = writes(nb0, split_lines=True)
+        self.assertEqual(reads(s), nb0)