Permalink
Browse files

split likely multiline strings when writing to/from JSON

  • Loading branch information...
minrk committed Nov 9, 2011
1 parent 7c0ffa5 commit 5d9a42c2c858ba111e076e01820587a1900aef7e
@@ -118,7 +118,12 @@ def get_notebook(self, notebook_id, format=u'json'):
if format not in self.allowed_formats:
raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
last_modified, nb = self.get_notebook_object(notebook_id)
- data = current.writes(nb, format)
+ kwargs = {}
+ if format == 'json':
+ # don't split lines for sending over the wire, because it
+ # should match the Python in-memory format.
+ kwargs['split_lines'] = False
+ data = current.writes(nb, format, **kwargs)
name = nb.get('name','notebook')
return last_modified, name, data
@@ -16,10 +16,14 @@
# Imports
#-----------------------------------------------------------------------------
-from .nbbase import from_dict
-from .rwbase import NotebookReader, NotebookWriter, restore_bytes
+import copy
import json
+from .nbbase import from_dict
+from .rwbase import (
+ NotebookReader, NotebookWriter, restore_bytes, rejoin_lines, split_lines
+)
+
#-----------------------------------------------------------------------------
# Code
#-----------------------------------------------------------------------------
@@ -40,7 +44,7 @@ def reads(self, s, **kwargs):
return nb
def to_notebook(self, d, **kwargs):
- return restore_bytes(from_dict(d))
+ return restore_bytes(rejoin_lines(from_dict(d)))
class JSONWriter(NotebookWriter):
@@ -49,8 +53,10 @@ def writes(self, nb, **kwargs):
kwargs['cls'] = BytesEncoder
kwargs['indent'] = 4
kwargs['sort_keys'] = True
+ if kwargs.pop('split_lines', True):
+ nb = split_lines(copy.deepcopy(nb))
return json.dumps(nb, **kwargs)
-
+
_reader = JSONReader()
_writer = JSONWriter()
@@ -41,6 +41,61 @@ def restore_bytes(nb):
output.jpeg = str_to_bytes(output.jpeg, 'ascii')
return nb
# output keys that are likely to have multiline values
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']

# text-cell keys that are likely to have multiline values
_multiline_text_keys = ['source', 'rendered']

try:
    _string_types = basestring  # Python 2: matches both str and unicode
except NameError:
    _string_types = str  # Python 3: basestring no longer exists


def _join_lines(lines):
    """Join a list of lines back into a single string.

    Lists produced by ``str.splitlines(True)`` carry their own line
    terminators and are concatenated as-is, which restores the original text
    exactly (trailing newline and ``\\r\\n`` endings included).  Lists whose
    items carry no terminator at all are joined with ``\\n`` so that files
    written without line endings stay readable.
    """
    # An item that is changed by splitlines() still has its terminator.
    # Empty items are skipped: ``u''.splitlines()`` is an empty list.
    keeps_ends = any(
        line and line.splitlines()[0] != line for line in lines
    )
    separator = u'' if keeps_ends else u'\n'
    return separator.join(lines)


def _split_text(text):
    """Split a string into lines, keeping each line's terminator."""
    # keepends=True is what makes the split losslessly reversible
    return text.splitlines(True)


def _convert_multiline(nb, from_type, convert):
    """Apply ``convert`` in place to every multiline field of ``from_type``.

    Visits ``input`` and the ``_multiline_outputs`` keys of each output for
    code cells, and the ``_multiline_text_keys`` for every other cell type.
    Fields that are missing or not an instance of ``from_type`` are left
    untouched.  Returns ``nb`` itself.
    """
    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type == 'code':
                if 'input' in cell and isinstance(cell.input, from_type):
                    cell.input = convert(cell.input)
                for output in cell.outputs:
                    for key in _multiline_outputs:
                        item = output.get(key, None)
                        if isinstance(item, from_type):
                            output[key] = convert(item)
            else:  # text cell
                for key in _multiline_text_keys:
                    item = cell.get(key, None)
                    if isinstance(item, from_type):
                        cell[key] = convert(item)
    return nb


def rejoin_lines(nb):
    """rejoin multiline text into strings

    For reversing effects of ``split_lines(nb)``.

    This only rejoins lines that have been split, so if text objects were not
    split they will pass through unchanged.  Lists of lines with and without
    line terminators are both accepted.

    The notebook is modified in place and also returned.

    Used when reading JSON files that may have been passed through split_lines.
    """
    return _convert_multiline(nb, list, _join_lines)


def split_lines(nb):
    """split likely multiline text into lists of strings

    For file output more friendly to line-based VCS. ``rejoin_lines(nb)`` will
    reverse the effects of ``split_lines(nb)``.

    Each line keeps its terminator, so the reversal is exact: a trailing
    newline and ``\\r\\n`` line endings survive the round trip.

    The notebook is modified in place and also returned; pass a copy if the
    original must stay untouched.

    Used when writing JSON files.
    """
    return _convert_multiline(nb, _string_types, _split_text)
# b64 encode/decode are never actually used, because all bytes objects in
# the notebook are already b64-encoded, and we don't need/want to double-encode
@@ -16,6 +16,19 @@ def test_roundtrip(self):
# print
# print s
self.assertEquals(reads(s),nb0)
+
def test_roundtrip_nosplit(self):
    """Ensure that multiline blobs are still readable"""
    # Writing with split_lines=False keeps multiline fields as single
    # strings, the layout used by notebooks written prior to the
    # splitlines change; reading that back must still give nb0.
    s = writes(nb0, split_lines=False)
    # assertEqual rather than the deprecated assertEquals alias
    self.assertEqual(reads(s), nb0)
+
def test_roundtrip_split(self):
    """Ensure that splitting multiline blocks is safe"""
    # This won't differ from test_roundtrip unless the default changes;
    # split_lines=True is passed explicitly so the split path stays
    # covered either way.
    s = writes(nb0, split_lines=True)
    # assertEqual rather than the deprecated assertEquals alias
    self.assertEqual(reads(s), nb0)

0 comments on commit 5d9a42c

Please sign in to comment.