Skip to content

Commit

Permalink
split likely multiline strings when writing to/from JSON
Browse files Browse the repository at this point in the history
  • Loading branch information
minrk committed Nov 9, 2011
1 parent 7c0ffa5 commit 5d9a42c
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 5 deletions.
7 changes: 6 additions & 1 deletion IPython/frontend/html/notebook/notebookmanager.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -118,7 +118,12 @@ def get_notebook(self, notebook_id, format=u'json'):
if format not in self.allowed_formats: if format not in self.allowed_formats:
raise web.HTTPError(415, u'Invalid notebook format: %s' % format) raise web.HTTPError(415, u'Invalid notebook format: %s' % format)
last_modified, nb = self.get_notebook_object(notebook_id) last_modified, nb = self.get_notebook_object(notebook_id)
data = current.writes(nb, format) kwargs = {}
if format == 'json':
# don't split lines for sending over the wire, because it
# should match the Python in-memory format.
kwargs['split_lines'] = False
data = current.writes(nb, format, **kwargs)
name = nb.get('name','notebook') name = nb.get('name','notebook')
return last_modified, name, data return last_modified, name, data


Expand Down
14 changes: 10 additions & 4 deletions IPython/nbformat/v2/nbjson.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@
# Imports # Imports
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------


from .nbbase import from_dict import copy
from .rwbase import NotebookReader, NotebookWriter, restore_bytes
import json import json


from .nbbase import from_dict
from .rwbase import (
NotebookReader, NotebookWriter, restore_bytes, rejoin_lines, split_lines
)

#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# Code # Code
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
Expand All @@ -40,7 +44,7 @@ def reads(self, s, **kwargs):
return nb return nb


def to_notebook(self, d, **kwargs): def to_notebook(self, d, **kwargs):
return restore_bytes(from_dict(d)) return restore_bytes(rejoin_lines(from_dict(d)))




class JSONWriter(NotebookWriter): class JSONWriter(NotebookWriter):
Expand All @@ -49,8 +53,10 @@ def writes(self, nb, **kwargs):
kwargs['cls'] = BytesEncoder kwargs['cls'] = BytesEncoder
kwargs['indent'] = 4 kwargs['indent'] = 4
kwargs['sort_keys'] = True kwargs['sort_keys'] = True
if kwargs.pop('split_lines', True):
nb = split_lines(copy.deepcopy(nb))
return json.dumps(nb, **kwargs) return json.dumps(nb, **kwargs)



_reader = JSONReader() _reader = JSONReader()
_writer = JSONWriter() _writer = JSONWriter()
Expand Down
55 changes: 55 additions & 0 deletions IPython/nbformat/v2/rwbase.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -41,6 +41,61 @@ def restore_bytes(nb):
output.jpeg = str_to_bytes(output.jpeg, 'ascii') output.jpeg = str_to_bytes(output.jpeg, 'ascii')
return nb return nb


# Output keys that are likely to have multiline values.  ``rejoin_lines`` and
# ``split_lines`` examine exactly these keys on every output of a code cell.
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']

def rejoin_lines(nb):
    """Rejoin text stored as lists of lines back into single strings.

    Reverses the effect of ``split_lines(nb)``.  Only values that are lists
    are joined (with a newline between items); any other value is left
    untouched, so text that was never split passes through unchanged.

    Used when reading JSON files that may have been passed through
    ``split_lines``.

    The notebook is modified in place and the same object is returned.
    """
    def _join_listed(mapping, keys):
        # Join, in place, each value under ``keys`` that is currently a list.
        for name in keys:
            value = mapping.get(name, None)
            if isinstance(value, list):
                mapping[name] = u'\n'.join(value)

    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                # every non-code cell is handled as a text cell
                _join_listed(cell, ['source', 'rendered'])
                continue
            if 'input' in cell and isinstance(cell.input, list):
                cell.input = u'\n'.join(cell.input)
            for output in cell.outputs:
                _join_listed(output, _multiline_outputs)
    return nb


def split_lines(nb):
    """Split likely multiline text into lists of strings.

    Produces file output that is friendlier to line-based VCS.
    ``rejoin_lines(nb)`` reverses the effect of ``split_lines(nb)``.
    Used when writing JSON files.

    For code cells the ``input`` value and, on each output, the values under
    the keys in ``_multiline_outputs`` are split; for every other cell type
    the ``source`` and ``rendered`` values are split.  Only string values are
    touched, so text that is already a list is left as it is.

    Text is split on the newline character only, which is the exact inverse
    of the newline join done by ``rejoin_lines``: a trailing newline and any
    carriage returns survive a split/rejoin round trip.  An empty string
    becomes an empty list.

    The notebook is modified in place and the same object is returned, so
    callers that still need the unsplit notebook should pass in a copy.
    """
    def _split(text):
        # str.splitlines() would drop a trailing newline and swallow '\r\n',
        # '\r' and other line boundaries, which '\n'.join cannot restore.
        # Splitting on '\n' alone keeps the round trip lossless.
        return text.split('\n') if text else []

    for ws in nb.worksheets:
        for cell in ws.cells:
            if cell.cell_type == 'code':
                if 'input' in cell and isinstance(cell.input, basestring):
                    cell.input = _split(cell.input)
                for output in cell.outputs:
                    for key in _multiline_outputs:
                        item = output.get(key, None)
                        if isinstance(item, basestring):
                            output[key] = _split(item)
            else:  # text cell
                for key in ['source', 'rendered']:
                    item = cell.get(key, None)
                    if isinstance(item, basestring):
                        cell[key] = _split(item)
    return nb


# b64 encode/decode are never actually used, because all bytes objects in # b64 encode/decode are never actually used, because all bytes objects in
# the notebook are already b64-encoded, and we don't need/want to double-encode # the notebook are already b64-encoded, and we don't need/want to double-encode
Expand Down
13 changes: 13 additions & 0 deletions IPython/nbformat/v2/tests/test_json.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@ def test_roundtrip(self):
# print # print
# print s # print s
self.assertEquals(reads(s),nb0) self.assertEquals(reads(s),nb0)

def test_roundtrip_nosplit(self):
    """Ensure that multiline blobs are still readable"""
    # Ensures that notebooks written prior to the splitlines change
    # are still readable: write without splitting, then read back.
    s = writes(nb0, split_lines=False)
    # assertEqual rather than the deprecated assertEquals alias
    self.assertEqual(reads(s), nb0)

def test_roundtrip_split(self):
    """Ensure that splitting multiline blocks is safe"""
    # This won't differ from test_roundtrip unless the default changes
    s = writes(nb0, split_lines=True)
    # assertEqual rather than the deprecated assertEquals alias
    self.assertEqual(reads(s), nb0)






0 comments on commit 5d9a42c

Please sign in to comment.