Merged
45 changes: 44 additions & 1 deletion doc/source/io.rst
@@ -1054,8 +1054,9 @@ with optional parameters:
- ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10.
- ``force_ascii`` : force encoded string to be ASCII, default True.
- ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'.
- ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serialisable object.

Note NaN's, NaT's and None will be converted to null and datetime objects will be converted based on the date_format and date_unit parameters.
Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters.
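
For instance, a minimal sketch of the ``null`` conversion (the exact string shown is assumed output and may differ slightly in float formatting):

.. code-block:: python

    import numpy as np
    from pandas import Series

    s = Series([1.0, np.nan, 3.0])
    s.to_json()
    # missing values are encoded as null:
    # '{"0":1.0,"1":null,"2":3.0}'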

.. ipython:: python

@@ -1098,6 +1099,48 @@ Writing to a file, with a date index and a date column
dfj2.to_json('test.json')
open('test.json').read()

If the JSON serialiser cannot handle the container contents directly it will fall back in the following manner (see the sketch after this list):

- if a ``toDict`` method is defined by the unrecognised object then that
  will be called and its returned ``dict`` will be JSON serialised.
- if a ``default_handler`` has been passed to ``to_json`` that will
  be called to convert the object.
- otherwise an attempt is made to convert the object to a ``dict`` by
  parsing its contents. However, if the object is complex this will
  often fail with an ``OverflowError``.
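
A minimal sketch of the ``toDict`` path mentioned above (the ``Coords``
class is purely illustrative):

.. code-block:: python

    from pandas import DataFrame

    class Coords(object):
        """An object the serialiser does not recognise natively."""

        def __init__(self, x, y):
            self.x, self.y = x, y

        def toDict(self):
            # tried first by the serialiser, before default_handler
            return {'x': self.x, 'y': self.y}

    DataFrame({'a': [Coords(1, 2)]}).to_json()
    # expected output along the lines of '{"a":{"0":{"x":1,"y":2}}}'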

Your best bet when encountering an ``OverflowError`` during serialisation
is to specify a ``default_handler``. For example, ``timedelta`` can cause
problems:

.. ipython:: python
:suppress:

from datetime import timedelta
dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42])

.. code-block:: ipython

In [141]: from datetime import timedelta

In [142]: dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42])

In [143]: dftd.to_json()

---------------------------------------------------------------------------
OverflowError Traceback (most recent call last)
OverflowError: Maximum recursion level reached

which can be dealt with by specifying a simple ``default_handler``:

.. ipython:: python

dftd.to_json(default_handler=str)

def my_handler(obj):
return obj.total_seconds()
dftd.to_json(default_handler=my_handler)
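
The handler also composes with ``read_json`` for a round trip, though
values serialised via ``str`` naturally come back as strings rather than
``timedelta`` objects (a sketch, reusing ``dftd`` from above):

.. code-block:: python

    import pandas as pd

    # the timedeltas survive only in their string form
    pd.read_json(dftd.to_json(default_handler=str))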

Reading JSON
~~~~~~~~~~~~

2 changes: 2 additions & 0 deletions doc/source/release.rst
@@ -233,6 +233,8 @@ API Changes

- added ``date_unit`` parameter to specify resolution of timestamps. Options
are seconds, milliseconds, microseconds and nanoseconds. (:issue:`4362`, :issue:`4498`).
- added ``default_handler`` parameter to allow a callable to be passed which will be
responsible for handling otherwise unserialisable objects.

- ``Index`` and ``MultiIndex`` changes (:issue:`4039`):

23 changes: 16 additions & 7 deletions pandas/core/generic.py
@@ -707,7 +707,8 @@ def __setstate__(self, state):
# I/O Methods

def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
double_precision=10, force_ascii=True, date_unit='ms'):
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None):
"""
Convert the object to a JSON string.

@@ -728,25 +729,32 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
* DataFrame

- default is 'columns'
- allowed values are: {'split','records','index','columns','values'}
- allowed values are:
{'split','records','index','columns','values'}

* The format of the JSON string

- split : dict like {index -> [index], columns -> [columns], data -> [values]}
- records : list like [{column -> value}, ... , {column -> value}]
- split : dict like
{index -> [index], columns -> [columns], data -> [values]}
- records : list like
[{column -> value}, ... , {column -> value}]
- index : dict like {index -> {column -> value}}
- columns : dict like {column -> {index -> value}}
- values : just the values array

date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601)
default is epoch
date_format : type of date conversion, epoch or iso
epoch = epoch milliseconds, iso = ISO8601, default is epoch
double_precision : The number of decimal places to use when encoding
floating point values, default 10.
force_ascii : force encoded string to be ASCII, default True.
date_unit : string, default 'ms' (milliseconds)
The time unit to encode to, governs timestamp and ISO8601
precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
microsecond, and nanosecond respectively.
default_handler : callable, default None
Handler to call if object cannot otherwise be converted to a
suitable format for JSON. Should receive a single argument which is
the object to convert and return a serialisable object.

Returns
-------
@@ -761,7 +769,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
date_format=date_format,
double_precision=double_precision,
force_ascii=force_ascii,
date_unit=date_unit)
date_unit=date_unit,
default_handler=default_handler)

def to_hdf(self, path_or_buf, key, **kwargs):
""" activate the HDFStore
52 changes: 31 additions & 21 deletions pandas/io/json.py
@@ -17,19 +17,21 @@
dumps = _json.dumps
### interface to/from ###


def to_json(path_or_buf, obj, orient=None, date_format='epoch',
double_precision=10, force_ascii=True, date_unit='ms'):
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None):

if isinstance(obj, Series):
s = SeriesWriter(
obj, orient=orient, date_format=date_format,
double_precision=double_precision, ensure_ascii=force_ascii,
date_unit=date_unit).write()
date_unit=date_unit, default_handler=default_handler).write()
elif isinstance(obj, DataFrame):
s = FrameWriter(
obj, orient=orient, date_format=date_format,
double_precision=double_precision, ensure_ascii=force_ascii,
date_unit=date_unit).write()
date_unit=date_unit, default_handler=default_handler).write()
else:
raise NotImplementedError

@@ -45,7 +47,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
class Writer(object):

def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit):
ensure_ascii, date_unit, default_handler=None):
self.obj = obj

if orient is None:
@@ -56,6 +58,7 @@ def __init__(self, obj, orient, date_format, double_precision,
self.double_precision = double_precision
self.ensure_ascii = ensure_ascii
self.date_unit = date_unit
self.default_handler = default_handler

self.is_copy = False
self._format_axes()
@@ -70,7 +73,9 @@ def write(self):
double_precision=self.double_precision,
ensure_ascii=self.ensure_ascii,
date_unit=self.date_unit,
iso_dates=self.date_format == 'iso')
iso_dates=self.date_format == 'iso',
default_handler=self.default_handler)


class SeriesWriter(Writer):
_default_orient = 'index'
@@ -121,13 +126,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,

- default is ``'columns'``
- allowed values are: {'split','records','index','columns','values'}
- The DataFrame index must be unique for orients 'index' and 'columns'.
- The DataFrame columns must be unique for orients 'index', 'columns', and 'records'.
- The DataFrame index must be unique for orients 'index' and
'columns'.
- The DataFrame columns must be unique for orients 'index',
'columns', and 'records'.

* The format of the JSON string

- split : dict like ``{index -> [index], columns -> [columns], data -> [values]}``
- records : list like ``[{column -> value}, ... , {column -> value}]``
- split : dict like
``{index -> [index], columns -> [columns], data -> [values]}``
- records : list like
``[{column -> value}, ... , {column -> value}]``
- index : dict like ``{index -> {column -> value}}``
- columns : dict like ``{column -> {index -> value}}``
- values : just the values array
@@ -384,7 +393,6 @@ class SeriesParser(Parser):
_default_orient = 'index'
_split_keys = ('name', 'index', 'data')


def _parse_no_numpy(self):

json = self.json
@@ -542,7 +550,7 @@ def is_ok(col):
#----------------------------------------------------------------------
# JSON normalization routines

def nested_to_record(ds,prefix="",level=0):
def nested_to_record(ds, prefix="", level=0):
"""a simplified json_normalize

converts a nested dict into a flat dict ("record"), unlike json_normalize,
@@ -557,7 +565,8 @@ def nested_to_record(ds, prefix="", level=0):
d - dict or list of dicts, matching `ds`

Example:
IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),nested=dict(e=dict(c=1,d=2),d=2)))
IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),
nested=dict(e=dict(c=1,d=2),d=2)))
Out[52]:
{'dict1.c': 1,
'dict1.d': 2,
@@ -567,31 +576,31 @@ def nested_to_record(ds,prefix="",level=0):
'nested.e.d': 2}
"""
singleton = False
if isinstance(ds,dict):
if isinstance(ds, dict):
ds = [ds]
singleton = True

new_ds = []
for d in ds:

new_d = copy.deepcopy(d)
for k,v in d.items():
for k, v in d.items():
# each key gets renamed with prefix
if level == 0:
newkey = str(k)
else:
newkey = prefix+'.'+ str(k)
newkey = prefix + '.' + str(k)

# only dicts get recursively flattened
# only at level>1 do we rename the rest of the keys
if not isinstance(v,dict):
if level!=0: # so we skip copying for top level, common case
if not isinstance(v, dict):
if level != 0: # so we skip copying for top level, common case
v = new_d.pop(k)
new_d[newkey]= v
new_d[newkey] = v
continue
else:
v = new_d.pop(k)
new_d.update(nested_to_record(v,newkey,level+1))
new_d.update(nested_to_record(v, newkey, level+1))
new_ds.append(new_d)

if singleton:
@@ -663,13 +672,14 @@ def _pull_field(js, spec):
data = [data]

if record_path is None:
if any([isinstance(x,dict) for x in compat.itervalues(data[0])]):
if any([isinstance(x, dict) for x in compat.itervalues(data[0])]):
# naive normalization, this is idempotent for flat records
# and potentially will inflate the data considerably for
# deeply nested structures:
# {VeryLong: {b: 1, c: 2}} -> {VeryLong.b: 1, VeryLong.c: 2}
#
# TODO: handle record value which are lists, at least error reasonabley
# TODO: handle record value which are lists, at least error
# reasonably
data = nested_to_record(data)
return DataFrame(data)
elif not isinstance(record_path, list):
13 changes: 13 additions & 0 deletions pandas/io/tests/test_json/test_pandas.py
@@ -575,3 +575,16 @@ def test_url(self):

url = 'http://search.twitter.com/search.json?q=pandas%20python'
result = read_json(url)

def test_default_handler(self):
from datetime import timedelta
frame = DataFrame([timedelta(23), timedelta(seconds=5)])
self.assertRaises(OverflowError, frame.to_json)
expected = DataFrame([str(timedelta(23)), str(timedelta(seconds=5))])
assert_frame_equal(
expected, pd.read_json(frame.to_json(default_handler=str)))

def my_handler_raises(obj):
raise TypeError
self.assertRaises(
TypeError, frame.to_json, default_handler=my_handler_raises)
45 changes: 45 additions & 0 deletions pandas/io/tests/test_json/test_ujson.py
@@ -836,6 +836,51 @@ def toDict(self):
dec = ujson.decode(output)
self.assertEquals(dec, d)

def test_defaultHandler(self):

class _TestObject(object):

def __init__(self, val):
self.val = val

@property
def recursive_attr(self):
return _TestObject("recursive_attr")

def __str__(self):
return str(self.val)

self.assertRaises(OverflowError, ujson.encode, _TestObject("foo"))
self.assertEquals('"foo"', ujson.encode(_TestObject("foo"),
default_handler=str))

def my_handler(obj):
return "foobar"
self.assertEquals('"foobar"', ujson.encode(_TestObject("foo"),
default_handler=my_handler))

def my_handler_raises(obj):
raise TypeError("I raise for anything")
with tm.assertRaisesRegexp(TypeError, "I raise for anything"):
ujson.encode(_TestObject("foo"), default_handler=my_handler_raises)

def my_int_handler(obj):
return 42
self.assertEquals(
42, ujson.decode(ujson.encode(_TestObject("foo"),
default_handler=my_int_handler)))

def my_obj_handler(obj):
return datetime.datetime(2013, 2, 3)
self.assertEquals(
ujson.decode(ujson.encode(datetime.datetime(2013, 2, 3))),
ujson.decode(ujson.encode(_TestObject("foo"),
default_handler=my_obj_handler)))

l = [_TestObject("foo"), _TestObject("bar")]
self.assertEquals(json.loads(json.dumps(l, default=str)),
ujson.decode(ujson.encode(l, default_handler=str)))


class NumpyJSONTests(TestCase):
