Merged
45 changes: 44 additions & 1 deletion doc/source/io.rst
@@ -1054,8 +1054,9 @@ with optional parameters:
- ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10.
- ``force_ascii`` : force encoded string to be ASCII, default True.
- ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'.
- ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serialisable object.

Note NaN's, NaT's and None will be converted to null and datetime objects will be converted based on the date_format and date_unit parameters.
Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters.
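
For instance, a minimal sketch of the ``null`` conversion (the exact string shown is assumed output and may differ slightly in float formatting):

.. code-block:: python

    import numpy as np
    from pandas import Series

    s = Series([1.0, np.nan, 3.0])
    s.to_json()
    # missing values are encoded as null:
    # '{"0":1.0,"1":null,"2":3.0}'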

.. ipython:: python

@@ -1098,6 +1099,48 @@ Writing to a file, with a date index and a date column
dfj2.to_json('test.json')
open('test.json').read()

If the JSON serialiser cannot handle the container contents directly it will fall back in the following manner (see the sketch after this list):

- if a ``toDict`` method is defined by the unrecognised object then that
  will be called and its returned ``dict`` will be JSON serialised.
- if a ``default_handler`` has been passed to ``to_json`` that will
  be called to convert the object.
- otherwise an attempt is made to convert the object to a ``dict`` by
  parsing its contents. However, if the object is complex this will
  often fail with an ``OverflowError``.
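
A minimal sketch of the ``toDict`` path mentioned above (the ``Coords``
class is purely illustrative):

.. code-block:: python

    from pandas import DataFrame

    class Coords(object):
        """An object the serialiser does not recognise natively."""

        def __init__(self, x, y):
            self.x, self.y = x, y

        def toDict(self):
            # tried first by the serialiser, before default_handler
            return {'x': self.x, 'y': self.y}

    DataFrame({'a': [Coords(1, 2)]}).to_json()
    # expected output along the lines of '{"a":{"0":{"x":1,"y":2}}}'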

Your best bet when encountering an ``OverflowError`` during serialisation
is to specify a ``default_handler``. For example, ``timedelta`` can cause
problems:

.. ipython:: python
:suppress:

from datetime import timedelta
dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42])

.. code-block:: ipython

In [141]: from datetime import timedelta

In [142]: dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42])

In [143]: dftd.to_json()

---------------------------------------------------------------------------
OverflowError Traceback (most recent call last)
OverflowError: Maximum recursion level reached

which can be dealt with by specifying a simple ``default_handler``:

.. ipython:: python

dftd.to_json(default_handler=str)

def my_handler(obj):
return obj.total_seconds()
dftd.to_json(default_handler=my_handler)
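
The handler also composes with ``read_json`` for a round trip, though
values serialised via ``str`` naturally come back as strings rather than
``timedelta`` objects (a sketch, reusing ``dftd`` from above):

.. code-block:: python

    import pandas as pd

    # the timedeltas survive only in their string form
    pd.read_json(dftd.to_json(default_handler=str))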

Reading JSON
~~~~~~~~~~~~

2 changes: 2 additions & 0 deletions doc/source/release.rst
@@ -233,6 +233,8 @@ API Changes

- added ``date_unit`` parameter to specify resolution of timestamps. Options
are seconds, milliseconds, microseconds and nanoseconds. (:issue:`4362`, :issue:`4498`).
- added ``default_handler`` parameter to allow a callable to be passed which will be
responsible for handling otherwise unserialisable objects.

- ``Index`` and ``MultiIndex`` changes (:issue:`4039`):

23 changes: 16 additions & 7 deletions pandas/core/generic.py
@@ -707,7 +707,8 @@ def __setstate__(self, state):
# I/O Methods

def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
double_precision=10, force_ascii=True, date_unit='ms'):
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None):
"""
Convert the object to a JSON string.

@@ -728,25 +729,32 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
* DataFrame

- default is 'columns'
- allowed values are: {'split','records','index','columns','values'}
- allowed values are:
{'split','records','index','columns','values'}

* The format of the JSON string

- split : dict like {index -> [index], columns -> [columns], data -> [values]}
- records : list like [{column -> value}, ... , {column -> value}]
- split : dict like
{index -> [index], columns -> [columns], data -> [values]}
- records : list like
[{column -> value}, ... , {column -> value}]
- index : dict like {index -> {column -> value}}
- columns : dict like {column -> {index -> value}}
- values : just the values array

date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601)
default is epoch
date_format : type of date conversion, epoch or iso
epoch = epoch milliseconds, iso = ISO8601, default is epoch
double_precision : The number of decimal places to use when encoding
floating point values, default 10.
force_ascii : force encoded string to be ASCII, default True.
date_unit : string, default 'ms' (milliseconds)
The time unit to encode to, governs timestamp and ISO8601
precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
microsecond, and nanosecond respectively.
default_handler : callable, default None
Handler to call if object cannot otherwise be converted to a
suitable format for JSON. Should receive a single argument which is
the object to convert and return a serialisable object.

Returns
-------
@@ -761,7 +769,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
date_format=date_format,
double_precision=double_precision,
force_ascii=force_ascii,
date_unit=date_unit)
date_unit=date_unit,
default_handler=default_handler)

def to_hdf(self, path_or_buf, key, **kwargs):
""" activate the HDFStore
52 changes: 31 additions & 21 deletions pandas/io/json.py
@@ -17,19 +17,21 @@
dumps = _json.dumps
### interface to/from ###


def to_json(path_or_buf, obj, orient=None, date_format='epoch',
double_precision=10, force_ascii=True, date_unit='ms'):
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None):

if isinstance(obj, Series):
s = SeriesWriter(
obj, orient=orient, date_format=date_format,
double_precision=double_precision, ensure_ascii=force_ascii,
date_unit=date_unit).write()
date_unit=date_unit, default_handler=default_handler).write()
elif isinstance(obj, DataFrame):
s = FrameWriter(
obj, orient=orient, date_format=date_format,
double_precision=double_precision, ensure_ascii=force_ascii,
date_unit=date_unit).write()
date_unit=date_unit, default_handler=default_handler).write()
else:
raise NotImplementedError

@@ -45,7 +47,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
class Writer(object):

def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit):
ensure_ascii, date_unit, default_handler=None):
self.obj = obj

if orient is None:
@@ -56,6 +58,7 @@ def __init__(self, obj, orient, date_format, double_precision,
self.double_precision = double_precision
self.ensure_ascii = ensure_ascii
self.date_unit = date_unit
self.default_handler = default_handler

self.is_copy = False
self._format_axes()
@@ -70,7 +73,9 @@ def write(self):
double_precision=self.double_precision,
ensure_ascii=self.ensure_ascii,
date_unit=self.date_unit,
iso_dates=self.date_format == 'iso')
iso_dates=self.date_format == 'iso',
default_handler=self.default_handler)


class SeriesWriter(Writer):
_default_orient = 'index'
@@ -121,13 +126,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,

- default is ``'columns'``
- allowed values are: {'split','records','index','columns','values'}
- The DataFrame index must be unique for orients 'index' and 'columns'.
- The DataFrame columns must be unique for orients 'index', 'columns', and 'records'.
- The DataFrame index must be unique for orients 'index' and
'columns'.
- The DataFrame columns must be unique for orients 'index',
'columns', and 'records'.

* The format of the JSON string

- split : dict like ``{index -> [index], columns -> [columns], data -> [values]}``
- records : list like ``[{column -> value}, ... , {column -> value}]``
- split : dict like
``{index -> [index], columns -> [columns], data -> [values]}``
- records : list like
``[{column -> value}, ... , {column -> value}]``
- index : dict like ``{index -> {column -> value}}``
- columns : dict like ``{column -> {index -> value}}``
- values : just the values array
@@ -384,7 +393,6 @@ class SeriesParser(Parser):
_default_orient = 'index'
_split_keys = ('name', 'index', 'data')


def _parse_no_numpy(self):

json = self.json
@@ -542,7 +550,7 @@ def is_ok(col):
#----------------------------------------------------------------------
# JSON normalization routines

def nested_to_record(ds,prefix="",level=0):
def nested_to_record(ds, prefix="", level=0):
"""a simplified json_normalize

converts a nested dict into a flat dict ("record"), unlike json_normalize,
@@ -557,7 +565,8 @@ def nested_to_record(ds, prefix="", level=0):
d - dict or list of dicts, matching `ds`

Example:
IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),nested=dict(e=dict(c=1,d=2),d=2)))
IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),
nested=dict(e=dict(c=1,d=2),d=2)))
Out[52]:
{'dict1.c': 1,
'dict1.d': 2,
@@ -567,31 +576,31 @@ def nested_to_record(ds,prefix="",level=0):
'nested.e.d': 2}
"""
singleton = False
if isinstance(ds,dict):
if isinstance(ds, dict):
ds = [ds]
singleton = True

new_ds = []
for d in ds:

new_d = copy.deepcopy(d)
for k,v in d.items():
for k, v in d.items():
# each key gets renamed with prefix
if level == 0:
newkey = str(k)
else:
newkey = prefix+'.'+ str(k)
newkey = prefix + '.' + str(k)

# only dicts get recursively flattened
# only at level>1 do we rename the rest of the keys
if not isinstance(v,dict):
if level!=0: # so we skip copying for top level, common case
if not isinstance(v, dict):
if level != 0: # so we skip copying for top level, common case
v = new_d.pop(k)
new_d[newkey]= v
new_d[newkey] = v
continue
else:
v = new_d.pop(k)
new_d.update(nested_to_record(v,newkey,level+1))
new_d.update(nested_to_record(v, newkey, level+1))
new_ds.append(new_d)

if singleton:
@@ -663,13 +672,14 @@ def _pull_field(js, spec):
data = [data]

if record_path is None:
if any([isinstance(x,dict) for x in compat.itervalues(data[0])]):
if any([isinstance(x, dict) for x in compat.itervalues(data[0])]):
# naive normalization, this is idempotent for flat records
# and potentially will inflate the data considerably for
# deeply nested structures:
# {VeryLong: {b: 1, c: 2}} -> {VeryLong.b: 1, VeryLong.c: 2}
#
# TODO: handle record value which are lists, at least error reasonabley
# TODO: handle record value which are lists, at least error
# reasonably
data = nested_to_record(data)
return DataFrame(data)
elif not isinstance(record_path, list):
13 changes: 13 additions & 0 deletions pandas/io/tests/test_json/test_pandas.py
@@ -575,3 +575,16 @@ def test_url(self):

url = 'http://search.twitter.com/search.json?q=pandas%20python'
result = read_json(url)

def test_default_handler(self):
from datetime import timedelta
frame = DataFrame([timedelta(23), timedelta(seconds=5)])
self.assertRaises(OverflowError, frame.to_json)
expected = DataFrame([str(timedelta(23)), str(timedelta(seconds=5))])
assert_frame_equal(
expected, pd.read_json(frame.to_json(default_handler=str)))

def my_handler_raises(obj):
raise TypeError
self.assertRaises(
TypeError, frame.to_json, default_handler=my_handler_raises)
45 changes: 45 additions & 0 deletions pandas/io/tests/test_json/test_ujson.py
@@ -836,6 +836,51 @@ def toDict(self):
dec = ujson.decode(output)
self.assertEquals(dec, d)

def test_defaultHandler(self):

class _TestObject(object):

def __init__(self, val):
self.val = val

@property
def recursive_attr(self):
return _TestObject("recursive_attr")

def __str__(self):
return str(self.val)

self.assertRaises(OverflowError, ujson.encode, _TestObject("foo"))
self.assertEquals('"foo"', ujson.encode(_TestObject("foo"),
default_handler=str))

def my_handler(obj):
return "foobar"
self.assertEquals('"foobar"', ujson.encode(_TestObject("foo"),
default_handler=my_handler))

def my_handler_raises(obj):
raise TypeError("I raise for anything")
with tm.assertRaisesRegexp(TypeError, "I raise for anything"):
ujson.encode(_TestObject("foo"), default_handler=my_handler_raises)

def my_int_handler(obj):
return 42
self.assertEquals(
42, ujson.decode(ujson.encode(_TestObject("foo"),
default_handler=my_int_handler)))

def my_obj_handler(obj):
return datetime.datetime(2013, 2, 3)
self.assertEquals(
ujson.decode(ujson.encode(datetime.datetime(2013, 2, 3))),
ujson.decode(ujson.encode(_TestObject("foo"),
default_handler=my_obj_handler)))

l = [_TestObject("foo"), _TestObject("bar")]
self.assertEquals(json.loads(json.dumps(l, default=str)),
ujson.decode(ujson.encode(l, default_handler=str)))


class NumpyJSONTests(TestCase):
