BUG: fix json segfaults
closes #11473
closes #10778
closes #11299

Author: Kieran O'Mahony <kieranom@gmail.com>

Closes #12802 from Komnomnomnom/json-seg-faults and squashes the following commits:

b14d0df [Kieran O'Mahony] CLN: rename json test inline with others
af006a4 [Kieran O'Mahony] BUG: fix json segfaults
Komnomnomnom authored and jreback committed Apr 26, 2016
1 parent c33eb36 commit 37a7e69
Showing 9 changed files with 284 additions and 42 deletions.
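Before the file diffs, a minimal repro sketch of the crashing cases this commit fixes (hedged: 0.18.1-era API, inputs adapted from the tests added below); prior to the fix each call could segfault instead of returning JSON:

    import datetime
    import pytz  # assumed installed; the new tests skip when it is missing
    import pandas as pd

    # GH10778: categorical (non-ndarray) blocks crashed DataFrame.to_json
    df = pd.DataFrame({"A": ["a", "b", "c", "a"]})
    df["B"] = df["A"].astype("category")
    print(df.to_json())

    # GH11473: timezone-aware datetime.time crashed the ujson encoder
    t = datetime.time(10, 12, 15, 343243, pytz.utc)
    print(pd.json.dumps(t))  # equals '"%s"' % t.isoformat()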
8 changes: 6 additions & 2 deletions doc/source/whatsnew/v0.18.1.txt
@@ -149,6 +149,7 @@ Other Enhancements
- ``pd.read_csv()`` now supports opening files using xz compression, via extension inference or explicit ``compression='xz'``; ``xz`` compression is also supported by ``DataFrame.to_csv`` in the same way (:issue:`11852`)
- ``pd.read_msgpack()`` now always gives writeable ndarrays even when compression is used (:issue:`12359`).
- ``pd.read_msgpack()`` now supports serializing and de-serializing categoricals with msgpack (:issue:`12573`)
- ``.to_json()`` now supports ``NDFrames`` that contain categorical and sparse data (:issue:`10778`)
- ``interpolate()`` now supports ``method='akima'`` (:issue:`7588`).
- ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)
- ``Index.take`` now handles ``allow_fill`` and ``fill_value`` consistently (:issue:`12631`)
@@ -398,8 +399,6 @@ Deprecations





.. _whatsnew_0181.performance:

Performance Improvements
@@ -443,6 +442,11 @@ Bug Fixes
- Bug in ``.resample(..).fillna(..)`` not correctly raising a ``ValueError`` when passing a non-string (:issue:`12952`)
- Bug fixes in various encoding and header processing issues in ``pd.read_sas()`` (:issue:`12659`, :issue:`12654`, :issue:`12647`, :issue:`12809`)
- Bug in ``pd.crosstab()`` which would silently ignore ``aggfunc`` if ``values=None`` (:issue:`12569`).
- Potential segfault in ``DataFrame.to_json`` when serialising ``datetime.time`` (:issue:`11473`).
- Potential segfault in ``DataFrame.to_json`` when attempting to serialise a 0d array (:issue:`11299`).
- Segfault in ``to_json`` when attempting to serialise a ``DataFrame`` or ``Series`` with non-ndarray values (:issue:`10778`).




- Bug in consistency of ``.name`` on ``.groupby(..).apply(..)`` cases (:issue:`12363`)
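Mirroring the sparse-data entry in the bug-fix list above (GH10778), a hedged sketch of the dense/sparse equivalence that ``test_sparse`` below pins down (``to_sparse`` is the 0.18.1-era API, deprecated in later pandas):

    import numpy as np
    import pandas as pd

    # sparse blocks are non-ndarray and used to segfault in to_json
    df = pd.DataFrame(np.random.randn(10, 4))
    df.iloc[:8] = np.nan
    sdf = df.to_sparse()
    assert df.to_json() == sdf.to_json()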
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -821,6 +821,99 @@ def my_handler_raises(obj):
DataFrame({'a': [1, 2, object()]}).to_json,
default_handler=my_handler_raises)

def test_categorical(self):
# GH4377 df.to_json segfaults with non-ndarray blocks
df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
df["B"] = df["A"]
expected = df.to_json()

df["B"] = df["A"].astype('category')
self.assertEqual(expected, df.to_json())

s = df["A"]
sc = df["B"]
self.assertEqual(s.to_json(), sc.to_json())

def test_datetime_tz(self):
# GH4377 df.to_json segfaults with non-ndarray blocks
tz_range = pd.date_range('20130101', periods=3, tz='US/Eastern')
tz_naive = tz_range.tz_convert('utc').tz_localize(None)

df = DataFrame({
'A': tz_range,
'B': pd.date_range('20130101', periods=3)})

df_naive = df.copy()
df_naive['A'] = tz_naive
expected = df_naive.to_json()
self.assertEqual(expected, df.to_json())

stz = Series(tz_range)
s_naive = Series(tz_naive)
self.assertEqual(stz.to_json(), s_naive.to_json())

def test_sparse(self):
# GH4377 df.to_json segfaults with non-ndarray blocks
df = pd.DataFrame(np.random.randn(10, 4))
df.ix[:8] = np.nan

sdf = df.to_sparse()
expected = df.to_json()
self.assertEqual(expected, sdf.to_json())

s = pd.Series(np.random.randn(10))
s.ix[:8] = np.nan
ss = s.to_sparse()

expected = s.to_json()
self.assertEqual(expected, ss.to_json())

def test_tz_is_utc(self):
exp = '"2013-01-10T05:00:00.000Z"'

ts = Timestamp('2013-01-10 05:00:00Z')
self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True))
dt = ts.to_datetime()
self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True))

ts = Timestamp('2013-01-10 00:00:00', tz='US/Eastern')
self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True))
dt = ts.to_datetime()
self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True))

ts = Timestamp('2013-01-10 00:00:00-0500')
self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True))
dt = ts.to_datetime()
self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True))

def test_tz_range_is_utc(self):
exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
dfexp = ('{"DT":{'
'"0":"2013-01-01T05:00:00.000Z",'
'"1":"2013-01-02T05:00:00.000Z"}}')

tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2)
self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
dti = pd.DatetimeIndex(tz_range)
self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
df = DataFrame({'DT': dti})
self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))

tz_range = pd.date_range('2013-01-01 00:00:00', periods=2,
tz='US/Eastern')
self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
dti = pd.DatetimeIndex(tz_range)
self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
df = DataFrame({'DT': dti})
self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))

tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2)
self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True))
dti = pd.DatetimeIndex(tz_range)
self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True))
df = DataFrame({'DT': dti})
self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True))
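The invariant these two tests pin down: with ISO dates, timezone-aware values are converted to UTC before formatting (and naive values are taken as UTC, per ``test_datetime_tz`` above), so all three constructions serialise to the same instants. A hedged round-trip sketch, assuming ``date_format='iso'`` is the ``DataFrame.to_json`` spelling of ``iso_dates=True``:

    import pandas as pd

    rng = pd.date_range('2013-01-01', periods=2, tz='US/Eastern')
    out = pd.DataFrame({'DT': rng}).to_json(date_format='iso')
    # parsing back yields the UTC instants 2013-01-01 05:00 and 2013-01-02 05:00
    # (exact dtype/tz handling of read_json may vary by pandas version)
    print(pd.read_json(out, convert_dates=['DT'])['DT'])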


if __name__ == '__main__':
import nose
Original file line number Diff line number Diff line change
@@ -23,7 +23,6 @@
import numpy as np
from numpy.testing import (assert_array_almost_equal_nulp,
assert_approx_equal)
import pytz
from pandas import DataFrame, Series, Index, NaT, DatetimeIndex
import pandas.util.testing as tm

@@ -365,15 +364,30 @@ def test_encodeTimeConversion(self):
datetime.time(),
datetime.time(1, 2, 3),
datetime.time(10, 12, 15, 343243),
datetime.time(10, 12, 15, 343243, pytz.utc),
# datetime.time(10, 12, 15, 343243, dateutil.tz.gettz('UTC')), #
# this segfaults! No idea why.
]
for test in tests:
output = ujson.encode(test)
expected = '"%s"' % test.isoformat()
self.assertEqual(expected, output)

def test_encodeTimeConversion_pytz(self):
# GH11473 to_json segfaults with timezone-aware datetimes
tm._skip_if_no_pytz()
import pytz
test = datetime.time(10, 12, 15, 343243, pytz.utc)
output = ujson.encode(test)
expected = '"%s"' % test.isoformat()
self.assertEqual(expected, output)

def test_encodeTimeConversion_dateutil(self):
# GH11473 to_json segfaults with timezone-aware datetimes
tm._skip_if_no_dateutil()
import dateutil
test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
output = ujson.encode(test)
expected = '"%s"' % test.isoformat()
self.assertEqual(expected, output)
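A note on the design choice above: the old single test imported pytz unconditionally and carried a commented-out dateutil case that segfaulted, whereas the split tests are skip-guarded per library, so the suite stays runnable without either optional dependency. A minimal sketch of the guard pattern, assuming the era's nose-based harness:

    import datetime
    import pandas.json as ujson
    import pandas.util.testing as tm

    def test_encode_time_pytz():
        tm._skip_if_no_pytz()  # raises a skip exception when pytz is absent
        import pytz
        t = datetime.time(10, 12, 15, 343243, pytz.utc)
        assert ujson.encode(t) == '"%s"' % t.isoformat()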

def test_nat(self):
input = NaT
assert ujson.encode(input) == 'null', "Expected null"
