Merge pull request #10322 from evanpw/json
Bug in to_json causing segfault with a CategoricalIndex (GH #10317)
jreback committed Jun 10, 2015
2 parents ba69a49 + 588437c commit 07ea11c
Showing 3 changed files with 76 additions and 36 deletions.
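For context, a minimal sketch of the failure mode described in GH #10317, reconstructed from the issue title and the tests added below rather than copied from the report (the column name and values are illustrative):

    import pandas as pd

    # A small frame indexed by a CategoricalIndex, mirroring the tests added below.
    df = pd.DataFrame({"A": [1, 2, 3]},
                      index=pd.CategoricalIndex(["a", "b", "c"], name="E"))

    # Before this fix, serializing such a frame could crash the interpreter for
    # some orients instead of producing JSON (GH #10317).
    print(df.to_json())                  # default orient, "columns"
    print(df.to_json(orient="split"))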
doc/source/whatsnew/v0.16.2.txt: 1 addition, 0 deletions
@@ -120,6 +120,7 @@ Bug Fixes
- Bug where read_hdf store.select modifies the passed columns list when
multi-indexed (:issue:`7212`)
- Bug in ``Categorical`` repr with ``display.width`` of ``None`` in Python 3 (:issue:`10087`)
+- Bug in ``to_json`` with certain orients and a ``CategoricalIndex`` would segfault (:issue:`10317`)

- Bug where some of the nan funcs do not have consistent return dtypes (:issue:`10251`)

pandas/io/tests/test_json/test_pandas.py: 55 additions, 27 deletions
@@ -4,7 +4,7 @@
import os

import numpy as np
-from pandas import Series, DataFrame, DatetimeIndex, Timestamp
+from pandas import Series, DataFrame, DatetimeIndex, Timestamp, CategoricalIndex
from datetime import timedelta
import pandas as pd
read_json = pd.read_json
@@ -23,6 +23,11 @@
for k, v in compat.iteritems(_seriesd)))

_tsframe = DataFrame(_tsd)
+_cat_frame = _frame.copy()
+cat = ['bah']*5 + ['bar']*5 + ['baz']*5 + ['foo']*(len(_cat_frame)-15)
+_cat_frame.index = pd.CategoricalIndex(cat,name='E')
+_cat_frame['E'] = list(reversed(cat))
+_cat_frame['sort'] = np.arange(len(_cat_frame))

_mixed_frame = _frame.copy()

@@ -48,6 +53,7 @@ def setUp(self):
self.intframe = _intframe.copy()
self.tsframe = _tsframe.copy()
self.mixed_frame = _mixed_frame.copy()
+self.categorical = _cat_frame.copy()

def tearDown(self):
del self.dirpath
@@ -128,8 +134,22 @@ def _check(df):

def test_frame_from_json_to_json(self):
def _check_orient(df, orient, dtype=None, numpy=False,
-convert_axes=True, check_dtype=True, raise_ok=None):
-df = df.sort()
+convert_axes=True, check_dtype=True, raise_ok=None,
+sort=None):
+if sort is not None:
+df = df.sort(sort)
+else:
+df = df.sort()
+
+# if we are not unique, then check that we are raising ValueError
+# for the appropriate orients
+if not df.index.is_unique and orient in ['index','columns']:
+self.assertRaises(ValueError, lambda : df.to_json(orient=orient))
+return
+if not df.columns.is_unique and orient in ['index','columns','records']:
+self.assertRaises(ValueError, lambda : df.to_json(orient=orient))
+return
+
dfjson = df.to_json(orient=orient)

try:
@@ -141,7 +161,10 @@ def _check_orient(df, orient, dtype=None, numpy=False,
return
raise

-unser = unser.sort()
+if sort is not None and sort in unser.columns:
+unser = unser.sort(sort)
+else:
+unser = unser.sort()

if dtype is False:
check_dtype=False
@@ -160,7 +183,9 @@ def _check_orient(df, orient, dtype=None, numpy=False,
# index and col labels might not be strings
unser.index = [str(i) for i in unser.index]
unser.columns = [str(i) for i in unser.columns]
-unser = unser.sort()
+
+if sort is None:
+unser = unser.sort()
assert_almost_equal(df.values, unser.values)
else:
if convert_axes:
@@ -169,45 +194,45 @@ def _check_orient(df, orient, dtype=None, numpy=False,
assert_frame_equal(df, unser, check_less_precise=False,
check_dtype=check_dtype)

-def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None):
+def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, sort=None):

# numpy=False
if convert_axes:
-_check_orient(df, "columns", dtype=dtype)
-_check_orient(df, "records", dtype=dtype)
-_check_orient(df, "split", dtype=dtype)
-_check_orient(df, "index", dtype=dtype)
-_check_orient(df, "values", dtype=dtype)
-
-_check_orient(df, "columns", dtype=dtype, convert_axes=False)
-_check_orient(df, "records", dtype=dtype, convert_axes=False)
-_check_orient(df, "split", dtype=dtype, convert_axes=False)
-_check_orient(df, "index", dtype=dtype, convert_axes=False)
-_check_orient(df, "values", dtype=dtype ,convert_axes=False)
+_check_orient(df, "columns", dtype=dtype, sort=sort)
+_check_orient(df, "records", dtype=dtype, sort=sort)
+_check_orient(df, "split", dtype=dtype, sort=sort)
+_check_orient(df, "index", dtype=dtype, sort=sort)
+_check_orient(df, "values", dtype=dtype, sort=sort)
+
+_check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort)
+_check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort)
+_check_orient(df, "split", dtype=dtype, convert_axes=False, sort=sort)
+_check_orient(df, "index", dtype=dtype, convert_axes=False, sort=sort)
+_check_orient(df, "values", dtype=dtype ,convert_axes=False, sort=sort)

# numpy=True and raise_ok might be not None, so ignore the error
if convert_axes:
_check_orient(df, "columns", dtype=dtype, numpy=True,
-raise_ok=raise_ok)
+raise_ok=raise_ok, sort=sort)
_check_orient(df, "records", dtype=dtype, numpy=True,
-raise_ok=raise_ok)
+raise_ok=raise_ok, sort=sort)
_check_orient(df, "split", dtype=dtype, numpy=True,
-raise_ok=raise_ok)
+raise_ok=raise_ok, sort=sort)
_check_orient(df, "index", dtype=dtype, numpy=True,
-raise_ok=raise_ok)
+raise_ok=raise_ok, sort=sort)
_check_orient(df, "values", dtype=dtype, numpy=True,
-raise_ok=raise_ok)
+raise_ok=raise_ok, sort=sort)

_check_orient(df, "columns", dtype=dtype, numpy=True,
-convert_axes=False, raise_ok=raise_ok)
+convert_axes=False, raise_ok=raise_ok, sort=sort)
_check_orient(df, "records", dtype=dtype, numpy=True,
-convert_axes=False, raise_ok=raise_ok)
+convert_axes=False, raise_ok=raise_ok, sort=sort)
_check_orient(df, "split", dtype=dtype, numpy=True,
-convert_axes=False, raise_ok=raise_ok)
+convert_axes=False, raise_ok=raise_ok, sort=sort)
_check_orient(df, "index", dtype=dtype, numpy=True,
-convert_axes=False, raise_ok=raise_ok)
+convert_axes=False, raise_ok=raise_ok, sort=sort)
_check_orient(df, "values", dtype=dtype, numpy=True,
-convert_axes=False, raise_ok=raise_ok)
+convert_axes=False, raise_ok=raise_ok, sort=sort)

# basic
_check_all_orients(self.frame)
@@ -233,6 +258,9 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None):
_check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
convert_axes=False, raise_ok=ValueError)

+# categorical
+_check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)

# empty
_check_all_orients(self.empty_frame)

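A note on the new ``sort`` argument exercised above: ``_cat_frame`` is given duplicated index labels, so ordering by the index alone is not a stable way to line up rows before and after the JSON round trip, and the tests instead realign on the monotonically increasing ``sort`` column. A rough sketch of the idea in current pandas API (``sort_values`` stands in for the older ``DataFrame.sort`` used in the test, and the data are illustrative):

    from io import StringIO

    import numpy as np
    import pandas as pd

    cat = ["bah", "bah", "bar", "bar"]
    df = pd.DataFrame({"A": np.arange(4.0), "sort": np.arange(4)},
                      index=pd.CategoricalIndex(cat, name="E"))

    # orient="records" drops the index, so rows are realigned on the helper
    # column rather than on the (duplicated) CategoricalIndex.
    roundtripped = pd.read_json(StringIO(df.to_json(orient="records")),
                                orient="records")

    left = df.sort_values("sort").reset_index(drop=True)
    right = roundtripped.sort_values("sort").reset_index(drop=True)
    assert left["A"].tolist() == right["A"].tolist()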
pandas/src/ujson/python/objToJSON.c: 20 additions, 9 deletions
@@ -1814,7 +1814,7 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in

void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
{
-PyObject *obj, *exc, *toDictFunc, *tmpObj;
+PyObject *obj, *exc, *toDictFunc, *tmpObj, *getValuesFunc;
TypeContext *pc;
PyObjectEncoder *enc;
double val;
@@ -2082,14 +2082,25 @@ void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc)
return;
}

-PRINTMARK();
-tc->type = JT_ARRAY;
-pc->newObj = PyObject_GetAttrString(obj, "values");
-pc->iterBegin = NpyArr_iterBegin;
-pc->iterEnd = NpyArr_iterEnd;
-pc->iterNext = NpyArr_iterNext;
-pc->iterGetValue = NpyArr_iterGetValue;
-pc->iterGetName = NpyArr_iterGetName;
+PyObject* getValuesFunc = PyObject_GetAttrString(obj, "get_values");
+if (getValuesFunc)
+{
+PRINTMARK();
+tc->type = JT_ARRAY;
+pc->newObj = PyObject_CallObject(getValuesFunc, NULL);
+pc->iterBegin = NpyArr_iterBegin;
+pc->iterEnd = NpyArr_iterEnd;
+pc->iterNext = NpyArr_iterNext;
+pc->iterGetValue = NpyArr_iterGetValue;
+pc->iterGetName = NpyArr_iterGetName;
+
+Py_DECREF(getValuesFunc);
+}
+else
+{
+goto INVALID;
+}

return;
}
else
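The C change above replaces a lookup of the index's ``values`` attribute with a call to its ``get_values`` method, guarded by a NULL check and paired with a ``Py_DECREF``. The likely reason, inferred from the issue rather than stated on this page: for a ``CategoricalIndex``, ``.values`` is a ``Categorical`` object rather than a plain NumPy array, so handing it to the ujson ndarray iterators crashed, whereas ``get_values()`` (the 0.16-era API, since deprecated) returns a real ndarray. A small sketch of the type difference:

    import numpy as np
    import pandas as pd

    idx = pd.CategoricalIndex(["a", "b", "a"], name="E")

    print(type(idx.values))       # pandas Categorical, not an ndarray
    print(type(np.asarray(idx)))  # numpy.ndarray, the shape the encoder expects

    # On pandas 0.16.x the C code obtains the ndarray via idx.get_values(),
    # which is roughly equivalent to np.asarray(idx) above.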
