Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: ujson labels are encoded twice #4593

Merged
merged 1 commit into from
Aug 21, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
- In ``to_json``, fix date handling so milliseconds are the default timestamp
as the docstring says (:issue:`4362`).
- JSON NaT handling fixed, NaTs are now serialised to ``null`` (:issue:`4498`)
- Fixed JSON handling of escapable characters in JSON object keys (:issue:`4593`)
- Fixed passing ``keep_default_na=False`` when ``na_values=None`` (:issue:`4318`)
- Fixed bug with ``values`` raising an error on a DataFrame with duplicate columns and mixed
dtypes, surfaced in (:issue:`4377`)
Expand Down
17 changes: 16 additions & 1 deletion pandas/io/tests/test_json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
assert_series_equal, network,
ensure_clean)
ensure_clean, assert_index_equal)
import pandas.util.testing as tm
from numpy.testing.decorators import slow

Expand Down Expand Up @@ -53,6 +53,21 @@ def setUp(self):
self.tsframe = _tsframe.copy()
self.mixed_frame = _mixed_frame.copy()

def test_frame_double_encoded_labels(self):
# Round-trip check for a frame whose row and column labels contain
# characters that JSON has to escape: a double quote, forward slashes
# and a backslash.
df = DataFrame([['a', 'b'], ['c', 'd']],
index=['index " 1', 'index / 2'],
columns=['a \\ b', 'y / z'])

# For these three orientations the deserialised frame must equal the
# original frame, labels included.
assert_frame_equal(
df, read_json(df.to_json(orient='split'), orient='split'))
assert_frame_equal(
df, read_json(df.to_json(orient='columns'), orient='columns'))
assert_frame_equal(
df, read_json(df.to_json(orient='index'), orient='index'))
# For 'records' only the column labels and the cell values are compared
# (presumably because that orientation does not carry the index --
# confirm against the to_json documentation).
df_unser = read_json(df.to_json(orient='records'), orient='records')
assert_index_equal(df.columns, df_unser.columns)
np.testing.assert_equal(df.values, df_unser.values)

def test_frame_non_unique_index(self):
df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
columns=['x', 'y'])
Expand Down
37 changes: 24 additions & 13 deletions pandas/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ JSOBJ NpyArr_iterGetValue(JSOBJ obj, JSONTypeContext *tc)

char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{
JSONObjectEncoder* enc = (JSONObjectEncoder*) tc->encoder;
NpyArrContext* npyarr;
npy_intp idx;
PRINTMARK();
Expand All @@ -496,13 +497,19 @@ char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{
idx = npyarr->index[npyarr->stridedim] - 1;
*outLen = strlen(npyarr->columnLabels[idx]);
return npyarr->columnLabels[idx];
memcpy(enc->offset, npyarr->columnLabels[idx], sizeof(char)*(*outLen));
enc->offset += *outLen;
*outLen = 0;
return NULL;
}
else
{
idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1;
*outLen = strlen(npyarr->rowLabels[idx]);
return npyarr->rowLabels[idx];
memcpy(enc->offset, npyarr->rowLabels[idx], sizeof(char)*(*outLen));
enc->offset += *outLen;
*outLen = 0;
return NULL;
}
}

Expand Down Expand Up @@ -1064,7 +1071,7 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
// NOTE this function steals a reference to labels.
PyArrayObject* labelsTmp = NULL;
PyObject* item = NULL;
npy_intp i, stride, len;
npy_intp i, stride, len, need_quotes;
char** ret;
char *dataptr, *cLabel, *origend, *origst, *origoffset;
char labelBuffer[NPY_JSON_BUFSIZE];
Expand Down Expand Up @@ -1117,15 +1124,8 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
break;
}

// trim off any quotes surrounding the result
if (*cLabel == '\"')
{
cLabel++;
enc->offset -= 2;
*(enc->offset) = '\0';
}

len = enc->offset - cLabel + 1;
need_quotes = ((*cLabel) != '"');
len = enc->offset - cLabel + 1 + 2 * need_quotes;
ret[i] = PyObject_Malloc(sizeof(char)*len);

if (!ret[i])
Expand All @@ -1135,7 +1135,18 @@ char** NpyArr_encodeLabels(PyArrayObject* labels, JSONObjectEncoder* enc, npy_in
break;
}

memcpy(ret[i], cLabel, sizeof(char)*len);
if (need_quotes)
{
ret[i][0] = '"';
memcpy(ret[i]+1, cLabel, sizeof(char)*(len-4));
ret[i][len-3] = '"';
}
else
{
memcpy(ret[i], cLabel, sizeof(char)*(len-2));
}
ret[i][len-2] = ':';
ret[i][len-1] = '\0';
dataptr += stride;
}

Expand Down