Skip to content

Commit

Permalink
Merge pull request #46 from lelit/rj-v1.1-as-submodule-v2
Browse files Browse the repository at this point in the history
Implement issue #42: use upstream RapidJSON v1.1
  • Loading branch information
kenrobbins committed Dec 8, 2016
2 parents fc891bb + 6b4f27b commit 5405613
Show file tree
Hide file tree
Showing 47 changed files with 87 additions and 11,038 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "rapidjson"]
path = rapidjson
url = https://github.com/miloyip/rapidjson.git
6 changes: 4 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@ include LICENSE
include MANIFEST.in
include README.rst
include setup.py
recursive-include python-rapidjson *.cpp
recursive-include thirdparty/rapidjson *.h *.md license.txt
recursive-include python-rapidjson *.cpp *.h
include rapidjson readme.md license.txt
recursive-include rapidjson/include *.h
prune rapidjson/doc
17 changes: 3 additions & 14 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
.. doctest::

>>> dumps('The symbol for the Euro currency is €')
'"The symbol for the Euro currency is \\u20ac"'
'"The symbol for the Euro currency is \\u20AC"'
>>> dumps('The symbol for the Euro currency is €',
... ensure_ascii=False)
'"The symbol for the Euro currency is €"'
Expand Down Expand Up @@ -266,15 +266,14 @@
>>> dumps(random_uuid, uuid_mode=UUID_MODE_HEX) # doctest: +SKIP
'"be57634565b54fc292c594e2f82e38fd"'

.. function:: loads(s, object_hook=None, use_decimal=False, precise_float=True, \
allow_nan=True, datetime_mode=None, uuid_mode=None)
.. function:: loads(s, object_hook=None, use_decimal=False, allow_nan=True, \
datetime_mode=None, uuid_mode=None)

:param str s: The JSON string to parse
:param callable object_hook: an optional function that will be called with the result of
any object literal decoded (a :class:`dict`) and should return
the value to use instead of the :class:`dict`
:param bool use_decimal: whether :class:`Decimal` should be used for float values
:param bool precise_float: use slower-but-more-precise float parser
:param bool allow_nan: whether ``NaN`` values are recognized
:param int datetime_mode: how :class:`datetime` and :class:`date` instances should be
handled
Expand Down Expand Up @@ -313,16 +312,6 @@
>>> loads('1.2345', use_decimal=True)
Decimal('1.2345')

If `precise_float` is false (default: ``True``) then a faster but less precise algorithm
will be used to parse float values inside the JSON structure

.. doctest::

>>> loads('1.234567890123456789')
1.2345678901234567
>>> loads('1.234567890123456789', precise_float=False)
1.234567890123457

If `allow_nan` is false (default: ``True``), then the values ``NaN`` and ``Infinity`` won't
be recognized:

Expand Down
2 changes: 1 addition & 1 deletion python-rapidjson/docstrings.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ PyDoc_STRVAR(rapidjson_module_docstring,
"Fast, simple JSON encoder and decoder. Based on RapidJSON C++ library.");

PyDoc_STRVAR(rapidjson_loads_docstring,
"loads(s, object_hook=None, use_decimal=False, precise_float=True,"
"loads(s, object_hook=None, use_decimal=False,"
" allow_nan=True, datetime_mode=None, uuid_mode=None)\n"
"\n"
"Decodes a JSON string into Python object.");
Expand Down
113 changes: 59 additions & 54 deletions python-rapidjson/rapidjson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,47 +305,54 @@ struct PyHandler {
return HandleSimpleType(value);
}

bool Double(double d, const char* decimal, SizeType length, bool minus, size_t decimalPosition, int exp) {
PyObject* value;
if (!useDecimal)
value = PyFloat_FromDouble(d);
else {
const int MAX_FINAL_SIZE = 512;
const int MAX_EXP_SIZE = 11; // 32-bit
const int MAX_DECIMAL_SIZE = MAX_FINAL_SIZE - MAX_EXP_SIZE - 3; // e, +/-, \0

exp += decimalPosition - length;

if (length > MAX_DECIMAL_SIZE)
length = MAX_DECIMAL_SIZE;
bool Double(double d) {
PyObject* value = PyFloat_FromDouble(d);
return HandleSimpleType(value);
}

char finalStr[MAX_DECIMAL_SIZE];
finalStr[0] = minus ? '-' : '+';
memcpy(finalStr+1, decimal, length);
bool RawNumber(const char* str, SizeType length, bool copy) {
PyObject* value;
bool isFloat = false;

for (int i = length - 1; i >= 0; --i) {
// consider it a float if there is at least one non-digit character,
// it may be either a decimal number or +-infinity or nan
if (!isdigit(str[i]) && str[i] != '-') {
isFloat = true;
break;
}
}

if (exp == 0)
finalStr[length+1] = 0;
else {
char expStr[MAX_EXP_SIZE];
char* end = internal::i32toa(exp, expStr);
size_t len = end - expStr;
if (isFloat) {
PyObject* pystr = PyUnicode_FromStringAndSize(str, length);

finalStr[length+1] = 'e';
memcpy(finalStr+length+2, expStr, len);
finalStr[length+2+len] = 0;
if (pystr == NULL) {
return false;
}

PyObject* raw = PyUnicode_FromString(finalStr);
if (raw == NULL) {
PyErr_SetString(PyExc_ValueError, "Error generating decimal representation");
return false;
if (!useDecimal) {
value = PyFloat_FromString(pystr);
} else {
value = PyObject_CallFunctionObjArgs(rapidjson_decimal_type, pystr, NULL);
}

value = PyObject_CallFunctionObjArgs(rapidjson_decimal_type, raw, NULL);
Py_DECREF(raw);
Py_DECREF(pystr);
} else {
char zstr[length + 1];

strncpy(zstr, str, length);
zstr[length] = '\0';

value = PyLong_FromString(zstr, NULL, 10);
}

return HandleSimpleType(value);
if (value == NULL) {
PyErr_SetString(PyExc_ValueError,
isFloat ? "Invalid float value" : "Invalid integer value");
return false;
} else {
return HandleSimpleType(value);
}
}

#define digit(idx) (str[idx] - '0')
Expand Down Expand Up @@ -714,7 +721,6 @@ rapidjson_loads(PyObject* self, PyObject* args, PyObject* kwargs)
PyObject* jsonObject;
PyObject* objectHook = NULL;
int useDecimal = 0;
int preciseFloat = 1;
int allowNan = 1;
PyObject* datetimeModeObj = NULL;
DatetimeMode datetimeMode = DATETIME_MODE_NONE;
Expand All @@ -725,19 +731,17 @@ rapidjson_loads(PyObject* self, PyObject* args, PyObject* kwargs)
"s",
"object_hook",
"use_decimal",
"precise_float",
"allow_nan",
"datetime_mode",
"uuid_mode",
NULL
};

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OpppOO:rapidjson.loads",
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OppOO:rapidjson.loads",
(char **) kwlist,
&jsonObject,
&objectHook,
&useDecimal,
&preciseFloat,
&allowNan,
&datetimeModeObj,
&uuidModeObj))
Expand Down Expand Up @@ -788,10 +792,12 @@ rapidjson_loads(PyObject* self, PyObject* args, PyObject* kwargs)
Reader reader;
InsituStringStream ss(jsonStrCopy);

if (preciseFloat)
reader.Parse<kParseInsituFlag | kParseFullPrecisionFlag>(ss, handler);
if (allowNan)
reader.Parse<kParseInsituFlag |
kParseNumbersAsStringsFlag |
kParseNanAndInfFlag>(ss, handler);
else
reader.Parse<kParseInsituFlag>(ss, handler);
reader.Parse<kParseInsituFlag | kParseNumbersAsStringsFlag>(ss, handler);

if (reader.HasParseError()) {
SizeType offset = reader.GetErrorOffset();
Expand Down Expand Up @@ -920,24 +926,23 @@ rapidjson_dumps_internal(
return NULL;
}

writer->RawNumber(decStr, size);
writer->RawValue(decStr, size, kNumberType);
Py_DECREF(decStrObj);
}
else if (PyLong_Check(object)) {
int overflow;
long long i = PyLong_AsLongLongAndOverflow(object, &overflow);
if (i == -1 && PyErr_Occurred())
PyObject* intStrObj = PyObject_Str(object);
if (intStrObj == NULL)
return NULL;

if (overflow == 0) {
writer->Int64(i);
} else {
unsigned long long ui = PyLong_AsUnsignedLongLong(object);
if (PyErr_Occurred())
return NULL;

writer->Uint64(ui);
Py_ssize_t size;
char* intStr = PyUnicode_AsUTF8AndSize(intStrObj, &size);
if (intStr == NULL) {
Py_DECREF(intStrObj);
return NULL;
}

writer->RawValue(intStr, size, kNumberType);
Py_DECREF(intStrObj);
}
else if (PyFloat_Check(object)) {
double d = PyFloat_AsDouble(object);
Expand All @@ -946,7 +951,7 @@ rapidjson_dumps_internal(

if (Py_IS_NAN(d)) {
if (allowNan)
writer->RawNumber("NaN", 3);
writer->RawValue("NaN", 3, kNumberType);
else {
PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
return NULL;
Expand All @@ -957,9 +962,9 @@ rapidjson_dumps_internal(
return NULL;
}
else if (d < 0)
writer->RawNumber("-Infinity", 9);
writer->RawValue("-Infinity", 9, kNumberType);
else
writer->RawNumber("Infinity", 8);
writer->RawValue("Infinity", 8, kNumberType);
}
else
writer->Double(d);
Expand Down
1 change: 1 addition & 0 deletions rapidjson
Submodule rapidjson added at f54b0e
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[pytest]
[tool:pytest]
norecursedirs = *.egg .tox
addopts =
# Shows a line for every test
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def find_author():
rapidjson = Extension(
'rapidjson',
sources=['./python-rapidjson/rapidjson.cpp'],
include_dirs=['./thirdparty/rapidjson/include'],
include_dirs=['./rapidjson/include'],
)

setup(
Expand Down
32 changes: 3 additions & 29 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,8 @@
def pytest_benchmark_group_stats(config, benchmarks, group_by):
result = {}
for bench in benchmarks:
if config.option.compare_other_engines:
engine, data_kind = bench.param.split('-')
if engine.endswith('not precise'):
group = result.setdefault("not precise floats: %s" % bench.group, [])
else:
group = result.setdefault("%s: %s" % (data_kind, bench.group), [])
else:
group = result.setdefault(bench.group, [])
engine, data_kind = bench.param.split('-')
group = result.setdefault("%s: %s" % (data_kind, bench.group), [])
group.append(bench)
return sorted(result.items())

Expand All @@ -28,18 +22,12 @@ def pytest_addoption(parser):


contenders = []
inaccurate_floats_contenders = []

import rapidjson

contenders.append(Contender('rapidjson',
rapidjson.dumps,
partial(rapidjson.loads, precise_float=True)))

inaccurate_floats_contenders.append(Contender('rapidjson not precise',
rapidjson.dumps,
partial(rapidjson.loads,
precise_float=False)))
rapidjson.loads))

try:
import yajl
Expand Down Expand Up @@ -76,10 +64,6 @@ def pytest_addoption(parser):
contenders.append(Contender('ujson',
ujson.dumps,
partial(ujson.loads, precise_float=True)))
inaccurate_floats_contenders.append(Contender('ujson not precise',
ujson.dumps,
partial(ujson.loads,
precise_float=False)))


def pytest_generate_tests(metafunc):
Expand All @@ -89,16 +73,6 @@ def pytest_generate_tests(metafunc):
else:
metafunc.parametrize('contender', contenders[:1], ids=attrgetter('name'))

if 'inaccurate_floats_contender' in metafunc.fixturenames:
if metafunc.config.option.compare_other_engines:
metafunc.parametrize('inaccurate_floats_contender',
inaccurate_floats_contenders,
ids=attrgetter('name'))
else:
metafunc.parametrize('inaccurate_floats_contender',
inaccurate_floats_contenders[:1],
ids=attrgetter('name'))

if 'datetimes_loads_contender' in metafunc.fixturenames:
metafunc.parametrize('datetimes_loads_contender',
[rapidjson.loads,
Expand Down
8 changes: 2 additions & 6 deletions tests/test_base_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
@pytest.mark.parametrize(
'value', [
'A', 'cruel\x00world', 1, -1, 2.3, {'foo': 'bar'}, [1, 2, 'a', 1.2, {'foo': 'bar'},],
sys.maxsize
sys.maxsize, sys.maxsize**2
])
def test_base_values(value):
dumped = rapidjson.dumps(value)
loaded = rapidjson.loads(dumped)
assert loaded == value
assert loaded == value and type(loaded) is type(value)


@pytest.mark.unit
Expand Down Expand Up @@ -45,8 +45,6 @@ def test_larger_structure():

@pytest.mark.unit
def test_object_hook():
import simplejson as json

def as_complex(dct):
if '__complex__' in dct:
return complex(dct['real'], dct['imag'])
Expand Down Expand Up @@ -75,8 +73,6 @@ def encode_complex(obj):

@pytest.mark.unit
def test_doubles():
doubles = []

for x in range(100000):
d = sys.maxsize * random.random()
dumped = rapidjson.dumps(d)
Expand Down
11 changes: 1 addition & 10 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,7 @@ def test_loads(contender, data, benchmark):
benchmark(contender.loads, data)


# Special case 1: precise vs imprecise

@pytest.mark.benchmark(group='deserialize')
@pytest.mark.parametrize('data', [doubles], ids=['256 doubles array'])
def test_loads_float(inaccurate_floats_contender, data, benchmark):
data = inaccurate_floats_contender.dumps(doubles)
benchmark(inaccurate_floats_contender.loads, data)


# Special case 2: load datetimes as plain strings vs datetime.xxx instances
# Special case: load datetimes as plain strings vs datetime.xxx instances

@pytest.mark.benchmark(group='deserialize')
@pytest.mark.parametrize('data', [datetimes], ids=['256x3 datetimes'])
Expand Down

0 comments on commit 5405613

Please sign in to comment.