Skip to content

Commit

Permalink
Add optional C extension wrapper for Python JSON parsing
Browse files Browse the repository at this point in the history
The pure Python in-tree JSON parser is *much* slower than the
in-tree C JSON parser. A local test parsing a 100 MB JSON file
showed the Python version taking 270 seconds. With the C wrapper,
it took under 4 seconds.

The C extension will be used automatically if it can be built. If
the extension fails to build, a warning is displayed and the build
is restarted without the extension.

The Serializer class is replaced with Python's built-in
JSON library since the ability to process chunked data is not
needed in that case.

The extension should work with both Python 2.7 and Python 3.3+.

Signed-off-by: Terry Wilson <twilson@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
  • Loading branch information
otherwiseguy authored and blp committed Jun 8, 2016
1 parent 2c362f1 commit c63b04d
Show file tree
Hide file tree
Showing 5 changed files with 332 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ config-h-check:
@cd $(srcdir); \
if test -e .git && (git --version) >/dev/null 2>&1 && \
git --no-pager grep -L '#include <config\.h>' `git ls-files | grep '\.c$$' | \
grep -vE '^datapath|^lib/sflow|^third-party|^datapath-windows'`; \
grep -vE '^datapath|^lib/sflow|^third-party|^datapath-windows|^python'`; \
then \
echo "See above for list of violations of the rule that"; \
echo "every C source file must #include <config.h>."; \
Expand Down
3 changes: 3 additions & 0 deletions python/automake.mk
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ EXTRA_DIST += \
python/README.rst \
python/setup.py

# C extension support.
EXTRA_DIST += python/ovs/_json.c

PYFILES = $(ovs_pyfiles) python/ovs/dirs.py $(ovstest_pyfiles)
EXTRA_DIST += $(PYFILES)
PYCOV_CLEAN_FILES += $(PYFILES:.py=.py,cover)
Expand Down
268 changes: 268 additions & 0 deletions python/ovs/_json.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
#include "Python.h"
#include <openvswitch/lib/json.h>
#include "structmember.h"

#if PY_MAJOR_VERSION >= 3
#define IS_PY3K
#endif

/* Instance layout of ovs._json.Parser: a Python object header followed by
 * the struct json_parser it wraps. */
typedef struct {
PyObject_HEAD
/* Created in Parser_new(); reset to NULL by Parser_finish(). */
struct json_parser *_parser;
} json_ParserObject;

/* tp_dealloc slot for Parser objects: hands the wrapped parser (possibly
 * NULL) to json_parser_abort(), then releases the object's own storage
 * through its type's tp_free. */
static void
Parser_dealloc(json_ParserObject * p)
{
    struct json_parser *wrapped = p->_parser;

    json_parser_abort(wrapped);
    Py_TYPE(p)->tp_free(p);
}

/* tp_new slot for Parser objects.
 *
 * Accepts one optional argument, 'check_trailer' (positional or keyword).
 * When it is supplied and truthy the underlying parser is created with
 * JSPF_TRAILER, otherwise with flags 0.
 *
 * Returns the new object, or NULL if argument parsing, the truth test, or
 * the allocation fails. */
static PyObject *
Parser_new(PyTypeObject * type, PyObject * args, PyObject * kwargs)
{
    static char *kwlist[] = { "check_trailer", NULL };
    PyObject *check_trailer = NULL;
    json_ParserObject *self;
    int flags = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", kwlist,
                                     &check_trailer)) {
        return NULL;
    }

    if (check_trailer != NULL) {
        int truth = PyObject_IsTrue(check_trailer);

        if (truth < 0) {
            /* The truth test itself raised. */
            return NULL;
        }
        if (truth) {
            flags = JSPF_TRAILER;
        }
    }

    self = (json_ParserObject *) type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }

    self->_parser = json_parser_create(flags);
    return (PyObject *) self;
}

/* Parser.feed(input): passes the text in 'input' to the wrapped parser.
 *
 * 'input' is read with PyUnicode_AsUTF8AndSize() on Python 3 and with
 * PyString_AsStringAndSize() on Python 2.
 *
 * Returns the size_t result of json_parser_feed() as a Python integer
 * (presumably the number of bytes consumed -- confirm against the OVS JSON
 * header), or NULL with a Python exception set on failure. */
static PyObject *
Parser_feed(json_ParserObject * self, PyObject * args)
{
    Py_ssize_t input_sz;
    PyObject *input;
    size_t rd;
#ifdef IS_PY3K
    /* PyUnicode_AsUTF8AndSize() returns a const pointer on newer Python 3
     * releases; a const local is correct for older ones as well. */
    const char *input_str;
#else
    char *input_str;
#endif

    if (self->_parser == NULL) {
        /* Returning NULL without an exception would surface in Python as an
         * opaque SystemError, so raise something descriptive instead. */
        PyErr_SetString(PyExc_RuntimeError,
                        "JSON parser is no longer available "
                        "(finish() already called)");
        return NULL;
    }

    /* The second argument is the function name used in error messages. */
    if (!PyArg_UnpackTuple(args, "feed", 1, 1, &input)) {
        return NULL;
    }

#ifdef IS_PY3K
    input_str = PyUnicode_AsUTF8AndSize(input, &input_sz);
    if (input_str == NULL) {
        return NULL;
    }
#else
    if (PyString_AsStringAndSize(input, &input_str, &input_sz) < 0) {
        return NULL;
    }
#endif

    rd = json_parser_feed(self->_parser, input_str, (size_t) input_sz);

#ifdef IS_PY3K
    return PyLong_FromSize_t(rd);
#else
    return PyInt_FromSize_t(rd);
#endif
}

/* Parser.is_done(): returns the result of json_parser_is_done() on the
 * wrapped parser as a Python bool, or NULL with RuntimeError set when the
 * object no longer holds a parser.
 *
 * Registered as METH_NOARGS.  Such handlers are still invoked through the
 * two-argument PyCFunction type, so the second parameter is declared and
 * ignored. */
static PyObject *
Parser_is_done(json_ParserObject * self, PyObject * unused)
{
    (void) unused;

    if (self->_parser == NULL) {
        /* Returning NULL without an exception would surface in Python as an
         * opaque SystemError. */
        PyErr_SetString(PyExc_RuntimeError,
                        "JSON parser is no longer available "
                        "(finish() already called)");
        return NULL;
    }
    return PyBool_FromLong(json_parser_is_done(self->_parser));
}

static PyObject *
json_to_python(struct json *json)
{
switch (json->type) {
case JSON_NULL:
Py_RETURN_NONE;
case JSON_FALSE:
Py_RETURN_FALSE;
case JSON_TRUE:
Py_RETURN_TRUE;
case JSON_OBJECT:{
struct shash_node *node;
PyObject *dict = PyDict_New();

if (dict == NULL) {
return PyErr_NoMemory();
}
SHASH_FOR_EACH(node, json->u.object) {
PyObject *key = PyUnicode_FromString(node->name);
PyObject *val = json_to_python(node->data);

if (!(key && val) || PyDict_SetItem(dict, key, val)) {
Py_XDECREF(key);
Py_XDECREF(val);
Py_XDECREF(dict);
return NULL;
}

Py_XDECREF(key);
Py_XDECREF(val);
}
return dict;
}
case JSON_ARRAY:{
int i;
PyObject *arr = PyList_New(json->u.array.n);

if (arr == NULL) {
return PyErr_NoMemory();
}
for (i = 0; i < json->u.array.n; i++) {
PyObject *item = json_to_python(json->u.array.elems[i]);

if (!item || PyList_SetItem(arr, i, item)) {
Py_XDECREF(arr);
return NULL;
}
}
return arr;
}
case JSON_REAL:
if (json->u.real != 0) {
return PyFloat_FromDouble(json->u.real);
} /* fall through to treat 0 as int */
case JSON_INTEGER:
#ifdef IS_PY3K
return PyLong_FromLong((long) json->u.integer);
#else
return PyInt_FromLong((long) json->u.integer);
#endif

case JSON_STRING:
return PyUnicode_FromString(json->u.string);
default:
return NULL;
}
}

/* Parser.finish(): completes parsing and returns the result converted to
 * Python objects by json_to_python().
 *
 * The wrapped parser is consumed: '_parser' is reset to NULL, so later
 * feed()/is_done()/finish() calls on this object raise RuntimeError.
 *
 * Registered as METH_NOARGS.  Such handlers are still invoked through the
 * two-argument PyCFunction type, so the second parameter is declared and
 * ignored.
 *
 * Returns a new reference, or NULL with a Python exception set. */
static PyObject *
Parser_finish(json_ParserObject * self, PyObject * unused)
{
    struct json *json;
    PyObject *obj;

    (void) unused;

    if (self->_parser == NULL) {
        /* Returning NULL without an exception would surface in Python as an
         * opaque SystemError. */
        PyErr_SetString(PyExc_RuntimeError,
                        "JSON parser is no longer available "
                        "(finish() already called)");
        return NULL;
    }

    json = json_parser_finish(self->_parser);
    self->_parser = NULL;

    obj = json_to_python(json);

    /* json_to_python() only builds new Python objects from 'json', so the
     * C-side tree is no longer needed, whether or not the conversion
     * succeeded.  Without this the whole parsed tree leaks on every call. */
    json_destroy(json);

    return obj;
}

/* Methods exposed on ovs._json.Parser.  Each entry is
 * { name, C handler, calling convention, docstring }; the table ends with
 * an all-zero sentinel entry. */
static PyMethodDef Parser_methods[] = {
    {
        "feed",
        (PyCFunction) Parser_feed,
        METH_VARARGS,
        "Feed data to the parser and return the index of the last object."
    },
    {
        "is_done",
        (PyCFunction) Parser_is_done,
        METH_NOARGS,
        "Whether the parser has finished decoding an object."
    },
    {
        "finish",
        (PyCFunction) Parser_finish,
        METH_NOARGS,
        "Finish parsing and return Python object parsed."
    },
    {NULL, NULL, 0, NULL},
};

/* Type object for ovs._json.Parser.
 *
 * The initializers are positional, so their order has to follow the field
 * order of PyTypeObject exactly; the trailing comments name the slot each
 * value lands in.  Only the name, size, dealloc, flags, doc, methods and new
 * slots are populated; every other slot is 0.  Py_TPFLAGS_BASETYPE allows
 * the type to be subclassed. */
static PyTypeObject json_ParserType = {
PyVarObject_HEAD_INIT(NULL, 0)
"ovs._json.Parser", /* tp_name */
sizeof (json_ParserObject), /* tp_basicsize */
0, /* tp_itemsize */
(destructor) Parser_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
"Parser objects", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
Parser_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
Parser_new, /* tp_new */
};

#ifdef IS_PY3K
/* Python 3 module definition for "ovs._json".  The initializers are
 * positional.  No module-level functions are registered (m_methods is 0);
 * the Parser type is attached by the module init function instead. */
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"ovs._json", /* m_name */
"OVS JSON Parser module", /* m_doc */
0, /* m_size */
0, /* m_methods */
0, /* m_slots */
0, /* m_traverse */
0, /* m_clear */
0, /* m_free */
};

/* INITERROR is the statement the module init function uses to bail out:
 * "return NULL" when IS_PY3K is defined, a bare "return" otherwise. */
#define INITERROR return NULL
#else /* !IS_PY3K */
#define INITERROR return
#endif

/* Module initialization for "ovs._json" (PyInit__json on Python 3,
 * init_json on Python 2).
 *
 * Finalizes the Parser type, creates the module and publishes the type as
 * the module attribute "Parser".  On any failure it leaves through
 * INITERROR with the Python exception set by the failing call. */
PyMODINIT_FUNC
#ifdef IS_PY3K
PyInit__json(void)
#else
init_json(void)
#endif
{
    PyObject *m;

    if (PyType_Ready(&json_ParserType) < 0) {
        INITERROR;
    }

#ifdef IS_PY3K
    m = PyModule_Create(&moduledef);
#else
    m = Py_InitModule3("ovs._json", NULL, "OVS JSON Parser module");
#endif
    if (m == NULL) {
        /* Module creation failed; passing NULL on to PyModule_AddObject()
         * would crash. */
        INITERROR;
    }

    /* PyModule_AddObject() steals a reference only when it succeeds, so the
     * reference taken here has to be dropped again if it fails. */
    Py_INCREF(&json_ParserType);
    if (PyModule_AddObject(m, "Parser", (PyObject *) &json_ParserType) < 0) {
        Py_DECREF(&json_ParserType);
#ifdef IS_PY3K
        /* PyModule_Create() returned a new reference; Py_InitModule3()
         * returns a borrowed one, so Python 2 must not release it. */
        Py_DECREF(m);
#endif
        INITERROR;
    }

#ifdef IS_PY3K
    return m;
#endif
}
11 changes: 11 additions & 0 deletions python/ovs/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
import six
from six.moves import range

try:
import ovs._json
except ImportError:
pass

__pychecker__ = 'no-stringiter'

escapes = {ord('"'): u"\\\"",
Expand Down Expand Up @@ -165,6 +170,12 @@ class Parser(object):
# Maximum height of parsing stack. #
MAX_HEIGHT = 1000

def __new__(cls, *args, **kwargs):
    """Create a parser, preferring the C extension when it was imported.

    If ``ovs._json`` is available, an ``ovs._json.Parser`` built from the
    given arguments is returned instead of an instance of this class.
    Otherwise a plain instance of ``cls`` is created and the pure Python
    implementation is used.
    """
    # Look the C class up separately from calling it, so that only a
    # missing extension triggers the fallback and errors raised by the
    # constructor itself are not swallowed.  NameError covers `ovs` not
    # being bound at all; AttributeError covers `ovs` being bound without
    # the optional `_json` submodule.
    try:
        c_parser = ovs._json.Parser
    except (NameError, AttributeError):
        return super(Parser, cls).__new__(cls)
    return c_parser(*args, **kwargs)

def __init__(self, check_trailer=False):
self.check_trailer = check_trailer

Expand Down
51 changes: 49 additions & 2 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
from __future__ import print_function
import sys

from distutils.command.build_ext import build_ext
from distutils.errors import CCompilerError, DistutilsExecError, \
DistutilsPlatformError

import setuptools

VERSION = "unknown"
Expand All @@ -25,8 +29,33 @@
file=sys.stderr)
sys.exit(-1)

# Exception types that try_build_ext.build_extension() converts into
# BuildFailed.  On win32 the tuple is extended with IOError and ValueError.
ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError)
if sys.platform == 'win32':
ext_errors += (IOError, ValueError)


# Raised by try_build_ext when the C extension cannot be built; caught around
# setuptools.setup() so the build can be retried without the extension.
class BuildFailed(Exception):
pass


class try_build_ext(build_ext):
# This class allows C extension building to fail
# NOTE: build_ext is not a new-style class

# Runs the stock build_ext command, reporting a DistutilsPlatformError as
# BuildFailed so the caller of setup() can fall back to a pure Python build.
def run(self):
try:
build_ext.run(self)
except DistutilsPlatformError:
raise BuildFailed()

setuptools.setup(
# Builds a single extension `ext` with the stock implementation; any of the
# exception types listed in ext_errors is reported as BuildFailed.
def build_extension(self, ext):
try:
build_ext.build_extension(self, ext)
except ext_errors:
raise BuildFailed()


setup_args = dict(
name='ovs',
description='Open vSwitch library',
version=VERSION,
Expand All @@ -46,5 +75,23 @@
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
]
],
ext_modules=[setuptools.Extension("ovs._json", sources=["ovs/_json.c"],
libraries=['openvswitch'])],
cmdclass={'build_ext': try_build_ext},
)

# First attempt: run setup with the C extension and the try_build_ext command
# configured in setup_args.
try:
setuptools.setup(**setup_args)
except BuildFailed:
# The extension could not be built: print a warning banner, then run setup
# again with the extension-related arguments removed.
BUILD_EXT_WARNING = ("WARNING: The C extension could not be compiled, "
"speedups are not enabled.")
print("*" * 75)
print(BUILD_EXT_WARNING)
print("Failure information, if any, is above.")
print("Retrying the build without the C extension.")
print("*" * 75)

# Drop the custom build_ext command and the extension list so the retry is a
# pure Python build.
del(setup_args['cmdclass'])
del(setup_args['ext_modules'])
setuptools.setup(**setup_args)

4 comments on commit c63b04d

@Yugandhan
Copy link

@Yugandhan Yugandhan commented on c63b04d May 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm having an issue on this commit. Shall we sync @otherwiseguy ??

@blp
Copy link
Contributor

@blp blp commented on c63b04d Jun 5, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect that @otherwiseguy is no longer involved in OVS. I'd suggest reporting the issue you see to the OVS mailing list to see if someone else can help you.

@otherwiseguy
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still here, just missed this. There have been several fixes related to this commit. I'll look for your ovs mailing list post.

@blp
Copy link
Contributor

@blp blp commented on c63b04d Jun 6, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@otherwiseguy Thanks for the correction!

Please sign in to comment.