Skip to content

Commit

Permalink
Support parsing application/json-seq
Browse files Browse the repository at this point in the history
Support parsing RS-separated streams, as per RFC 7464.
  • Loading branch information
spbnick committed May 30, 2024
1 parent 3580e9d commit ca928f2
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 25 deletions.
73 changes: 48 additions & 25 deletions jq.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ cdef extern from "jv.h":
JV_KIND_ARRAY,
JV_KIND_OBJECT

ctypedef enum:
JV_PARSE_SEQ,
JV_PARSE_STREAMING,
JV_PARSE_STREAM_ERRORS

ctypedef struct jv:
pass

Expand Down Expand Up @@ -49,6 +54,7 @@ cdef extern from "jv.h":
jv_parser* jv_parser_new(int)
void jv_parser_free(jv_parser*)
void jv_parser_set_buf(jv_parser*, const char*, int, int)
int jv_parser_remaining(jv_parser*)
jv jv_parser_next(jv_parser*)

jv jv_parse(const char*)
Expand Down Expand Up @@ -247,27 +253,34 @@ cdef class _Program(object):
self._program_bytes = program_bytes
self._jq_state_pool = _JqStatePool(program_bytes, args=args)

# Bind input to the compiled program from either a Python value or JSON text.
# Exactly one of `value` / `text` must be supplied (both or neither raises
# ValueError); `slurp` and `seq` are forwarded unchanged to input_text() or
# input_value().
# NOTE(review): this is a diff rendering without +/- markers -- the signature
# and return lines that lack `seq` look like the removed pre-commit lines, each
# followed by its replacement. Confirm against the real jq.pyx.
def input(self, value=_NO_VALUE, text=_NO_VALUE, *, slurp=False):
def input(self, value=_NO_VALUE, text=_NO_VALUE, *,
slurp=False, seq=False):
# True when both arguments are unset or both are set.
if (value is _NO_VALUE) == (text is _NO_VALUE):
raise ValueError("Either the value or text argument should be set")

if text is not _NO_VALUE:
return self.input_text(text, slurp=slurp)
return self.input_text(text, slurp=slurp, seq=seq)
else:
return self.input_value(value, slurp=slurp)
return self.input_value(value, slurp=slurp, seq=seq)

# Serialize a single Python value with json.dumps() and pass the resulting
# text to input_text().
# NOTE(review): diff rendering without +/- markers -- the first two lines look
# like the removed pre-commit version, the rest like its replacement.
def input_value(self, value, *, slurp=False):
return self.input_text(json.dumps(value), slurp=slurp)
def input_value(self, value, *, slurp=False, seq=False):
text = json.dumps(value)
if seq:
# Prefix the record with the RS control character (0x1E).
# NOTE(review): no trailing newline is appended here, unlike input_values();
# confirm the parser accepts a final record without one.
text = "\x1e" + text
return self.input_text(text, slurp=slurp, seq=seq)

# Serialize every item of `values` with json.dump() into one in-memory text
# buffer, one item per line, and pass the buffer contents to input_text().
# NOTE(review): diff rendering without +/- markers -- the signature and return
# lines that lack `seq` look like the removed pre-commit lines.
def input_values(self, values, *, slurp=False):
def input_values(self, values, *, slurp=False, seq=False):
fileobj = io.StringIO()
for value in values:
if seq:
# Each record is written as RS (0x1E) + JSON text + newline.
fileobj.write("\x1e")
json.dump(value, fileobj)
fileobj.write("\n")
return self.input_text(fileobj.getvalue(), slurp=slurp)
return self.input_text(fileobj.getvalue(), slurp=slurp, seq=seq)

# Wrap already-serialized JSON text in a _ProgramWithInput.
# The text is encoded to UTF-8 bytes here; `slurp` and `seq` are passed through.
# NOTE(review): diff rendering without +/- markers -- the first two lines look
# like the removed pre-commit version, the rest like its replacement.
def input_text(self, text, *, slurp=False):
return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"), slurp=slurp)
def input_text(self, text, *, slurp=False, seq=False):
return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"),
slurp=slurp, seq=seq)

@property
def program_string(self):
Expand All @@ -291,24 +304,30 @@ cdef class _ProgramWithInput(object):
cdef _JqStatePool _jq_state_pool
cdef object _bytes_input
cdef bint _slurp
cdef bint _seq

# Store the state pool, the encoded input and the slurp/seq options on the
# instance; nothing is parsed or executed here.
# NOTE(review): diff rendering without +/- markers -- the signature without
# `seq` looks like the removed pre-commit line.
def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp):
def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp, bint seq):
self._jq_state_pool = jq_state_pool
self._bytes_input = bytes_input
self._slurp = slurp
self._seq = seq

# Iterating the object delegates to _make_iterator(), so each `iter()` call
# obtains whatever that method returns.
def __iter__(self):
return self._make_iterator()

# Build a _ResultIterator over the stored input bytes, forwarding the stored
# slurp/seq options.
# NOTE(review): diff rendering without +/- markers -- the single-line return
# without `seq` looks like the removed pre-commit line.
cdef _ResultIterator _make_iterator(self):
return _ResultIterator(self._jq_state_pool, self._bytes_input, slurp=self._slurp)
return _ResultIterator(self._jq_state_pool, self._bytes_input,
slurp=self._slurp, seq=self._seq)

# Return every result serialized with json.dumps() and combined into one
# string. Without seq the results are joined by "\n"; with seq each result is
# prefixed by RS (0x1E) and results are separated by "\n".
# NOTE(review): diff rendering without +/- markers -- the bare `return` right
# after the comment looks like the removed pre-commit line, replaced by the
# if/else below it.
# NOTE(review): in the seq branch zero results yield a lone RS character and
# the final record has no trailing newline; RFC 7464 encoders frame each record
# as RS + text + LF -- confirm this output shape is intended.
def text(self):
# Performance testing suggests that using _jv_to_python (within the
# result iterator) followed by json.dumps is faster than using
# jv_dump_string to generate the string directly from the jv values.
# See: https://github.com/mwilliamson/jq.py/pull/50
return "\n".join(json.dumps(v) for v in self)
if self._seq:
return "\x1e" + "\n\x1e".join(json.dumps(v) for v in self)
else:
return "\n".join(json.dumps(v) for v in self)

# Collect every result produced by iterating this object into a list.
def all(self):
return list(self)
Expand All @@ -329,13 +348,14 @@ cdef class _ResultIterator(object):
self._jq_state_pool.release(self._jq)
jv_parser_free(self._parser)

def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *, bint slurp):
def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *,
bint slurp, bint seq):
self._jq_state_pool = jq_state_pool
self._jq = jq_state_pool.acquire()
self._bytes_input = bytes_input
self._slurp = slurp
self._ready = False
cdef jv_parser* parser = jv_parser_new(0)
cdef jv_parser* parser = jv_parser_new(JV_PARSE_SEQ if seq else 0)
cdef char* cbytes_input
cdef ssize_t clen_input
PyBytes_AsStringAndSize(bytes_input, &cbytes_input, &clen_input)
Expand Down Expand Up @@ -384,17 +404,20 @@ cdef class _ResultIterator(object):
return 0

# Fetch the next parsed input value from self._parser.
# Returns the value when jv_is_valid() accepts it, raises
# ValueError("parse error: ...") when the invalid value carries a message, and
# raises StopIteration when it carries none.
# NOTE(review): diff rendering without +/- markers -- the first, loop-free body
# looks like the removed pre-commit code and the `while True` body like its
# replacement. Confirm against the real jq.pyx.
cdef inline jv _parse_next_input(self) except *:
cdef jv value = jv_parser_next(self._parser)
if jv_is_valid(value):
return value
elif jv_invalid_has_msg(jv_copy(value)):
error_message = jv_invalid_get_msg(value)
message = jv_string_to_py_string(error_message)
jv_free(error_message)
raise ValueError(u"parse error: " + message)
else:
jv_free(value)
raise StopIteration()
cdef jv value
# Keep polling the parser: a message-less invalid value ends iteration only
# when jv_parser_remaining() reports nothing left; otherwise ask again.
# NOTE(review): presumably this steps over empty records between RS
# separators -- confirm against jq's parser implementation.
while True:
value = jv_parser_next(self._parser)
if jv_is_valid(value):
return value
# A copy is handed to jv_invalid_has_msg(), presumably because jv calls
# consume their argument -- TODO confirm.
elif jv_invalid_has_msg(jv_copy(value)):
error_message = jv_invalid_get_msg(value)
message = jv_string_to_py_string(error_message)
jv_free(error_message)
raise ValueError(u"parse error: " + message)
else:
if not jv_parser_remaining(self._parser):
jv_free(value)
raise StopIteration()


def all(program, value=_NO_VALUE, text=_NO_VALUE):
Expand Down
51 changes: 51 additions & 0 deletions tests/jq_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,57 @@ def test_unicode_strings_can_be_used_as_input():
)


def test_record_separator_character_accepted_in_input():
    """Check the results of the identity program on RS-framed text.

    Every case runs ``.`` over ``text`` with ``seq=True`` and compares the
    collected results with the expected list. The table pins that lone or
    repeated RS (0x1E) characters produce no results, and that text appearing
    before the first RS is not reported as a result.
    """
    # (input text, expected results), checked in this order.
    cases = (
        ('\x1e', []),
        ('\x1e\x1e', []),
        ('\x1e{}', [{}]),
        ('\x1e\x1e{}', [{}]),
        ('{}\x1e', []),
        ('{}\x1e\x1e', []),
        ('\x1e{}\x1e', [{}]),
        ('{}\x1e[]', [[]]),
        ('{}\x1e\x1e[]', [[]]),
        ('\x1e{}\x1e[]', [{}, []]),
        ('{}\x1e[]\x1e', [[]]),
        ('\x1e{}\x1e[]\x1e', [{}, []]),
    )
    for raw_text, expected in cases:
        # Compile per case, as each original assertion did.
        results = list(jq.compile(".").input(text=raw_text, seq=True))
        assert_equal(expected, results)


def test_unicode_strings_can_be_used_as_programs():
assert_equal(
"Dragon‽",
Expand Down

0 comments on commit ca928f2

Please sign in to comment.