diff --git a/llsd/__init__.py b/llsd/__init__.py index 85c3471..e247e62 100644 --- a/llsd/__init__.py +++ b/llsd/__init__.py @@ -7,40 +7,55 @@ http://wiki.secondlife.com/wiki/LLSD """ -from llsd.base import (_LLSD, BINARY_MIME_TYPE, NOTATION_MIME_TYPE, XML_MIME_TYPE, LLSDParseError, - LLSDSerializationError, LongType, UnicodeType, binary, starts_with, undef, uri) -from llsd.serde_binary import LLSDBinaryParser, format_binary, parse_binary -from llsd.serde_notation import LLSDNotationFormatter, LLSDNotationParser, format_notation, parse_notation -from llsd.serde_xml import LLSDXMLFormatter, LLSDXMLPrettyFormatter, format_pretty_xml, format_xml, parse_xml +from llsd.base import (_LLSD, BINARY_MIME_TYPE, NOTATION_MIME_TYPE, XML_MIME_TYPE, + BINARY_HEADER, NOTATION_HEADER, XML_HEADER, + LLSDBaseParser, LLSDParseError, LLSDSerializationError, + LongType, UnicodeType, binary, undef, uri) +from llsd.serde_binary import LLSDBinaryParser, format_binary, parse_binary, parse_binary_nohdr +from llsd.serde_notation import LLSDNotationFormatter, LLSDNotationParser, format_notation, parse_notation, parse_notation_nohdr +from llsd.serde_xml import LLSDXMLFormatter, LLSDXMLPrettyFormatter, format_pretty_xml, format_xml, parse_xml, parse_xml_nohdr def parse(something, mime_type = None): """ This is the basic public interface for parsing llsd. - :param something: The data to parse. This is expected to be bytes, not strings + :param something: The data to parse. This is expected to be bytes, not + strings, or a byte stream. :param mime_type: The mime_type of the data if it is known. :returns: Returns a python object. Python 3 Note: when reading LLSD from a file, use open()'s 'rb' mode explicitly """ - if mime_type in (XML_MIME_TYPE, 'application/llsd'): - return parse_xml(something) - elif mime_type == BINARY_MIME_TYPE: - return parse_binary(something) - elif mime_type == NOTATION_MIME_TYPE: - return parse_notation(something) - #elif content_type == 'application/json': - # return parse_notation(something) try: - something = something.lstrip() #remove any pre-trailing whitespace - if starts_with(b'', something): - return parse_binary(something) - # This should be better. - elif starts_with(b'<', something): - return parse_xml(something) + if mime_type: + # explicit mime_type -- 'something' may or may not also have a header + for mime_types, parser in ( + ({XML_MIME_TYPE, 'application/llsd'}, parse_xml), + ({BINARY_MIME_TYPE}, parse_binary), + ({NOTATION_MIME_TYPE}, parse_notation), +## ({'application/json'}, parse_notation), + ): + if mime_type.lower() in mime_types: + return parser(something) + + # no recognized mime type, look for header + baseparser = LLSDBaseParser(something) + for pattern, parser in ( + (BINARY_HEADER, parse_binary_nohdr), + (NOTATION_HEADER, parse_notation_nohdr), + (XML_HEADER, parse_xml_nohdr), + ): + if baseparser.matchseq(pattern): + # we already saw the header, don't check again + return parser(baseparser) + + # no recognized header -- does content resemble XML? + if baseparser.starts_with(b'<'): + return parse_xml_nohdr(baseparser) else: - return parse_notation(something) + return parse_notation_nohdr(baseparser) + except KeyError as e: raise LLSDParseError('LLSD could not be parsed: %s' % (e,)) except TypeError as e: diff --git a/llsd/base.py b/llsd/base.py index 544f480..7b99f53 100644 --- a/llsd/base.py +++ b/llsd/base.py @@ -2,6 +2,7 @@ import base64 import binascii import datetime +import io import os import re import sys @@ -24,6 +25,10 @@ BINARY_MIME_TYPE = 'application/llsd+binary' NOTATION_MIME_TYPE = 'application/llsd+notation' +XML_HEADER = b'' +BINARY_HEADER = b'' +NOTATION_HEADER = b'' + ALL_CHARS = str(bytearray(range(256))) if PY2 else bytes(range(256)) @@ -78,12 +83,6 @@ class LLSDSerializationError(TypeError): except NameError: UnicodeType = str -# can't just check for NameError: 'bytes' is defined in both Python 2 and 3 -if PY2: - BytesType = str -else: - BytesType = bytes - try: b'%s' % (b'yes',) except TypeError: @@ -141,19 +140,6 @@ def B(fmt): return fmt -class PY3SemanticBytes(BytesType): - """Wrapper to make `buffer[n]` return an integer like in Py3""" - __slots__ = [] - - def __getitem__(self, item): - ret = super(PY3SemanticBytes, self).__getitem__(item) - # `buffer[n]` should return an integer, but slice syntax like - # `buffer[n:n+1]` should still return a `Bytes` object as before. - if is_integer(item): - return ord(ret) - return ret - - def is_integer(o): """ portable test if an object is like an int """ return isinstance(o, IntTypes) @@ -169,11 +155,6 @@ def is_string(o): return isinstance(o, StringTypes) -def is_bytes(o): - """ portable check if an object is an immutable byte array """ - return isinstance(o, BytesType) - - #date: d"YYYY-MM-DDTHH:MM:SS.FFFFFFZ" _date_regex = re.compile(r"(?P\d{4})-(?P\d{2})-(?P\d{2})T" r"(?P\d{2}):(?P\d{2}):(?P\d{2})" @@ -375,36 +356,136 @@ class LLSDBaseParser(object): """ Utility methods useful for parser subclasses. """ - __slots__ = ['_buffer', '_index', '_decode_buff'] + __slots__ = ['_stream', '_decode_buff'] - def __init__(self): - self._buffer = b'' - self._index = 0 + def __init__(self, something=b''): + self._reset(something) # Scratch space for decoding delimited strings self._decode_buff = bytearray(_DECODE_BUFF_ALLOC_SIZE) - def _error(self, message, offset=0): + def _reset(self, something): + if isinstance(something, LLSDBaseParser): + # When passed an existing LLSDBaseParser (subclass) instance, just + # borrow its existing _stream. + self._stream = something._stream + elif isinstance(something, bytes): + # Wrap an incoming bytes string into a stream. If the passed bytes + # string is so large that the overhead of copying it into a + # BytesIO is significant, advise caller to pass a stream instead. + # BytesIO has no peek() method, so wrap it in BufferedReader. + self._stream = io.BufferedReader(io.BytesIO(something)) + elif hasattr(something, 'peek'): + # 'something' is already a buffered stream, use directly + self._stream = something + else: + # 'something' isn't buffered, wrap in BufferedReader + # (let BufferedReader handle the problem of passing an + # inappropriate object) + self._stream = io.BufferedReader(something) + + def starts_with(self, pattern): + """ + Like matchseq(), except that starts_with() doesn't consume what it + matches: it always resets our input stream to its previous position. + """ + oldpos = self._stream.tell() try: - byte = self._buffer[self._index+offset] - except IndexError: - byte = None - raise LLSDParseError("%s at byte %d: %s" % (message, self._index+offset, byte)) + return self.matchseq(pattern) + finally: + self._stream.seek(oldpos) - def _peek(self, num=1): + def matchseq(self, pattern): + """ + Match bytes object 'pattern' after skipping arbitrary leading + whitespace. After successfully matching 'pattern', skip trailing + whitespace as well. + + 'pattern' is NOT a regular expression, but a bytes string in which + each space character matches zero or more whitespace characters in the + stream. Non-space characters are matched case-insensitively. + + If 'pattern' matches, return True and leave our input stream advanced + past the last byte examined. + + If 'pattern' does not match, return False and reset our input stream + to its previous read position. + """ + oldpos = self._stream.tell() + for chunk in pattern.split(): + # skip leading space before this chunk + c = self._next_nonblank() + # if we hit EOF, no match + if not c: + self._stream.seek(oldpos) + return False + # not EOF: try to match non-empty chunk, + # not forgetting that 'c' is a lookahead byte + # (split() never produces a zero-length chunk) + maybe = c + self._stream.read(len(chunk)-1) + if maybe.lower() != chunk.lower(): + # mismatch, reset + self._stream.seek(oldpos) + return False + # so far so good, back for next chunk + + # here we've matched every chunk, with the read pointer just at the end of + # the last matched chunk -- skip trailing space + if self._next_nonblank(): + # back up one character, i.e. put back the nonblank + self._stream.seek(-1, io.SEEK_CUR) + # success! + return True + + def remainder(self): + # return a stream object representing the parse input (from last + # _reset() call), whose read position is set past scanned input + return self._stream + + def _next_nonblank(self): + # we directly call read() rather than getc() because our caller is + # prepared to handle empty string, meaning EOF + # (YES we want the walrus operator) + c = self._stream.read(1) + while c.isspace(): + c = self._stream.read(1) + return c + + def _getc(self, num=1): + got = self._stream.read(num) + if len(got) < num: + self._error("Trying to read past end of stream") + return got + + def _peek(self, num=1, full=True): + # full=True means error if we can't peek ahead num bytes if num < 0: # There aren't many ways this can happen. The likeliest is that # we've just read garbage length bytes from a binary input string. # We happen to know that lengths are encoded as 4 bytes, so back # off by 4 bytes to try to point the user at the right spot. self._error("Invalid length field %d" % num, -4) - if self._index + num > len(self._buffer): - self._error("Trying to read past end of buffer") - return self._buffer[self._index:self._index + num] - def _getc(self, num=1): - chars = self._peek(num) - self._index += num - return chars + got = self._stream.peek(num) + if full and len(got) < num: + # Going right to this error is a little iffy: + # BufferedReader.peek() does not promise to return the requested + # length, but does not clarify the conditions under which it + # returns fewer bytes. If this is an actual problem, we could loop + # until we have the requested length or EOF -- but the loop must + # explicitly seek() past already-peeked data, then reset after. + # https://docs.python.org/3/library/io.html#io.BufferedReader.peek + self._error("Trying to peek past end of stream") + + # Interestingly, peek() can also return MORE than requested -- but for + # our purposes (e.g. ord(peek(1))) it's important to constrain it. + return got[:num] + + def _error(self, message, offset=0): + oldpos = self._stream.tell() + # 'offset' is relative to current pos + self._stream.seek(offset, io.SEEK_CUR) + raise LLSDParseError("%s at byte %d: %r" % + (message, oldpos+offset, self._peek(1, full=False))) # map char following escape char to corresponding character _escaped = { @@ -424,21 +505,18 @@ def _parse_string_delim(self, delim): # Preallocate a working buffer for the decoded string output # to avoid allocs in the hot loop. decode_buff = self._decode_buff - # Cache these in locals, otherwise we have to perform a lookup on + # Cache this in locals, otherwise we have to perform a lookup on # `self` in the hot loop. - buff = self._buffer - read_idx = self._index + getc = self._getc cc = 0 while True: try: - cc = buff[read_idx] - read_idx += 1 + cc = ord(getc()) if cc == _BACKSLASH_ORD: # Backslash, figure out if this is an \xNN hex escape or # something like \t - cc = buff[read_idx] - read_idx += 1 + cc = ord(getc()) if cc == _X_ORD: # It's a hex escape. char is the value of the two # following hex nybbles. This slice may result in @@ -446,8 +524,7 @@ def _parse_string_delim(self, delim): # `ValueError` will be triggered by the first case, # and the second will cause an `IndexError` on the # next iteration of the loop. - hex_bytes = buff[read_idx:read_idx + 2] - read_idx += 2 + hex_bytes = getc(2) try: # int() can parse a `bytes` containing hex, # no explicit `bytes.decode("ascii")` required. @@ -456,7 +533,6 @@ def _parse_string_delim(self, delim): # One of the hex characters was likely invalid. # Wrap the ValueError so that we can provide a # byte offset in the error. - self._index = read_idx self._error(e, offset=-2) else: # escape char preceding anything other than the chars @@ -468,7 +544,6 @@ def _parse_string_delim(self, delim): except IndexError: # We can be reasonably sure that any IndexErrors inside here # were caused by an out-of-bounds `buff[read_idx]`. - self._index = read_idx self._error("Trying to read past end of buffer") try: @@ -483,19 +558,8 @@ def _parse_string_delim(self, delim): insert_idx += 1 # Sync our local read index with the canonical one - self._index = read_idx try: # Slice off only what we used of the working decode buffer return decode_buff[:insert_idx].decode('utf-8') except UnicodeDecodeError as exc: self._error(exc) - - -def starts_with(startstr, something): - if hasattr(something, 'startswith'): - return something.startswith(startstr) - else: - pos = something.tell() - s = something.read(len(startstr)) - something.seek(pos, os.SEEK_SET) - return (s == startstr) diff --git a/llsd/fastest_elementtree.py b/llsd/fastest_elementtree.py index 8b7d6bb..0f89f8e 100644 --- a/llsd/fastest_elementtree.py +++ b/llsd/fastest_elementtree.py @@ -17,32 +17,49 @@ errors. """ +# TODO: drop version sensitivity, replacing entire module with: +#from xml.etree.ElementTree import * +#ElementTreeError = ParseError + ## # Using cElementTree might cause some unforeseen problems, so here's a # convenient off switch during development and testing. _use_celementree = True +# xml.etree.cElementTree has been deprecated since Python 3.3. +# For speed in the common case of Python 3.3+, don't even start with that. +import sys +if sys.version_info[:2] >= (3, 3): + _use_celementree = False + try: + # nat wishes for a nicer way to skip even attempting cElementTree than by + # explicitly raising ImportError. The problem is that we want to be able + # to 'import *' into the global namespace, which forbids packaging any of + # this logic in a function. Nor can we 'return' early from a module. It + # seems the only way to avoid the explicit exception would be to restate + # the entirety of each 'except ImportError' clause, which would be worse. if not _use_celementree: raise ImportError() - # Python 2.3 and 2.4. - from cElementTree import * + # Python 2.5 and above. + from xml.etree.cElementTree import * ElementTreeError = SyntaxError except ImportError: try: - if not _use_celementree: - raise ImportError() - # Python 2.5 and above. - from xml.etree.cElementTree import * - ElementTreeError = SyntaxError + # Python 2.5 and above: the common case. + from xml.etree.ElementTree import * + try: + # Python 3 + ElementTreeError = ParseError + except NameError: + # The older Python ElementTree module uses Expat for parsing. + from xml.parsers.expat import ExpatError as ElementTreeError except ImportError: - # Pure Python code. + # Python 2.3 and 2.4. try: - # Python 2.3 and 2.4. - from elementtree.ElementTree import * + if not _use_celementree: + raise ImportError() + from cElementTree import * + ElementTreeError = SyntaxError except ImportError: - # Python 2.5 and above. - from xml.etree.ElementTree import * - - # The pure Python ElementTree module uses Expat for parsing. - from xml.parsers.expat import ExpatError as ElementTreeError + from elementtree.ElementTree import * diff --git a/llsd/serde_binary.py b/llsd/serde_binary.py index cbf65e4..42a2c0d 100644 --- a/llsd/serde_binary.py +++ b/llsd/serde_binary.py @@ -3,8 +3,8 @@ import struct import uuid -from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, _str_to_bytes, binary, is_integer, is_string, - starts_with, uri, PY2, is_bytes, PY3SemanticBytes) +from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, BINARY_HEADER, + _str_to_bytes, binary, is_integer, is_string, uri) class LLSDBinaryParser(LLSDBaseParser): @@ -63,12 +63,7 @@ def parse(self, buffer, ignore_binary = False): :param ignore_binary: parser throws away data in llsd binary nodes. :returns: returns a python object. """ - if PY2 and is_bytes(buffer): - # We need to wrap this in a helper class so that individual element - # access works the same as in PY3 - buffer = PY3SemanticBytes(buffer) - self._buffer = buffer - self._index = 0 + self._reset(buffer) self._keep_binary = not ignore_binary try: return self._parse() @@ -119,7 +114,7 @@ def _parse_array(self): cc = self._peek() if cc != b']': self._error("invalid array close token") - self._index += 1 + self._getc() return rv def _parse_string(self): @@ -142,7 +137,7 @@ def _parse_date(self): seconds = struct.unpack("\n' + _format_binary_recurse(something) + return BINARY_HEADER + b'\n' + _format_binary_recurse(something) def _format_binary_recurse(something): @@ -232,9 +227,19 @@ def parse_binary(something): :param something: The data to parse in an indexable sequence. :returns: Returns a python object. """ - if starts_with(b'', something): - just_binary = something.split(b'\n', 1)[1] - else: - just_binary = something - return LLSDBinaryParser().parse(just_binary) + # Try to match header, and if matched, skip past it. + parser = LLSDBaseParser(something) + parser.matchseq(BINARY_HEADER) + # If we matched the header, then parse whatever follows, else parse the + # original bytes object or stream. + return parse_binary_nohdr(parser) + + +def parse_binary_nohdr(baseparser): + """ + Parse llsd+binary known to be without a header. + :param baseparser: LLSDBaseParser instance wrapping the data to parse. + :returns: Returns a python object. + """ + return LLSDBinaryParser().parse(baseparser) diff --git a/llsd/serde_notation.py b/llsd/serde_notation.py index e2e9340..6802b7e 100644 --- a/llsd/serde_notation.py +++ b/llsd/serde_notation.py @@ -3,8 +3,9 @@ import re import uuid -from llsd.base import (_LLSD, B, LLSDBaseFormatter, LLSDBaseParser, LLSDParseError, LLSDSerializationError, UnicodeType, - _format_datestr, _parse_datestr, _str_to_bytes, binary, uri, PY2, is_bytes, PY3SemanticBytes) +from llsd.base import (_LLSD, B, LLSDBaseFormatter, LLSDBaseParser, NOTATION_HEADER, + LLSDParseError, LLSDSerializationError, UnicodeType, + _format_datestr, _parse_datestr, _str_to_bytes, binary, uri) _int_regex = re.compile(br"[-+]?\d+") _real_regex = re.compile(br"[-+]?(?:(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?)|[-+]?inf|[-+]?nan") @@ -83,26 +84,28 @@ def parse(self, buffer, ignore_binary = False): :param ignore_binary: parser throws away data in llsd binary nodes. :returns: returns a python object. """ - if buffer == b"": - return False + self._reset(buffer) - if PY2 and is_bytes(buffer): - # We need to wrap this in a helper class so that individual element - # access works the same as in PY3 - buffer = PY3SemanticBytes(buffer) + # special case for notation: empty binary string means False + if not self._stream.peek(1): + return False - self._buffer = buffer - self._index = 0 return self._parse() def _get_until(self, delim): - start = self._index - end = self._buffer.find(delim, start) - if end == -1: + content = [] + try: + c = self._getc() + while c != delim: + content.append(c) + c = self._getc() + except LLSDParseError: + # traditionally this function returns None when there's no + # subsequent delim within the input buffer return None else: - self._index = end + 1 - return self._buffer[start:end] + # we've already consumed the close delim + return b''.join(content) def _skip_then(self, value): # We've already _peek()ed at the current character, which is how we @@ -111,11 +114,21 @@ def _skip_then(self, value): return value def _get_re(self, desc, regex, override=None): - match = re.match(regex, self._buffer[self._index:]) + # This is the case for which we introduced _peek(full=False). + # Instead of trying to reimplement each of the re patterns passed to + # this method as individual operations on _util, peek ahead by a + # reasonable amount and directly use re. full=False means we're + # willing to accept a result buffer shorter than our lookahead. + # You would think we could parse int, real, True or False with fewer + # bytes than this, but fuzz testing produces some real humdinger int + # values. + peek = self._peek(80, full=False) + match = regex.match(peek) if not match: self._error("Invalid %s token" % desc) else: - self._index += match.end() + # skip what we matched + self._getc(match.end()) return override if override is not None else match.group(0) def _parse(self): @@ -417,4 +430,19 @@ def parse_notation(something): :param something: The data to parse. :returns: Returns a python object. """ - return LLSDNotationParser().parse(something) \ No newline at end of file + # Try to match header, and if matched, skip past it. + parser = LLSDBaseParser(something) + parser.matchseq(NOTATION_HEADER) + # If we matched the header, then parse whatever follows, else parse the + # original bytes object or stream. + return parse_notation_nohdr(parser) + + +def parse_notation_nohdr(baseparser): + """ + Parse llsd+notation known to be without a header. + + :param baseparser: LLSDBaseParser instance wrapping the data to parse. + :returns: Returns a python object. + """ + return LLSDNotationParser().parse(baseparser) diff --git a/llsd/serde_xml.py b/llsd/serde_xml.py index c8404a5..3e4fd8b 100644 --- a/llsd/serde_xml.py +++ b/llsd/serde_xml.py @@ -2,9 +2,10 @@ import re import types -from llsd.base import (_LLSD, ALL_CHARS, B, LLSDBaseFormatter, LLSDParseError, LLSDSerializationError, UnicodeType, +from llsd.base import (_LLSD, ALL_CHARS, B, LLSDBaseParser, LLSDBaseFormatter, XML_HEADER, + LLSDParseError, LLSDSerializationError, UnicodeType, _format_datestr, _str_to_bytes, _to_python, is_unicode) -from llsd.fastest_elementtree import ElementTreeError, fromstring +from llsd.fastest_elementtree import ElementTreeError, fromstring, parse as _parse INVALID_XML_BYTES = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c'\ b'\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18'\ @@ -226,12 +227,6 @@ def format_pretty_xml(something): return LLSDXMLPrettyFormatter().format(something) -declaration_regex = re.compile(br'^\s*(?:<\?[\x09\x0A\x0D\x20-\x7e]+\?>)|(?:)') -def validate_xml_declaration(something): - if not declaration_regex.match(something): - raise LLSDParseError("Invalid XML Declaration") - - def parse_xml(something): """ This is the basic public interface for parsing llsd+xml. @@ -239,13 +234,37 @@ def parse_xml(something): :param something: The data to parse. :returns: Returns a python object. """ + # Try to match header, and if matched, skip past it. + parser = LLSDBaseParser(something) + parser.matchseq(XML_HEADER) + # If we matched the header, then parse whatever follows, else parse the + # original bytes object or stream. + return parse_xml_nohdr(parser) + + +def parse_xml_nohdr(baseparser): + """ + Parse llsd+xml known to be without an header. May still + have a normal XML declaration, e.g. . + + :param baseparser: LLSDBaseParser instance wrapping the data to parse. + :returns: Returns a python object. + """ + # Python 3.9's xml.etree.ElementTree.fromstring() does not like whitespace + # before XML declaration. Since we explicitly test support for that case, + # skip initial whitespace. + baseparser.matchseq(b'') try: - # validate xml declaration manually until http://bugs.python.org/issue7138 is fixed - validate_xml_declaration(something) - return _to_python(fromstring(something)[0]) + element = _parse(baseparser.remainder()).getroot() except ElementTreeError as err: raise LLSDParseError(*err.args) + # We expect that the outer-level XML element is .... + if element.tag != 'llsd': + raise LLSDParseError("Invalid XML Declaration") + # Extract its contents. + return _to_python(element[0]) + _g_xml_formatter = None def format_xml(something): @@ -263,4 +282,4 @@ def format_xml(something): global _g_xml_formatter if _g_xml_formatter is None: _g_xml_formatter = LLSDXMLFormatter() - return _g_xml_formatter.format(something) \ No newline at end of file + return _g_xml_formatter.format(something) diff --git a/tests/llsd_test.py b/tests/llsd_test.py index b86ab96..6691c1c 100644 --- a/tests/llsd_test.py +++ b/tests/llsd_test.py @@ -3,14 +3,15 @@ from __future__ import print_function import base64 +from datetime import date, datetime +import io +from itertools import islice import pprint import re import struct import time import unittest import uuid -from datetime import date, datetime -from itertools import islice import pytest @@ -60,7 +61,7 @@ def strip(self, the_string): :Parameters: - 'the_string': string to remove the whitespaces. """ - return re.sub(b'\s', b'', the_string) + return re.sub(br'\s', b'', the_string) def assertNotationRoundtrip(self, py_in, str_in, is_alternate_notation=False): """ @@ -69,8 +70,12 @@ def assertNotationRoundtrip(self, py_in, str_in, is_alternate_notation=False): """ # use parse to check here py_out = self.llsd.parse(str_in) + py_out2 = self.llsd.parse(io.BytesIO(str_in)) + self.assertEqual(py_out2, py_out) str_out = self.llsd.as_notation(py_in) py_roundtrip = self.llsd.parse(str_out) + py_roundtrip2 = self.llsd.parse(io.BytesIO(str_out)) + self.assertEqual(py_roundtrip2, py_roundtrip) str_roundtrip = self.llsd.as_notation(py_out) # compare user-passed Python data with parsed user-passed string self.assertEqual(py_in, py_out) @@ -325,7 +330,7 @@ def testBinary(self): notation3 = b'b16' + b'"' + base64.b16encode(string_data1).strip() + b'"' notation4 = b'b16' + b'"' + base64.b16encode(string_data2).strip() + b'"' notation5 = b'b85' + b'"<~EHPu*CER),Dg-(AAoDo;+T~>"' - notation6 = b'b85' + b'"<~4E*J.<+0QR+EMIu4+@0gX@q@26G%G]>+D"u%DImm2Cj@Wq05s)~>"' + notation6 = b'b85' +br'"<~4E*J.<+0QR+EMIu4+@0gX@q@26G%G]>+D"u%DImm2Cj@Wq05s)~>"' self.assertNotationRoundtrip(python_binary1, notation1, True) self.assertNotationRoundtrip(python_binary2, notation2, True) @@ -546,7 +551,10 @@ def roundTrip(self, something): return the object. """ binary = self.llsd.as_binary(something) - return self.llsd.parse(binary) + frombytes = self.llsd.parse(binary) + fromstream = self.llsd.parse(io.BytesIO(binary)) + self.assertEqual(fromstream, frombytes) + return frombytes def testMap(self): """ @@ -981,6 +989,8 @@ def assertXMLRoundtrip(self, py, xml, ignore_rounding=False): # use parse to check parsed_py = self.llsd.parse(xml) + parsed_stream = self.llsd.parse(io.BytesIO(xml)) + self.assertEqual(parsed_stream, parsed_py) formatted_xml = self.llsd.as_xml(py) self.assertEqual(parsed_py, py) self.assertEqual(py, self.llsd.parse(formatted_xml)) @@ -1557,7 +1567,7 @@ def strip(self, the_string): Utility method to remove all the whitespace characters from the given string. """ - return re.sub(b'\s', b'', the_string) + return re.sub(br'\s', b'', the_string) def test_segfault(self): for i, badstring in enumerate([ @@ -1873,4 +1883,4 @@ def test_uuid_map_key(self): ''' llsdmap=llsd.LLSD({uuid.UUID(int=0) : 'uuid'}) self.assertEqual(llsd.format_xml(llsdmap), b'00000000-0000-0000-0000-000000000000uuid') - self.assertEqual(llsd.format_notation(llsdmap), b"{'00000000-0000-0000-0000-000000000000':'uuid'}") \ No newline at end of file + self.assertEqual(llsd.format_notation(llsdmap), b"{'00000000-0000-0000-0000-000000000000':'uuid'}")