diff --git a/prestapyt/prestapyt.py b/prestapyt/prestapyt.py index a51d27f..d9eb42c 100644 --- a/prestapyt/prestapyt.py +++ b/prestapyt/prestapyt.py @@ -32,6 +32,9 @@ from . import xml2dict from . import dict2xml +import re +import sys + from xml.parsers.expat import ExpatError from distutils.version import LooseVersion try: @@ -139,6 +142,9 @@ def __init__(self, api_url, api_key, debug=False, session=None, if not self.client.auth: self.client.auth = (api_key, '') + # To be used for stripping before parsing xml + self.illegal_xml_chars = self.get_illegal_xml_chars() + def _parse_error(self, xml_content): """Take the XML content as string and extract the PrestaShop error. @@ -265,7 +271,8 @@ def _parse(self, content): raise PrestaShopWebServiceError('HTTP response is empty') try: - parsed_content = ElementTree.fromstring(content) + stripped_content = self.illegal_xml_chars.sub(b'', content) + parsed_content = ElementTree.fromstring(stripped_content) except ExpatError as err: raise PrestaShopWebServiceError( 'HTTP XML response is not parsable : %s' % (err,) @@ -520,6 +527,27 @@ def get_content_type(self, filename): """ return mimetypes.guess_type(filename)[0] or 'application/octet-stream' + def get_illegal_xml_chars(self): + """ Returns compiled regex with illegal xml chars + + :return: compiled regex + """ + illegal_unichrs = [(0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), + (0x7F, 0x84), (0x86, 0x9F), + (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF)] + if sys.maxunicode >= 0x10000: # not narrow build + illegal_unichrs.extend([(0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF), + (0x3FFFE, 0x3FFFF), (0x4FFFE, 0x4FFFF), + (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF), + (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF), + (0x9FFFE, 0x9FFFF), (0xAFFFE, 0xAFFFF), + (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF), + (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF), + (0xFFFFE, 0xFFFFF), (0x10FFFE, 0x10FFFF)]) + + illegal_ranges = [fr'{chr(low)}-{chr(high)}' for (low, high) in illegal_unichrs] + xml_illegal_character_regex = '[' + ''.join(illegal_ranges) + ']' + return re.compile(bytes(xml_illegal_character_regex, encoding='utf-8')) class PrestaShopWebServiceDict(PrestaShopWebService): """Interacts with the PrestaShop WebService API, use dict for messages."""