Skip to content

Commit

Permalink
Strip illegal xml chars from webservice response
Browse files Browse the repository at this point in the history
  • Loading branch information
Auridas committed Dec 17, 2020
1 parent 381f99d commit a1ed787
Showing 1 changed file with 29 additions and 1 deletion.
30 changes: 29 additions & 1 deletion prestapyt/prestapyt.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
from . import xml2dict
from . import dict2xml

import re
import sys

from xml.parsers.expat import ExpatError
from distutils.version import LooseVersion
try:
Expand Down Expand Up @@ -139,6 +142,9 @@ def __init__(self, api_url, api_key, debug=False, session=None,
if not self.client.auth:
self.client.auth = (api_key, '')

# To be used for stripping before parsing xml
self.illegal_xml_chars = self.get_illegal_xml_chars()

def _parse_error(self, xml_content):
"""Take the XML content as string and extract the PrestaShop error.
Expand Down Expand Up @@ -265,7 +271,8 @@ def _parse(self, content):
raise PrestaShopWebServiceError('HTTP response is empty')

try:
parsed_content = ElementTree.fromstring(content)
stripped_content = self.illegal_xml_chars.sub(b'', content)
parsed_content = ElementTree.fromstring(stripped_content)
except ExpatError as err:
raise PrestaShopWebServiceError(
'HTTP XML response is not parsable : %s' % (err,)
Expand Down Expand Up @@ -520,6 +527,27 @@ def get_content_type(self, filename):
"""
return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

def get_illegal_xml_chars(self):
    """Return a compiled *bytes* regex matching illegal XML characters.

    The pattern is meant to be applied to the raw (undecoded) webservice
    response, e.g. ``pattern.sub(b'', content)``, before it is handed to
    the XML parser.

    The pattern matches the complete UTF-8 byte sequence of each listed
    code point. Building a character class from the Unicode characters
    and then encoding that class to UTF-8 does not work: in a bytes
    pattern every byte of a multi-byte character becomes its own class
    member (and range endpoint), so the class ends up covering almost
    every byte >= 0x7F and strips all non-ASCII text from the response.

    NOTE(review): this assumes the response body is UTF-8 encoded (the
    previous implementation encoded its pattern as UTF-8 as well).

    :return: compiled regex (bytes pattern)
    """
    illegal_unichrs = [(0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F),
                       (0x7F, 0x84), (0x86, 0x9F),
                       (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF)]
    if sys.maxunicode >= 0x10000:  # not narrow build
        illegal_unichrs.extend([(0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF),
                                (0x3FFFE, 0x3FFFF), (0x4FFFE, 0x4FFFF),
                                (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF),
                                (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF),
                                (0x9FFFE, 0x9FFFF), (0xAFFFE, 0xAFFFF),
                                (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF),
                                (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF),
                                (0xFFFFE, 0xFFFFF), (0x10FFFE, 0x10FFFF)])

    # Group the UTF-8 encoding of every illegal code point by its leading
    # bytes; only the final byte varies inside a group, so each group can
    # be expressed as ``<leading bytes>[<final bytes>]``.
    final_bytes_by_prefix = {}
    for low, high in illegal_unichrs:
        for codepoint in range(low, high + 1):
            encoded = chr(codepoint).encode('utf-8')
            final_bytes_by_prefix.setdefault(
                encoded[:-1], set()).add(encoded[-1])

    # Emit every byte as a ``\xNN`` escape so the pattern source is plain
    # ASCII and no byte can be mistaken for a regex metacharacter.
    alternatives = []
    for prefix, final_bytes in final_bytes_by_prefix.items():
        lead = ''.join('\\x%02x' % byte for byte in prefix)
        tail = ''.join('\\x%02x' % byte for byte in sorted(final_bytes))
        alternatives.append('%s[%s]' % (lead, tail))

    return re.compile('|'.join(alternatives).encode('ascii'))

class PrestaShopWebServiceDict(PrestaShopWebService):
"""Interacts with the PrestaShop WebService API, use dict for messages."""
Expand Down

0 comments on commit a1ed787

Please sign in to comment.