Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Strip illegal XML characters from webservice response #59

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 29 additions & 1 deletion prestapyt/prestapyt.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
from . import xml2dict
from . import dict2xml

import re
import sys

from xml.parsers.expat import ExpatError
from distutils.version import LooseVersion
try:
Expand Down Expand Up @@ -139,6 +142,9 @@ def __init__(self, api_url, api_key, debug=False, session=None,
if not self.client.auth:
self.client.auth = (api_key, '')

# To be used for stripping before parsing xml
self.illegal_xml_chars = self.get_illegal_xml_chars()

def _parse_error(self, xml_content):
"""Take the XML content as string and extract the PrestaShop error.

Expand Down Expand Up @@ -265,7 +271,8 @@ def _parse(self, content):
raise PrestaShopWebServiceError('HTTP response is empty')

try:
parsed_content = ElementTree.fromstring(content)
stripped_content = self.illegal_xml_chars.sub(b'', content)
parsed_content = ElementTree.fromstring(stripped_content)
except ExpatError as err:
raise PrestaShopWebServiceError(
'HTTP XML response is not parsable : %s' % (err,)
Expand Down Expand Up @@ -520,6 +527,27 @@ def get_content_type(self, filename):
"""
return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

def get_illegal_xml_chars(self):
    """Return a compiled bytes regex matching unwanted XML characters.

    The pattern is meant to be applied to the raw, UTF-8 encoded response
    body (``pattern.sub(b'', content)``) before the XML is parsed.

    The pattern is built at byte level on purpose: putting UTF-8 encoded
    multi-byte characters inside a bytes character class turns each
    *byte* into a class member / range endpoint (e.g. ``\\x7f-\\xc2``),
    which strips the continuation bytes of perfectly valid text such as
    ``é`` or ``€``. Here every unwanted code point is matched only by its
    exact UTF-8 byte sequence.

    :return: compiled regex (bytes pattern)
    """
    # Inclusive (low, high) code point ranges to strip.
    illegal_unichrs = [(0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F),
                       (0x7F, 0x84), (0x86, 0x9F),
                       (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF)]
    if sys.maxunicode >= 0x10000:  # not narrow build
        # U+nFFFE and U+nFFFF for planes 1 through 16.
        illegal_unichrs.extend(
            ((plane << 16) | 0xFFFE, (plane << 16) | 0xFFFF)
            for plane in range(0x01, 0x11)
        )

    # Group the encoded code points by their leading bytes so that each
    # group becomes "<literal prefix>[<set of final bytes>]".
    last_bytes_by_prefix = {}
    for low, high in illegal_unichrs:
        for code_point in range(low, high + 1):
            encoded = chr(code_point).encode('utf-8')
            last_bytes_by_prefix.setdefault(
                encoded[:-1], []).append(encoded[-1])

    def hex_escape(byte_values):
        # Spell every byte as \xNN so the pattern source stays pure ASCII
        # and no byte can be mistaken for a regex metacharacter.
        return ''.join('\\x%02x' % value for value in byte_values)

    alternatives = [
        '%s[%s]' % (hex_escape(prefix), hex_escape(last_bytes))
        for prefix, last_bytes in last_bytes_by_prefix.items()
    ]
    return re.compile('|'.join(alternatives).encode('ascii'))

class PrestaShopWebServiceDict(PrestaShopWebService):
"""Interacts with the PrestaShop WebService API, use dict for messages."""
Expand Down