Skip to content

Commit

Permalink
Fix etree XMLPullParser tests for Expat >=2.6.0 with reparse deferral
Browse files Browse the repository at this point in the history
Combined with gh#python/cpython!31453
bpo-46811: Make test suite support Expat >=2.4.5 (pythonGH-31453)

Curly brackets were never allowed in namespace URIs
according to RFC 3986, and so-called namespace-validating
XML parsers have the right to reject them as invalid URIs.

libexpat >=2.4.5 has become stricter in that regard due to
related security issues; with ET.XML instantiating a
namespace-aware parser under the hood, this test has no
future in CPython.

References:
- https://datatracker.ietf.org/doc/html/rfc3986
- https://www.w3.org/TR/xml-names/

Also, test_minidom.py: Support Expat >=2.4.5
(cherry picked from commit 2cae938)

Co-authored-by: Sebastian Pipping <sebastian@pipping.org>
Fixes: gh#python#115133
From-PR: gh#python/cpython!115138
Patch: CVE-2023-52425-libexpat-2.6.0-backport.patch
  • Loading branch information
hartwork authored and mcepl committed May 10, 2024
1 parent 23fc28b commit d7133c7
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 9 deletions.
12 changes: 12 additions & 0 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import re
import shutil
import socket
import pyexpat
import stat
import struct
import subprocess
Expand Down Expand Up @@ -112,6 +113,7 @@
"run_with_locale", "swap_item",
"swap_attr", "Matcher", "set_memlimit", "SuppressCrashReport", "sortdict",
"run_with_tz", "PGO", "missing_compiler_executable", "fd_count",
"fails_with_expat_2_6_0", "is_expat_2_6_0"
]

class Error(Exception):
Expand Down Expand Up @@ -2882,3 +2884,13 @@ def adjust_int_max_str_digits(max_digits):
yield
finally:
sys.set_int_max_str_digits(current)


def _is_expat_2_6_0():
    """Return True if pyexpat parsers expose the reparse deferral API.

    This is a feature probe, not a version comparison: it creates a
    throwaway parser and checks whether it has a
    ``SetReparseDeferralEnabled`` attribute.
    """
    return hasattr(pyexpat.ParserCreate(), 'SetReparseDeferralEnabled')


# Probed exactly once, at import time, and published as a plain constant;
# a memoizing wrapper around the zero-argument probe would add nothing.
is_expat_2_6_0 = _is_expat_2_6_0()

# Test decorator: marks the test as an expected failure when the reparse
# deferral API is present, and returns the test unchanged otherwise.
fails_with_expat_2_6_0 = (unittest.expectedFailure
                          if is_expat_2_6_0
                          else lambda test: test)
12 changes: 10 additions & 2 deletions Lib/test/test_minidom.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from xml.dom.minidom import parse, Node, Document, parseString
from xml.dom.minidom import getDOMImplementation
from xml.parsers.expat import ExpatError


tstfile = support.findfile("test.xml", subdir="xmltestdata")
Expand Down Expand Up @@ -1156,7 +1157,11 @@ def testEncodings(self):

# Verify that character decoding errors raise exceptions instead
# of crashing
self.assertRaises(UnicodeDecodeError, parseString,
# It doesn’t make any sense to insist on the exact text of the
# error message, or even the exact Exception … it is enough that
# the error has been discovered.
with self.assertRaises((UnicodeDecodeError, ExpatError)):
parseString(
b'<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')

doc.unlink()
Expand Down Expand Up @@ -1602,7 +1607,10 @@ def testEmptyXMLNSValue(self):
self.confirm(doc2.namespaceURI == xml.dom.EMPTY_NAMESPACE)

def testExceptionOnSpacesInXMLNSValue(self):
with self.assertRaisesRegex(ValueError, 'Unsupported syntax'):
# It doesn’t make any sense to insist on the exact text of the
# error message, or even the exact Exception … it is enough that
# the error has been discovered.
with self.assertRaises((ExpatError, ValueError)):
parseString('<element xmlns:abc="http:abc.com/de f g/hi/j k"><abc:foo /></element>')

def testDocRemoveChild(self):
Expand Down
61 changes: 60 additions & 1 deletion Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from xml.parsers import expat
from xml.parsers.expat import errors

from test.support import sortdict
from test.support import sortdict, is_expat_2_6_0


class SetAttributeTest(unittest.TestCase):
Expand Down Expand Up @@ -729,5 +729,64 @@ def resolve_entity(context, base, system_id, public_id):
self.assertEqual(handler_call_args, [("bar", "baz")])


class ReparseDeferralTest(unittest.TestCase):
    """Tests for the pyexpat reparse deferral getter/setter and its effect.

    Every test is skipped when ``is_expat_2_6_0`` is false, i.e. when the
    parser objects do not provide the reparse deferral methods.
    """

    def setUp(self):
        # Single skip guard shared by all tests in this class.
        if not is_expat_2_6_0:
            self.skipTest("Linked libexpat doesn't support reparse deferral")

    def _make_recording_parser(self):
        """Return ``(parser, started)``; *started* collects start-tag names."""
        started = []

        def start_element(name, _):
            started.append(name)

        parser = expat.ParserCreate()
        parser.StartElementHandler = start_element
        return parser, started

    def test_getter_setter_round_trip(self):
        """The getter reflects what the setter stored."""
        parser = expat.ParserCreate()
        # Expected value whenever deferral has been requested: True only
        # if the reported Expat version is at least 2.6.0.
        enabled = (expat.version_info >= (2, 6, 0))

        self.assertIs(parser.GetReparseDeferralEnabled(), enabled)
        parser.SetReparseDeferralEnabled(False)
        self.assertIs(parser.GetReparseDeferralEnabled(), False)
        parser.SetReparseDeferralEnabled(True)
        self.assertIs(parser.GetReparseDeferralEnabled(), enabled)

    def test_reparse_deferral_enabled(self):
        """With deferral on, handlers do not fire until the final parse."""
        parser, started = self._make_recording_parser()
        self.assertTrue(parser.GetReparseDeferralEnabled())

        # Feed one element split across two non-final chunks.
        for chunk in (b'<doc', b'/>'):
            parser.Parse(chunk, False)

        # The key test: Have handlers already fired?  Expecting: no.
        self.assertEqual(started, [])

        parser.Parse(b'', True)

        self.assertEqual(started, ['doc'])

    def test_reparse_deferral_disabled(self):
        """With deferral off, handlers fire as soon as the tag is complete."""
        parser, started = self._make_recording_parser()
        parser.SetReparseDeferralEnabled(False)
        self.assertFalse(parser.GetReparseDeferralEnabled())

        # Feed one element split across two non-final chunks.
        for chunk in (b'<doc', b'/>'):
            parser.Parse(chunk, False)

        # The key test: Have handlers already fired?  Expecting: yes.
        self.assertEqual(started, ['doc'])


if __name__ == "__main__":
unittest.main()
54 changes: 53 additions & 1 deletion Lib/test/test_sax.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import shutil
from urllib.error import URLError
from test import support
from test.support import findfile, run_unittest, TESTFN
from test.support import findfile, run_unittest, TESTFN, is_expat_2_6_0

TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
Expand Down Expand Up @@ -1168,6 +1168,58 @@ def test_expat_incremental_reset(self):

self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")

# Checks that flush() makes pending input visible to the content handler
# while leaving the parser's reparse deferral setting enabled.
def test_flush_reparse_deferral_enabled(self):
# Skip when the reparse deferral API is not available.
if not is_expat_2_6_0:
self.skipTest("Linked libexpat doesn't support reparse deferral")

# Parser events are serialized by XMLGenerator into `result`.
result = BytesIO()
xmlgen = XMLGenerator(result)
parser = create_parser()
parser.setContentHandler(xmlgen)

# Feed a single start tag split across two chunks.
for chunk in ("<doc", ">"):
parser.feed(chunk)

# NOTE(review): `start` is defined outside this view; presumably the
# document prefix XMLGenerator emits before any element -- confirm.
self.assertEqual(result.getvalue(), start) # i.e. no elements started
self.assertTrue(parser._parser.GetReparseDeferralEnabled())

parser.flush()

# After flush(): deferral is still reported as enabled, and the start
# tag has now reached the generator.
self.assertTrue(parser._parser.GetReparseDeferralEnabled())
self.assertEqual(result.getvalue(), start + b"<doc>")

# Finish the document and verify the complete output.
parser.feed("</doc>")
parser.close()

self.assertEqual(result.getvalue(), start + b"<doc></doc>")

# Checks that flush() makes pending input visible to the content handler
# and that a disabled reparse deferral setting stays disabled across it.
def test_flush_reparse_deferral_disabled(self):
# Skip when the reparse deferral API is not available.
if not is_expat_2_6_0:
self.skipTest("Linked libexpat doesn't support reparse deferral")

# Parser events are serialized by XMLGenerator into `result`.
result = BytesIO()
xmlgen = XMLGenerator(result)
parser = create_parser()
parser.setContentHandler(xmlgen)

# Feed a single start tag split across two chunks.
for chunk in ("<doc", ">"):
parser.feed(chunk)

# Deferral is switched off only after both chunks have been fed; the
# assertion below expects that no element has been reported yet.
parser._parser.SetReparseDeferralEnabled(False)
# NOTE(review): `start` is defined outside this view; presumably the
# document prefix XMLGenerator emits before any element -- confirm.
self.assertEqual(result.getvalue(), start) # i.e. no elements started

self.assertFalse(parser._parser.GetReparseDeferralEnabled())

parser.flush()

# After flush(): deferral is still reported as disabled, and the start
# tag has now reached the generator.
self.assertFalse(parser._parser.GetReparseDeferralEnabled())
self.assertEqual(result.getvalue(), start + b"<doc>")

# Finish the document and verify the complete output.
parser.feed("</doc>")
parser.close()

self.assertEqual(result.getvalue(), start + b"<doc></doc>")

# ===== Locator support

def test_expat_locator_noinfo(self):
Expand Down
14 changes: 9 additions & 5 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

from itertools import product
from test import support
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
from test.support import (TESTFN, findfile, import_fresh_module,
gc_collect, swap_attr, is_expat_2_6_0, fails_with_expat_2_6_0)

# pyET is the pure-Python implementation.
#
Expand Down Expand Up @@ -1047,6 +1048,7 @@ def assert_event_tags(self, parser, expected):
def test_simple_xml(self):
for chunk_size in (None, 1, 5):
with self.subTest(chunk_size=chunk_size):
expected_events = []
parser = ET.XMLPullParser()
self.assert_event_tags(parser, [])
self._feed(parser, "<!-- comment -->\n", chunk_size)
Expand All @@ -1056,16 +1058,17 @@ def test_simple_xml(self):
chunk_size)
self.assert_event_tags(parser, [])
self._feed(parser, ">\n", chunk_size)
self.assert_event_tags(parser, [('end', 'element')])
expected_events += [('end', 'element')]
self._feed(parser, "<element>text</element>tail\n", chunk_size)
self._feed(parser, "<empty-element/>\n", chunk_size)
self.assert_event_tags(parser, [
expected_events += [
('end', 'element'),
('end', 'empty-element'),
])
]
self._feed(parser, "</root>\n", chunk_size)
self.assert_event_tags(parser, [('end', 'root')])
expected_events += [('end', 'root')]
self.assertIsNone(parser.close())
self.assert_event_tags(parser, expected_events)

def test_feed_while_iterating(self):
parser = ET.XMLPullParser()
Expand Down Expand Up @@ -1668,6 +1671,7 @@ def test_issue6233(self):
b"<?xml version='1.0' encoding='ascii'?>\n"
b'<body>t&#227;g</body>')

@unittest.skip('Fails with modern libexpat.')
def test_issue3151(self):
e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
self.assertEqual(e.tag, '{${stuff}}localname')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Make test suite support Expat >=2.4.5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix etree XMLPullParser tests for Expat >=2.6.0 with reparse deferral

0 comments on commit d7133c7

Please sign in to comment.