From bba2678dbac5cab126810580277f61216c73dbd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 26 Sep 2025 18:37:44 +0200 Subject: [PATCH 1/6] expose Expat mitigation API to prevent exponential expansions --- Doc/library/pyexpat.rst | 55 ++++++- Doc/whatsnew/3.15.rst | 12 +- Include/pyexpat.h | 5 + Lib/test/test_pyexpat.py | 58 +++++++ ...5-09-22-14-40-11.gh-issue-90949.UM35nb.rst | 4 +- ...5-09-26-18-04-28.gh-issue-90949.YHjSzX.rst | 7 + Modules/clinic/pyexpat.c.h | 136 ++++++++++++++- Modules/pyexpat.c | 155 ++++++++++++++---- 8 files changed, 397 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-09-26-18-04-28.gh-issue-90949.YHjSzX.rst diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 4c21bc875217b9..0606b1ab35138e 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -241,6 +241,55 @@ XMLParser Objects :class:`!xmlparser` objects have the following methods to mitigate some common XML vulnerabilities. +.. method:: xmlparser.SetBillionLaughsAttackProtectionActivationThreshold(threshold, /) + + Sets the number of output bytes needed to activate protection against + `billion laughs`_ attacks. + + The number of output bytes includes amplification from entity expansion + and reading DTD files. + + By default, parser objects have a protection activation threshold of 8 MiB, + or equivalently 8,388,608 bytes. + + An :exc:`ExpatError` is raised if this method is called on a + |xml-non-root-parser| parser. + The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` + should not be used as they may have no special meaning. + + .. versionadded:: next + +.. method:: xmlparser.SetBillionLaughsAttackProtectionMaximumAmplification(max_factor, /) + + Sets the maximum tolerated amplification factor for protection against + `billion laughs`_ attacks. 
+ + The amplification factor is calculated as ``(direct + indirect) / direct`` + while parsing, where ``direct`` is the number of bytes read from + the primary document in parsing and ``indirect`` is the number of + bytes added by expanding entities and reading of external DTD files. + + The *max_factor* value must be a non-NaN :class:`float` value greater than + or equal to 1.0. Peak amplifications of factor 15,000 for the entire payload + and of factor 30,000 in the middle of parsing have been observed with small + benign files in practice. In particular, the activation threshold should be + carefully chosen to avoid false positives. + + By default, parser objects have a maximum amplification factor of 100.0. + + An :exc:`ExpatError` is raised if this method is called on a + |xml-non-root-parser| parser or if *max_factor* is outside the valid range. + The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` + should not be used as they may have no special meaning. + + .. note:: + + The maximum amplification factor is only considered if the threshold + that can be adjusted by :meth:`.SetBillionLaughsAttackProtectionActivationThreshold` + is exceeded. + + .. versionadded:: next + .. method:: xmlparser.SetAllocTrackerActivationThreshold(threshold, /) Sets the number of allocated bytes of dynamic memory needed to activate @@ -281,8 +330,8 @@ common XML vulnerabilities. .. note:: The maximum amplification factor is only considered if the threshold - that can be adjusted :meth:`.SetAllocTrackerActivationThreshold` is - exceeded. + that can be adjusted by :meth:`.SetAllocTrackerActivationThreshold` + is exceeded. .. versionadded:: next @@ -1010,4 +1059,6 @@ The ``errors`` module has the following attributes: not. See https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EncodingDecl and https://www.iana.org/assignments/character-sets/character-sets.xhtml. + +.. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack .. 
|xml-non-root-parser| replace:: :ref:`non-root ` diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 3a395c197021d1..7e85b141b1642f 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -556,8 +556,16 @@ unittest xml.parsers.expat ----------------- -* Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` - and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` +* Add :meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionActivationThreshold` + and :meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionMaximumAmplification` + to :ref:`xmlparser ` objects to mitigate `billion laughs`_ + attacks. + (Contributed by Bénédikt Tran in :gh:`90949`.) + + .. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack + +* Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` + and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. (Contributed by Bénédikt Tran in :gh:`90949`.) diff --git a/Include/pyexpat.h b/Include/pyexpat.h index 04548b7684a2fd..f523f8bb273983 100644 --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -57,6 +57,11 @@ struct PyExpat_CAPI XML_Parser parser, unsigned long long activationThresholdBytes); XML_Bool (*SetAllocTrackerMaximumAmplification)( XML_Parser parser, float maxAmplificationFactor); + /* might be NULL for expat < 2.4.0 */ + XML_Bool (*SetBillionLaughsAttackProtectionActivationThreshold)( + XML_Parser parser, unsigned long long activationThresholdBytes); + XML_Bool (*SetBillionLaughsAttackProtectionMaximumAmplification)( + XML_Parser parser, float maxAmplificationFactor); /* always add new stuff to the end! 
*/ }; diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 9cf9ac2f613b6e..8e0f7374b26fd0 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -958,6 +958,64 @@ def test_set_maximum_amplification__fail_for_subparser(self): self.assert_root_parser_failure(setter, 123.45) +@unittest.skipIf(expat.version_info < (2, 4, 0), "requires Expat >= 2.4.0") +class ExpansionProtectionTest(AttackProtectionTestBase, unittest.TestCase): + + def assert_rejected(self, func, /, *args, **kwargs): + """Check that func(*args, **kwargs) hits the amplification limit.""" + msg = ( + r"limit on input amplification factor \(from DTD and entities\) " + r"breached: line \d+, column \d+" + ) + self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + + def set_activation_threshold(self, parser, threshold): + return parser.SetBillionLaughsAttackProtectionActivationThreshold(threshold) + + def set_maximum_amplification(self, parser, max_factor): + return parser.SetBillionLaughsAttackProtectionMaximumAmplification(max_factor) + + def test_set_activation_threshold__threshold_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be always reached. + self.set_activation_threshold(parser, 3) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) + self.assert_rejected(parser.Parse, payload, True) + + def test_set_activation_threshold__threshold_not_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be never reached. + self.set_activation_threshold(parser, pow(10, 5)) + # Check that the protection stays inactive despite a small factor + # and a payload whose peak amplification factor exceeds it. 
+ self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) + self.assertIsNotNone(parser.Parse(payload, True)) + + def test_set_maximum_amplification__amplification_exceeded(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to always be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + # Craft a payload for which the peak amplification factor is > 1.0. + payload = self.exponential_expansion_payload(ncols=1, nrows=2) + self.assert_rejected(parser.Parse, payload, True) + + def test_set_maximum_amplification__amplification_not_exceeded(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to never be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) + # Craft a payload for which the peak amplification factor is < 1e4. 
+ payload = self.exponential_expansion_payload(ncols=1, nrows=2) + self.assertIsNotNone(parser.Parse(payload, True)) + + @unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2") class MemoryProtectionTest(AttackProtectionTestBase, unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst index 0719d4353fb708..5611f33fb8e37b 100644 --- a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst +++ b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst @@ -1,5 +1,5 @@ -Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` -and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` +Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` +and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` to :ref:`xmlparser ` objects to prevent use of disproportional amounts of dynamic memory from within an Expat parser. Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/Library/2025-09-26-18-04-28.gh-issue-90949.YHjSzX.rst b/Misc/NEWS.d/next/Library/2025-09-26-18-04-28.gh-issue-90949.YHjSzX.rst new file mode 100644 index 00000000000000..22d831433e765d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-26-18-04-28.gh-issue-90949.YHjSzX.rst @@ -0,0 +1,7 @@ +Add +:meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionActivationThreshold` +and +:meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionMaximumAmplification` +to :ref:`xmlparser ` objects to mitigate `billion laughs +`_ attacks. Patch by +Bénédikt Tran. 
diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index e178547060446e..0c60ca9ea0e675 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -409,6 +409,132 @@ pyexpat_xmlparser_UseForeignDTD(PyObject *self, PyTypeObject *cls, PyObject *con #endif /* (XML_COMBINED_VERSION >= 19505) */ +#if (XML_COMBINED_VERSION >= 20402) + +PyDoc_STRVAR(pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold__doc__, +"SetBillionLaughsAttackProtectionActivationThreshold($self, threshold, /)\n" +"--\n" +"\n" +"Sets the number of output bytes needed to activate protection against billion laughs attacks.\n" +"\n" +"By default, parser objects have a protection activation threshold of 8 MiB."); + +#define PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONACTIVATIONTHRESHOLD_METHODDEF \ + {"SetBillionLaughsAttackProtectionActivationThreshold", _PyCFunction_CAST(pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold__doc__}, + +static PyObject * +pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold_impl(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold); + +static PyObject * +pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "SetBillionLaughsAttackProtectionActivationThreshold", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + unsigned long long threshold; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, 
&_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_UnsignedLongLong_Converter(args[0], &threshold)) { + goto exit; + } + return_value = pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold_impl((xmlparseobject *)self, cls, threshold); + +exit: + return return_value; +} + +#endif /* (XML_COMBINED_VERSION >= 20402) */ + +#if (XML_COMBINED_VERSION >= 20402) + +PyDoc_STRVAR(pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification__doc__, +"SetBillionLaughsAttackProtectionMaximumAmplification($self, max_factor,\n" +" /)\n" +"--\n" +"\n" +"Sets the maximum tolerated amplification factor for protection against billion laughs attacks.\n" +"\n" +"The amplification factor is calculated as \"(direct + indirect) / direct\"\n" +"while parsing, where \"direct\" is the number of bytes read from the primary\n" +"document in parsing and \"indirect\" is the number of bytes added by expanding\n" +"entities and reading external DTD files, combined.\n" +"\n" +"The \'max_factor\' value must be a non-NaN floating point value greater than\n" +"or equal to 1.0. Amplification factors greater than 30,000 can be observed\n" +"in the middle of parsing even with benign files in practice. 
In particular,\n" +"the activation threshold should be carefully chosen to avoid false positives.\n" +"\n" +"By default, parser objects have a maximum amplification factor of 100.0."); + +#define PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONMAXIMUMAMPLIFICATION_METHODDEF \ + {"SetBillionLaughsAttackProtectionMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification__doc__}, + +static PyObject * +pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification_impl(xmlparseobject *self, + PyTypeObject *cls, + float max_factor); + +static PyObject * +pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "SetBillionLaughsAttackProtectionMaximumAmplification", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + float max_factor; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyFloat_CheckExact(args[0])) { + max_factor = (float) (PyFloat_AS_DOUBLE(args[0])); + } + else + { + max_factor = (float) PyFloat_AsDouble(args[0]); + if (max_factor == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + return_value = pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification_impl((xmlparseobject *)self, cls, max_factor); + +exit: + return return_value; +} + +#endif /* (XML_COMBINED_VERSION >= 20402) */ + #if 
(XML_COMBINED_VERSION >= 20702) PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__, @@ -679,6 +805,14 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ +#ifndef PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONACTIVATIONTHRESHOLD_METHODDEF + #define PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONACTIVATIONTHRESHOLD_METHODDEF +#endif /* !defined(PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONACTIVATIONTHRESHOLD_METHODDEF) */ + +#ifndef PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONMAXIMUMAMPLIFICATION_METHODDEF + #define PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONMAXIMUMAMPLIFICATION_METHODDEF +#endif /* !defined(PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONMAXIMUMAMPLIFICATION_METHODDEF) */ + #ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF #define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF) */ @@ -686,4 +820,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF) */ -/*[clinic end generated code: output=e73935658c04c83e input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f4677d6fb447c9cd input=a9049054013a1b77]*/ diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 50db77c4f12c16..0e5bd817086585 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1184,6 +1184,116 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls, } #endif +#if XML_COMBINED_VERSION >= 20402 +static PyObject * +set_activation_threshold(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold, + XML_Bool 
(*setter)(XML_Parser, unsigned long long)) +{ + assert(self->itself != NULL); + if (setter(self->itself, threshold) == XML_TRUE) { + Py_RETURN_NONE; + } + // The setter fails if self->itself is NULL (which is not possible here) + // or is a non-root parser, which currently only happens for parsers + // created by ExternalEntityParserCreate(). + pyexpat_state *state = PyType_GetModuleState(cls); + return set_invalid_arg(state, self, "parser must be a root parser"); +} + +static PyObject * +set_maximum_amplification(xmlparseobject *self, + PyTypeObject *cls, + float max_factor, + XML_Bool (*setter)(XML_Parser, float)) +{ + assert(self->itself != NULL); + if (setter(self->itself, max_factor) == XML_TRUE) { + Py_RETURN_NONE; + } + // The setter fails if self->itself is NULL (which is not possible here), + // is a non-root parser, which currently only happens for parsers created + // by ExternalEntityParserCreate(), or if 'max_factor' is NaN or < 1.0. + pyexpat_state *state = PyType_GetModuleState(cls); + // Note: Expat has no API to determine whether a parser is a root parser, + // and since the Expat functions for defining the various maximum allowed + // amplification factors fail when a bad parser or an out-of-range factor + // is given without specifying which check failed, we check whether the + // factor is out-of-range to improve the error message. See also gh-90949. + const char *message = (isnan(max_factor) || max_factor < 1.0f) + ? "'max_factor' must be at least 1.0" + : "parser must be a root parser"; + return set_invalid_arg(state, self, message); +} +#endif + +#if XML_COMBINED_VERSION >= 20402 +/*[clinic input] +@permit_long_summary +@permit_long_docstring_body +pyexpat.xmlparser.SetBillionLaughsAttackProtectionActivationThreshold + + cls: defining_class + threshold: unsigned_long_long + / + +Sets the number of output bytes needed to activate protection against billion laughs attacks. 
+ +By default, parser objects have a protection activation threshold of 8 MiB. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold_impl(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold) +/*[clinic end generated code: output=0c082342f1c78114 input=a420a76f682ffc76]*/ +{ + return set_activation_threshold( + self, cls, threshold, + XML_SetBillionLaughsAttackProtectionActivationThreshold + ); +} +#endif + +#if XML_COMBINED_VERSION >= 20402 +/*[clinic input] +@permit_long_summary +@permit_long_docstring_body +pyexpat.xmlparser.SetBillionLaughsAttackProtectionMaximumAmplification + + cls: defining_class + max_factor: float + / + +Sets the maximum tolerated amplification factor for protection against billion laughs attacks. + +The amplification factor is calculated as "(direct + indirect) / direct" +while parsing, where "direct" is the number of bytes read from the primary +document in parsing and "indirect" is the number of bytes added by expanding +entities and reading external DTD files, combined. + +The 'max_factor' value must be a non-NaN floating point value greater than +or equal to 1.0. Amplification factors greater than 30,000 can be observed +in the middle of parsing even with benign files in practice. In particular, +the activation threshold should be carefully chosen to avoid false positives. + +By default, parser objects have a maximum amplification factor of 100.0. 
+[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification_impl(xmlparseobject *self, + PyTypeObject *cls, + float max_factor) +/*[clinic end generated code: output=c590439eadf463fa input=aec034366805f6c7]*/ +{ + return set_maximum_amplification( + self, cls, max_factor, + XML_SetBillionLaughsAttackProtectionMaximumAmplification + ); +} +#endif + #if XML_COMBINED_VERSION >= 20702 /*[clinic input] @permit_long_summary @@ -1205,15 +1315,10 @@ pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, unsigned long long threshold) /*[clinic end generated code: output=bed7e93207ba08c5 input=54182cd71ad69978]*/ { - assert(self->itself != NULL); - if (XML_SetAllocTrackerActivationThreshold(self->itself, threshold) == XML_TRUE) { - Py_RETURN_NONE; - } - // XML_SetAllocTrackerActivationThreshold() can only fail if self->itself - // is not a root parser (currently, this is equivalent to be created - // by ExternalEntityParserCreate()). - pyexpat_state *state = PyType_GetModuleState(cls); - return set_invalid_arg(state, self, "parser must be a root parser"); + return set_activation_threshold( + self, cls, threshold, + XML_SetAllocTrackerActivationThreshold + ); } #endif @@ -1248,24 +1353,10 @@ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, float max_factor) /*[clinic end generated code: output=6e44bd48c9b112a0 input=3544abf9dd7ae055]*/ { - assert(self->itself != NULL); - if (XML_SetAllocTrackerMaximumAmplification(self->itself, max_factor) == XML_TRUE) { - Py_RETURN_NONE; - } - // XML_SetAllocTrackerMaximumAmplification() can fail if self->itself - // is not a root parser (currently, this is equivalent to be created - // by ExternalEntityParserCreate()) or if 'max_factor' is NaN or < 1.0. 
- // - // Expat does not provide a way to determine whether a parser is a root - // or not, nor does it provide a way to distinguish between failures in - // XML_SetAllocTrackerMaximumAmplification() (see gh-90949), we manually - // detect the factor out-of-range issue here so that users have a better - // error message. - pyexpat_state *state = PyType_GetModuleState(cls); - const char *message = (isnan(max_factor) || max_factor < 1.0f) - ? "'max_factor' must be at least 1.0" - : "parser must be a root parser"; - return set_invalid_arg(state, self, message); + return set_maximum_amplification( + self, cls, max_factor, + XML_SetAllocTrackerMaximumAmplification + ); } #endif @@ -1278,6 +1369,8 @@ static struct PyMethodDef xmlparse_methods[] = { PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF + PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONACTIVATIONTHRESHOLD_METHODDEF + PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONMAXIMUMAMPLIFICATION_METHODDEF PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF @@ -2292,7 +2385,13 @@ pyexpat_exec(PyObject *mod) capi->SetAllocTrackerActivationThreshold = NULL; capi->SetAllocTrackerMaximumAmplification = NULL; #endif - +#if XML_COMBINED_VERSION >= 20400 + capi->SetBillionLaughsAttackProtectionActivationThreshold = XML_SetBillionLaughsAttackProtectionActivationThreshold; + capi->SetBillionLaughsAttackProtectionMaximumAmplification = XML_SetBillionLaughsAttackProtectionMaximumAmplification; +#else + capi->SetBillionLaughsAttackProtectionActivationThreshold = NULL; + capi->SetBillionLaughsAttackProtectionMaximumAmplification = NULL; +#endif /* export using capsule */ PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME, pyexpat_capsule_destructor); From 501ef0eab298e3f5824929930c49f186a430f5dd Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 26 Sep 2025 20:25:00 +0200 Subject: [PATCH 2/6] Update pyexpat.c --- Modules/pyexpat.c | 46 +--------------------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index a0ef04984de108..670c20044c9ed9 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1174,7 +1174,7 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls, } #endif -#if XML_COMBINED_VERSION >= 20402 +#if XML_COMBINED_VERSION >= 20400 static PyObject * set_activation_threshold(xmlparseobject *self, PyTypeObject *cls, @@ -1284,50 +1284,6 @@ pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification_impl(xmlp } #endif -#if XML_COMBINED_VERSION >= 20702 -static PyObject * -set_activation_threshold(xmlparseobject *self, - PyTypeObject *cls, - unsigned long long threshold, - XML_Bool (*setter)(XML_Parser, unsigned long long)) -{ - assert(self->itself != NULL); - if (setter(self->itself, threshold) == XML_TRUE) { - Py_RETURN_NONE; - } - // The setter fails if self->itself is NULL (which is not possible here) - // or is a non-root parser, which currently only happens for parsers - // created by ExternalEntityParserCreate(). - pyexpat_state *state = PyType_GetModuleState(cls); - return set_invalid_arg(state, self, "parser must be a root parser"); -} - -static PyObject * -set_maximum_amplification(xmlparseobject *self, - PyTypeObject *cls, - float max_factor, - XML_Bool (*setter)(XML_Parser, float)) -{ - assert(self->itself != NULL); - if (setter(self->itself, max_factor) == XML_TRUE) { - Py_RETURN_NONE; - } - // The setter fails if self->itself is NULL (which is not possible here), - // is a non-root parser, which currently only happens for parsers created - // by ExternalEntityParserCreate(), or if 'max_factor' is NaN or < 1.0. 
- pyexpat_state *state = PyType_GetModuleState(cls); - // Note: Expat has no API to determine whether a parser is a root parser, - // and since the Expat functions for defining the various maximum allowed - // amplifcation factors fail when a bad parser or an out-of-range factor - // is given without specifying which check failed, we check whether the - // factor is out-of-range to improve the error message. See also gh-90949. - const char *message = (isnan(max_factor) || max_factor < 1.0f) - ? "'max_factor' must be at least 1.0" - : "parser must be a root parser"; - return set_invalid_arg(state, self, message); -} -#endif - #if XML_COMBINED_VERSION >= 20702 /*[clinic input] @permit_long_summary From 197dfa7e82c08edccf998edea05ed5c05922b2b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 26 Sep 2025 20:25:55 +0200 Subject: [PATCH 3/6] Update pyexpat.c --- Modules/pyexpat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 670c20044c9ed9..1a325d65e037fd 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1218,7 +1218,7 @@ set_maximum_amplification(xmlparseobject *self, } #endif -#if XML_COMBINED_VERSION >= 20402 +#if XML_COMBINED_VERSION >= 20400 /*[clinic input] @permit_long_summary @permit_long_docstring_body @@ -1246,7 +1246,7 @@ pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold_impl(xmlpa } #endif -#if XML_COMBINED_VERSION >= 20402 +#if XML_COMBINED_VERSION >= 20400 /*[clinic input] @permit_long_summary @permit_long_docstring_body @@ -2382,6 +2382,7 @@ pyexpat_exec(PyObject *mod) capi->SetAllocTrackerActivationThreshold = NULL; capi->SetAllocTrackerMaximumAmplification = NULL; #endif + /* export using capsule */ PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME, pyexpat_capsule_destructor); From 9734458b4f9518a53ad956753301b41714b27d4e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 27 Sep 2025 10:18:55 +0200 Subject: [PATCH 4/6] address reviews --- Doc/library/pyexpat.rst | 18 ++++++++++-------- Doc/whatsnew/3.15.rst | 3 ++- ...25-09-26-18-04-28.gh-issue-90949.YHjSzX.rst | 6 +++--- Modules/clinic/pyexpat.c.h | 16 ++++++++-------- Modules/pyexpat.c | 16 ++++++++++------ 5 files changed, 33 insertions(+), 26 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 0606b1ab35138e..69d577069c9ddd 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -238,8 +238,8 @@ XMLParser Objects .. versionadded:: 3.13 -:class:`!xmlparser` objects have the following methods to mitigate some -common XML vulnerabilities. +:class:`!xmlparser` objects have the following methods to tune protections +against some common XML vulnerabilities. .. method:: xmlparser.SetBillionLaughsAttackProtectionActivationThreshold(threshold, /) @@ -249,8 +249,8 @@ common XML vulnerabilities. The number of output bytes includes amplification from entity expansion and reading DTD files. - By default, parser objects have a protection activation threshold of 8 MiB, - or equivalently 8,388,608 bytes. + Parser objects usually have a protection activation threshold of 8 MiB, + but the actual default value depends on the underlying Expat library. An :exc:`ExpatError` is raised if this method is called on a |xml-non-root-parser| parser. @@ -275,7 +275,8 @@ common XML vulnerabilities. benign files in practice. In particular, the activation threshold should be carefully chosen to avoid false positives. - By default, parser objects have a maximum amplification factor of 100.0. + Parser objects usually have a maximum amplification factor of 100, + but the actual default value depends on the underlying Expat library. An :exc:`ExpatError` is raised if this method is called on a |xml-non-root-parser| parser or if *max_factor* is outside the valid range. 
@@ -295,8 +296,8 @@ common XML vulnerabilities. Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM. - By default, parser objects have an allocation activation threshold of 64 MiB, - or equivalently 67,108,864 bytes. + Parser objects usually have an allocation activation threshold of 64 MiB, + but the actual default value depends on the underlying Expat library. An :exc:`ExpatError` is raised if this method is called on a |xml-non-root-parser| parser. @@ -320,7 +321,8 @@ common XML vulnerabilities. near the start of parsing even with benign files in practice. In particular, the activation threshold should be carefully chosen to avoid false positives. - By default, parser objects have a maximum amplification factor of 100.0. + Parser objects usually have a maximum amplification factor of 100, + but the actual default value depends on the underlying Expat library. An :exc:`ExpatError` is raised if this method is called on a |xml-non-root-parser| parser or if *max_factor* is outside the valid range. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 56da9258404e8a..a103fb1a9c3bb4 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -564,7 +564,8 @@ xml.parsers.expat * Add :meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionActivationThreshold` and :meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionMaximumAmplification` - to :ref:`xmlparser <xmlparser-objects>` objects to tune `billion laughs`_ attacks protection. + to :ref:`xmlparser <xmlparser-objects>` objects to tune protections against + `billion laughs`_ attacks. (Contributed by Bénédikt Tran in :gh:`90949`.) ..
_billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack diff --git a/Misc/NEWS.d/next/Library/2025-09-26-18-04-28.gh-issue-90949.YHjSzX.rst b/Misc/NEWS.d/next/Library/2025-09-26-18-04-28.gh-issue-90949.YHjSzX.rst index 22d831433e765d..dae1b618ca0d76 100644 --- a/Misc/NEWS.d/next/Library/2025-09-26-18-04-28.gh-issue-90949.YHjSzX.rst +++ b/Misc/NEWS.d/next/Library/2025-09-26-18-04-28.gh-issue-90949.YHjSzX.rst @@ -2,6 +2,6 @@ Add :meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionActivationThreshold` and :meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionMaximumAmplification` -to :ref:`xmlparser <xmlparser-objects>` objects to mitigate `billion laughs -<https://en.wikipedia.org/wiki/Billion_laughs_attack>`_ attacks. Patch by -Bénédikt Tran. +to :ref:`xmlparser <xmlparser-objects>` objects to tune protections against +`billion laughs <https://en.wikipedia.org/wiki/Billion_laughs_attack>`_ attacks. +Patch by Bénédikt Tran. diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index 0c60ca9ea0e675..b93907a599a366 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -409,7 +409,7 @@ pyexpat_xmlparser_UseForeignDTD(PyObject *self, PyTypeObject *cls, PyObject *con #endif /* (XML_COMBINED_VERSION >= 19505) */ -#if (XML_COMBINED_VERSION >= 20402) +#if (XML_COMBINED_VERSION >= 20400) PyDoc_STRVAR(pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold__doc__, "SetBillionLaughsAttackProtectionActivationThreshold($self, threshold, /)\n" @@ -461,9 +461,9 @@ pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold(PyObject * return return_value; } -#endif /* (XML_COMBINED_VERSION >= 20402) */ +#endif /* (XML_COMBINED_VERSION >= 20400) */ -#if (XML_COMBINED_VERSION >= 20402) +#if (XML_COMBINED_VERSION >= 20400) PyDoc_STRVAR(pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification__doc__, "SetBillionLaughsAttackProtectionMaximumAmplification($self, max_factor,\n" @@ -482,7 +482,7 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplificat "in the middle of parsing even with
benign files in practice. In particular,\n" "the activation threshold should be carefully chosen to avoid false positives.\n" "\n" -"By default, parser objects have a maximum amplification factor of 100.0."); +"By default, parser objects have a maximum amplification factor of 100."); #define PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONMAXIMUMAMPLIFICATION_METHODDEF \ {"SetBillionLaughsAttackProtectionMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification__doc__}, @@ -533,7 +533,7 @@ pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification(PyObject return return_value; } -#endif /* (XML_COMBINED_VERSION >= 20402) */ +#endif /* (XML_COMBINED_VERSION >= 20400) */ #if (XML_COMBINED_VERSION >= 20702) @@ -603,11 +603,11 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, "hierarchy.\n" "\n" "The \'max_factor\' value must be a non-NaN floating point value greater than\n" -"or equal to 1.0. Amplification factors greater than 100.0 can be observed\n" +"or equal to 1.0. Amplification factors greater than 100 can be observed\n" "near the start of parsing even with benign files in practice. 
In particular,\n" "the activation threshold should be carefully chosen to avoid false positives.\n" "\n" -"By default, parser objects have a maximum amplification factor of 100.0."); +"By default, parser objects have a maximum amplification factor of 100."); #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF \ {"SetAllocTrackerMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__}, @@ -820,4 +820,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF) */ -/*[clinic end generated code: output=f4677d6fb447c9cd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=97bf658042d38f83 input=a9049054013a1b77]*/ diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 1a325d65e037fd..cab2dd409c9458 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1230,7 +1230,8 @@ pyexpat.xmlparser.SetBillionLaughsAttackProtectionActivationThreshold Sets the number of output bytes needed to activate protection against billion laughs attacks. -By default, parser objects have a protection activation threshold of 8 MiB. +Parser objects usually have a protection activation threshold of 8 MiB, +but the actual default value depends on the underlying Expat library. [clinic start generated code]*/ static PyObject * @@ -1268,14 +1269,15 @@ or equal to 1.0. Amplification factors greater than 30,000 can be observed in the middle of parsing even with benign files in practice. In particular, the activation threshold should be carefully chosen to avoid false positives. -By default, parser objects have a maximum amplification factor of 100.0. 
+Parser objects usually have a maximum amplification factor of 100, +but the actual default value depends on the underlying Expat library. [clinic start generated code]*/ static PyObject * pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification_impl(xmlparseobject *self, PyTypeObject *cls, float max_factor) -/*[clinic end generated code: output=c590439eadf463fa input=aec034366805f6c7]*/ +/*[clinic end generated code: output=c590439eadf463fa input=c5bae55c9b25d045]*/ { return set_maximum_amplification( self, cls, max_factor, @@ -1296,7 +1298,8 @@ pyexpat.xmlparser.SetAllocTrackerActivationThreshold Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM. -By default, parser objects have an allocation activation threshold of 64 MiB. +Parser objects usually have an allocation activation threshold of 64 MiB, +but the actual default value depends on the underlying Expat library. [clinic start generated code]*/ static PyObject * @@ -1334,14 +1337,15 @@ or equal to 1.0. Amplification factors greater than 100.0 can be observed near the start of parsing even with benign files in practice. In particular, the activation threshold should be carefully chosen to avoid false positives. -By default, parser objects have a maximum amplification factor of 100.0. +Parser objects usually have a maximum amplification factor of 100, +but the actual default value depends on the underlying Expat library. 
[clinic start generated code]*/ static PyObject * pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, PyTypeObject *cls, float max_factor) -/*[clinic end generated code: output=6e44bd48c9b112a0 input=3544abf9dd7ae055]*/ +/*[clinic end generated code: output=6e44bd48c9b112a0 input=aac2029e96e80b03]*/ { return set_maximum_amplification( self, cls, max_factor, From 6c2eab775d5be4e2a53362f2fb34bc893dd1a39a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 27 Sep 2025 11:12:26 +0200 Subject: [PATCH 5/6] clinic and comments --- Doc/library/pyexpat.rst | 5 +++++ Modules/clinic/pyexpat.c.h | 22 ++++++++++++++++------ Modules/pyexpat.c | 14 ++++++++++---- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 69d577069c9ddd..9aae5c9da7471d 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -257,6 +257,11 @@ against some common XML vulnerabilities. The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` should not be used as they may have no special meaning. + .. note:: + + Activation thresholds below 4 MiB are known to break support for DITA 1.3 + payload and are hence not recommended. + .. versionadded:: next .. 
method:: xmlparser.SetBillionLaughsAttackProtectionMaximumAmplification(max_factor, /) diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index b93907a599a366..ff2e28269dc927 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -417,7 +417,14 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThresho "\n" "Sets the number of output bytes needed to activate protection against billion laughs attacks.\n" "\n" -"By default, parser objects have a protection activation threshold of 8 MiB."); +"The number of output bytes includes amplification from entity expansion\n" +"and reading DTD files.\n" +"\n" +"Parser objects usually have a protection activation threshold of 8 MiB,\n" +"but the actual default value depends on the underlying Expat library.\n" +"\n" +"Activation thresholds below 4 MiB are known to break support for DITA 1.3\n" +"payload and are hence not recommended."); #define PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONACTIVATIONTHRESHOLD_METHODDEF \ {"SetBillionLaughsAttackProtectionActivationThreshold", _PyCFunction_CAST(pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold__doc__}, @@ -482,7 +489,8 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplificat "in the middle of parsing even with benign files in practice. 
In particular,\n" "the activation threshold should be carefully chosen to avoid false positives.\n" "\n" -"By default, parser objects have a maximum amplification factor of 100."); +"Parser objects usually have a maximum amplification factor of 100,\n" +"but the actual default value depends on the underlying Expat library."); #define PYEXPAT_XMLPARSER_SETBILLIONLAUGHSATTACKPROTECTIONMAXIMUMAMPLIFICATION_METHODDEF \ {"SetBillionLaughsAttackProtectionMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification__doc__}, @@ -543,7 +551,8 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__, "\n" "Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM.\n" "\n" -"By default, parser objects have an allocation activation threshold of 64 MiB."); +"Parser objects usually have an allocation activation threshold of 64 MiB,\n" +"but the actual default value depends on the underlying Expat library."); #define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF \ {"SetAllocTrackerActivationThreshold", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerActivationThreshold), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__}, @@ -603,11 +612,12 @@ PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, "hierarchy.\n" "\n" "The \'max_factor\' value must be a non-NaN floating point value greater than\n" -"or equal to 1.0. Amplification factors greater than 100 can be observed\n" +"or equal to 1.0. Amplification factors greater than 100.0 can be observed\n" "near the start of parsing even with benign files in practice. 
In particular,\n" "the activation threshold should be carefully chosen to avoid false positives.\n" "\n" -"By default, parser objects have a maximum amplification factor of 100."); +"Parser objects usually have a maximum amplification factor of 100,\n" +"but the actual default value depends on the underlying Expat library."); #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF \ {"SetAllocTrackerMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__}, @@ -820,4 +830,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF) */ -/*[clinic end generated code: output=97bf658042d38f83 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=81101a16a409daf6 input=a9049054013a1b77]*/ diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index cab2dd409c9458..7f6d84ad8641ca 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1230,15 +1230,21 @@ pyexpat.xmlparser.SetBillionLaughsAttackProtectionActivationThreshold Sets the number of output bytes needed to activate protection against billion laughs attacks. +The number of output bytes includes amplification from entity expansion +and reading DTD files. + Parser objects usually have a protection activation threshold of 8 MiB, but the actual default value depends on the underlying Expat library. + +Activation thresholds below 4 MiB are known to break support for DITA 1.3 +payload and are hence not recommended. 
[clinic start generated code]*/ static PyObject * pyexpat_xmlparser_SetBillionLaughsAttackProtectionActivationThreshold_impl(xmlparseobject *self, PyTypeObject *cls, unsigned long long threshold) -/*[clinic end generated code: output=0c082342f1c78114 input=a420a76f682ffc76]*/ +/*[clinic end generated code: output=0c082342f1c78114 input=fa2f91f26b62a42a]*/ { return set_activation_threshold( self, cls, threshold, @@ -1277,7 +1283,7 @@ static PyObject * pyexpat_xmlparser_SetBillionLaughsAttackProtectionMaximumAmplification_impl(xmlparseobject *self, PyTypeObject *cls, float max_factor) -/*[clinic end generated code: output=c590439eadf463fa input=c5bae55c9b25d045]*/ +/*[clinic end generated code: output=c590439eadf463fa input=cc1e97c1fd2bd950]*/ { return set_maximum_amplification( self, cls, max_factor, @@ -1306,7 +1312,7 @@ static PyObject * pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, PyTypeObject *cls, unsigned long long threshold) -/*[clinic end generated code: output=bed7e93207ba08c5 input=54182cd71ad69978]*/ +/*[clinic end generated code: output=bed7e93207ba08c5 input=b7a7a3e3d054286a]*/ { return set_activation_threshold( self, cls, threshold, @@ -1345,7 +1351,7 @@ static PyObject * pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, PyTypeObject *cls, float max_factor) -/*[clinic end generated code: output=6e44bd48c9b112a0 input=aac2029e96e80b03]*/ +/*[clinic end generated code: output=6e44bd48c9b112a0 input=c6af7ccb76ae5c6b]*/ { return set_maximum_amplification( self, cls, max_factor, From 3ec2ebd647d04c8ae75470d9cb5024b4dfd31644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Sep 2025 09:52:36 +0200 Subject: [PATCH 6/6] update NEWS --- Doc/whatsnew/3.15.rst | 4 ++-- .../Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/whatsnew/3.15.rst 
b/Doc/whatsnew/3.15.rst index a103fb1a9c3bb4..31e1bfd9a438ba 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -558,8 +558,8 @@ xml.parsers.expat * Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` - to :ref:`xmlparser <xmlparser-objects>` objects to prevent use of - disproportional amounts of dynamic memory from within an Expat parser. + to :ref:`xmlparser <xmlparser-objects>` objects to tune protections against + disproportional amounts of dynamic memory usage from within an Expat parser. (Contributed by Bénédikt Tran in :gh:`90949`.) * Add :meth:`~xml.parsers.expat.xmlparser.SetBillionLaughsAttackProtectionActivationThreshold` diff --git a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst index 5611f33fb8e37b..c4c9573b1d140a 100644 --- a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst +++ b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst @@ -1,5 +1,5 @@ Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` -to :ref:`xmlparser <xmlparser-objects>` objects to prevent use of -disproportional amounts of dynamic memory from within an Expat parser. +to :ref:`xmlparser <xmlparser-objects>` objects to tune protections against +disproportional amounts of dynamic memory usage from within an Expat parser. Patch by Bénédikt Tran.