diff --git a/Doc/library/xml.sax.utils.rst b/Doc/library/xml.sax.utils.rst index 5ee11d58c3dd26..7d950477443390 100644 --- a/Doc/library/xml.sax.utils.rst +++ b/Doc/library/xml.sax.utils.rst @@ -59,6 +59,20 @@ or as base classes. using the reference concrete syntax. +.. function:: is_valid_name(name) + + Return ``True`` if the string is a valid element or attribute name, + ``False`` otherwise. + + Almost all characters are permitted in names, except control characters and + those which either are or reasonably could be used as delimiters. + Characters like ":", "-", ".", "_", and "·" are permitted, but "<", "/", + "!", "?", and "=" are forbidden. + The name cannot start with a digit or a character like "-", ".", and "·". + + .. versionadded:: next + + .. class:: XMLGenerator(out=None, encoding='iso-8859-1', short_empty_elements=False) This class implements the :class:`~xml.sax.handler.ContentHandler` interface diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 4b176d6c8e6034..871fd661b8e3df 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -580,6 +580,14 @@ xml.parsers.expat .. _billion laughs: https://en.wikipedia.org/wiki/Billion_laughs_attack +xml.sax.saxutils +---------------- + +* Add the :func:`~xml.sax.saxutils.is_valid_name` function, which allows checking + whether a string can be used as an element or attribute name in XML. + (Contributed by Serhiy Storchaka in :gh:`139489`.) 
def test_is_valid_name(self):
    """Exercise is_valid_name() with accepted and rejected names.

    Every candidate runs in its own subTest so that a failure reports
    the offending string instead of a bare "True is not false", and so
    that one failing input does not hide the others.
    """
    accepted = (
        'name',
        'NAME',
        'name0:-._·',   # digits and punctuation are fine after the start
        '_',
        ':',
        'Ñàḿĕ',
        '\U000EFFFF',   # last code point of the highest permitted range
    )
    rejected = (
        '',             # a name needs at least one character
        '0',            # digits may not start a name
        '-',
        '.',
        '·',
        'na me',        # whitespace is a delimiter
    )
    for name in accepted:
        with self.subTest(name=name):
            self.assertTrue(is_valid_name(name))
    for name in rejected:
        with self.subTest(name=name):
            self.assertFalse(is_valid_name(name))
    # Delimiters, control characters, lone surrogates, noncharacters and
    # the first code point past the last permitted range.
    for c in '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000':
        with self.subTest(c=c):
            self.assertFalse(is_valid_name('name' + c))
def is_valid_name(name):
    """Return True if *name* is a valid XML element or attribute name.

    The string is matched in full against a NameStartChar character
    class followed by zero or more NameChar characters, as laid out at
    https://www.w3.org/TR/xml/#NT-Name.  The empty string is rejected
    because the first character is mandatory.
    """
    # Ranges that appear unchanged at the end of both character classes.
    high_ranges = (
        '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
        '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
    )
    # NameStartChar
    start_chars = ''.join((
        ':A-Z_a-z',
        '\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF',
        '\u200C\u200D',
        high_ranges,
    ))
    # NameChar
    body_chars = ''.join((
        r'\-.0-9:A-Z_a-z',
        '\xB7',
        '\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF',
        '\u200C\u200D\u203F\u2040',
        high_ranges,
    ))
    pattern = '[' + start_chars + '][' + body_chars + ']*'
    if re.fullmatch(pattern, name) is None:
        return False
    return True