diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 9b4f09599134bd..7eea885cfe63c5 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -45,7 +45,7 @@ ( (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name ) - (= # value indicator + ([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator ('[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\t\n\r\f ]* # bare value @@ -57,7 +57,7 @@ [a-zA-Z][^\t\n\r\f />]* # tag name [\t\n\r\f /]* # optional whitespace before attribute name (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name - (?:= # value indicator + (?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\t\n\r\f ]* # bare value diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 15cad061889a79..47c0752fb517b9 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -623,7 +623,7 @@ def test_correct_detection_of_start_tags(self): html = '
The rain' expected = [ - ('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]), + ('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]), ('starttag', 'b', []), ('data', 'The '), ('starttag', 'a', [('href', 'some_url')]), @@ -813,12 +813,12 @@ def test_attr_syntax(self): ] self._run_check("""""", output) self._run_check("", [('starttag', 'a', [('foo', '=bar')])]) - self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) - self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) self._run_check("", [('starttag', 'a', [('foo\v', 'bar')])]) self._run_check("", [('starttag', 'a', [('foo\xa0', 'bar')])]) - self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) - self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) self._run_check("", [('starttag', 'a', [('foo', '\vbar')])]) self._run_check("", [('starttag', 'a', [('foo', '\xa0bar')])]) @@ -829,8 +829,8 @@ def test_attr_values(self): ("d", "\txyz\n")])]) self._run_check("""""", [("starttag", "a", [("b", ""), ("c", "")])]) - self._run_check("", - [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("", + [('starttag', 'a', [('b', 'x'), ('c', 'y')])]) self._run_check("", [("starttag", "a", [("b", "\v"), ("c", "\xa0")])]) # Regression test for SF patch #669683. @@ -899,13 +899,17 @@ def test_malformed_attributes(self): ) expected = [ ('starttag', 'a', [('href', "test'style='color:red;bad1'")]), - ('data', 'test - bad1'), ('endtag', 'a'), + ('data', 'test - bad1'), + ('endtag', 'a'), ('starttag', 'a', [('href', "test'+style='color:red;ba2'")]), - ('data', 'test - bad2'), ('endtag', 'a'), + ('data', 'test - bad2'), + ('endtag', 'a'), ('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]), - ('data', 'test - bad3'), ('endtag', 'a'), - ('starttag', 'a', [('href', None), ('=', None), ("test' style", 'color:red;bad4')]), - ('data', 'test - bad4'), ('endtag', 'a') + ('data', 'test - bad3'), + ('endtag', 'a'), + ('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]), + ('data', 'test - bad4'), + ('endtag', 'a'), ] self._run_check(html, expected) diff --git a/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst b/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst index b6f9e104e44047..27e886abdb58e5 100644 --- a/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst +++ b/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst @@ -18,8 +18,3 @@ according to the HTML5 standard. * Multiple ``=`` between attribute name and value are no longer collapsed. E.g. ```` produces attribute "foo" with value "=bar". - -* Whitespaces between the ``=`` separator and attribute name or value are no - longer ignored. E.g. ```` produces two attributes "foo" and - "=bar", both with value None; ```` produces two attributes: - "foo" with value "" and "bar" with value None.