Skip to content

Commit dbb128e

Browse files
CPython developers authored and youknowone committed
Update html and its test from CPython 3.10.5
1 parent 33db28d commit dbb128e

File tree

3 files changed

+4
-18
lines changed

3 files changed

+4
-18
lines changed

Lib/html/entities.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55

66
# maps the HTML entity name to the Unicode code point
7+
# from https://html.spec.whatwg.org/multipage/named-characters.html
78
name2codepoint = {
89
'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
910
'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1

Lib/html/parser.py

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99

1010

1111
import re
12-
import warnings
1312
import _markupbase
1413

1514
from html import unescape
@@ -47,7 +46,7 @@
4746
|"[^"]*" # LIT-enclosed value
4847
|(?!['"])[^>\s]* # bare value
4948
)
50-
(?:\s*,)* # possibly followed by a comma
49+
\s* # possibly followed by a space
5150
)?(?:\s|/(?!>))*
5251
)*
5352
)?
@@ -406,7 +405,7 @@ def parse_endtag(self, i):
406405
tagname = namematch.group(1).lower()
407406
# consume and ignore other stuff between the name and the >
408407
# Note: this is not 100% correct, since we might have things like
409-
# </tag attr=">">, but looking for > after tha name should cover
408+
# </tag attr=">">, but looking for > after the name should cover
410409
# most of the cases and is much simpler
411410
gtpos = rawdata.find('>', namematch.end())
412411
self.handle_endtag(tagname)
@@ -418,7 +417,7 @@ def parse_endtag(self, i):
418417
self.handle_data(rawdata[i:gtpos])
419418
return gtpos
420419

421-
self.handle_endtag(elem.lower())
420+
self.handle_endtag(elem)
422421
self.clear_cdata_mode()
423422
return gtpos
424423

@@ -461,10 +460,3 @@ def handle_pi(self, data):
461460

462461
def unknown_decl(self, data):
463462
pass
464-
465-
# Internal -- helper to remove special character quoting
466-
def unescape(self, s):
467-
warnings.warn('The unescape method is deprecated and will be removed '
468-
'in 3.5, use html.unescape() instead.',
469-
DeprecationWarning, stacklevel=2)
470-
return unescape(s)

Lib/test/test_htmlparser.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -537,13 +537,6 @@ def test_EOF_in_charref(self):
537537
for html, expected in data:
538538
self._run_check(html, expected)
539539

540-
def test_unescape_method(self):
541-
from html import unescape
542-
p = self.get_collector()
543-
with self.assertWarns(DeprecationWarning):
544-
s = '&quot;&#34;&#x22;&quot&#34&#x22&#bad;'
545-
self.assertEqual(p.unescape(s), unescape(s))
546-
547540
def test_broken_comments(self):
548541
html = ('<! not really a comment >'
549542
'<! not a comment either -->'

0 commit comments

Comments
 (0)