Update html and its test from CPython 3.10.5

CPython developers · youknowone · commit dbb128e1dee1 · 2022-08-07T16:29:58.000+09:00
diff --git a/Lib/html/entities.py b/Lib/html/entities.py
@@ -4,6 +4,7 @@
 
 
 # maps the HTML entity name to the Unicode code point
+# from https://html.spec.whatwg.org/multipage/named-characters.html
 name2codepoint = {
     'AElig':    0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
     'Aacute':   0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
@@ -9,7 +9,6 @@
 
 
 import re
-import warnings
 import _markupbase
 
 from html import unescape
@@ -47,7 +46,7 @@
           |"[^"]*"                   # LIT-enclosed value
           |(?!['"])[^>\s]*           # bare value
          )
-         (?:\s*,)*                   # possibly followed by a comma
+        \s*                          # possibly followed by a space
        )?(?:\s|/(?!>))*
      )*
    )?
@@ -406,7 +405,7 @@ def parse_endtag(self, i):
             tagname = namematch.group(1).lower()
             # consume and ignore other stuff between the name and the >
             # Note: this is not 100% correct, since we might have things like
-            # </tag attr=">">, but looking for > after tha name should cover
+            # </tag attr=">">, but looking for > after the name should cover
             # most of the cases and is much simpler
             gtpos = rawdata.find('>', namematch.end())
             self.handle_endtag(tagname)
@@ -418,7 +417,7 @@ def parse_endtag(self, i):
                 self.handle_data(rawdata[i:gtpos])
                 return gtpos
 
-        self.handle_endtag(elem.lower())
+        self.handle_endtag(elem)
         self.clear_cdata_mode()
         return gtpos
 
@@ -461,10 +460,3 @@ def handle_pi(self, data):
 
     def unknown_decl(self, data):
         pass
-
-    # Internal -- helper to remove special character quoting
-    def unescape(self, s):
-        warnings.warn('The unescape method is deprecated and will be removed '
-                      'in 3.5, use html.unescape() instead.',
-                      DeprecationWarning, stacklevel=2)
-        return unescape(s)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
@@ -537,13 +537,6 @@ def test_EOF_in_charref(self):
         for html, expected in data:
             self._run_check(html, expected)
 
-    def test_unescape_method(self):
-        from html import unescape
-        p = self.get_collector()
-        with self.assertWarns(DeprecationWarning):
-            s = '&quot;&#34;&#x22;&quot&#34&#x22&#bad;'
-            self.assertEqual(p.unescape(s), unescape(s))
-
     def test_broken_comments(self):
         html = ('<! not really a comment >'
                 '<! not a comment either -->'