Skip to content

Commit

Permalink
Modified helpers common_html_entity and replace_html_entity() to use the HTML entity definitions from html.entities.html5
Browse files Browse the repository at this point in the history
  • Loading branch information
ptmcg committed Oct 22, 2021
1 parent 69b6c5a commit aab37b6
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 3 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Expand Up @@ -16,6 +16,9 @@ Version 3.0.0 -
. added mark_control argument to support highlighting of control characters using
'.' or Unicode symbols, such as "␍" and "␊".

- Modified helpers common_html_entity and replace_html_entity() to use the HTML
entity definitions from html.entities.html5.


Version 3.0.0rc2 -
------------------
Expand Down
7 changes: 4 additions & 3 deletions pyparsing/helpers.py
@@ -1,4 +1,6 @@
# helpers.py
import html.entities

from .core import *
from .util import _bslash, _flatten, _escapeRegexRangeChars

Expand Down Expand Up @@ -648,10 +650,9 @@ def make_xml_tags(
Word(alphas, alphanums + "_:").set_name("any tag")
)


_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), "><& \"'"))
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
common_html_entity = Regex(
"&(?P<entity>" + "|".join(_htmlEntityMap.keys()) + ");"
"&(?P<entity>" + "|".join(_htmlEntityMap) + ");"
).set_name("common HTML entity")


Expand Down
25 changes: 25 additions & 0 deletions tests/test_unit.py
Expand Up @@ -1860,6 +1860,31 @@ def testRecursiveCombine(self):

self.assertParseResultsEquals(testVal, expected_list=expected)

def testHTMLEntities(self):
# Checks that common_html_entity combined with replace_html_entity rewrites
# named HTML entities in a block of text to their literal characters.
# The sample covers amp/gt/lt/apos/quot as well as copy, longrightarrow
# and dot.
html_source = dedent("""\
This &amp; that
2 &gt; 1
0 &lt; 1
Don&apos;t get excited!
I said &quot;Don&apos;t get excited!&quot;
Copyright &copy; 2021
Dot &longrightarrow; &dot;
""")
# NOTE(review): add_parse_action is called directly on the module-level
# pp.common_html_entity. If it attaches the action in place rather than to
# a copy, the action would persist on the shared expression for later
# tests - confirm, and consider copying the expression first.
transformer = pp.common_html_entity.add_parse_action(pp.replace_html_entity)
transformed = transformer.transform_string(html_source)
# Printed for debugging only; the assertion at the end is the actual check.
print(transformed)

# Same text as html_source with each entity replaced by its character.
expected = dedent("""\
This & that
2 > 1
0 < 1
Don't get excited!
I said "Don't get excited!"
Copyright © 2021
Dot ⟶ ˙
""")
self.assertEqual(expected, transformed)

def testInfixNotationBasicArithEval(self):
import ast

Expand Down

0 comments on commit aab37b6

Please sign in to comment.