Skip to content

Commit

Permalink
avoid ambiguous regex in striptags
Browse files Browse the repository at this point in the history
  • Loading branch information
davidism committed Mar 14, 2022
1 parent 9ddec7a commit b15d9d6
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
8 changes: 8 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
Version 2.1.1
-------------

Unreleased

- Avoid ambiguous regex matches in ``striptags``. :pr:`293`


Version 2.1.0
-------------

Expand Down
12 changes: 8 additions & 4 deletions src/markupsafe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ def __html__(self) -> str:
pass


__version__ = "2.1.0"
__version__ = "2.1.1.dev0"

_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)")
# Comments and tags are matched by separate patterns so that markup inside a
# comment (for example "<!-- about <em> -->") is removed together with the
# comment instead of being matched as a tag. DOTALL makes "." match newlines,
# so comments and tags that span several lines are stripped as well.
_strip_comments_re = re.compile(r"<!--.*?-->", re.DOTALL)
_strip_tags_re = re.compile(r"<.*?>", re.DOTALL)


def _simple_escaping_wrapper(name: str) -> t.Callable[..., "Markup"]:
Expand Down Expand Up @@ -158,8 +159,11 @@ def striptags(self) -> str:
>>> Markup("Main &raquo;\t<em>About</em>").striptags()
'Main » About'
"""
stripped = " ".join(_striptags_re.sub("", self).split())
return Markup(stripped).unescape()
# Use two regexes to avoid ambiguous matches.
value = _strip_comments_re.sub("", self)
value = _strip_tags_re.sub("", value)
value = " ".join(value.split())
return Markup(value).unescape()

@classmethod
def escape(cls, s: t.Any) -> "Markup":
Expand Down
10 changes: 9 additions & 1 deletion tests/test_markupsafe.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,15 @@ def test_dict_interpol():

def test_escaping(escape):
assert escape("\"<>&'") == "&#34;&lt;&gt;&amp;&#39;"
assert Markup("<em>Foo &amp; Bar</em>").striptags() == "Foo & Bar"
assert (
Markup(
"<!-- outer comment -->"
"<em>Foo &amp; Bar"
"<!-- inner comment about <em> -->"
"</em>"
).striptags()
== "Foo & Bar"
)


def test_unescape():
Expand Down

0 comments on commit b15d9d6

Please sign in to comment.