Skip to content

Commit

Permalink
Python3: fix test_parser test for removing invalid entity
Browse files Browse the repository at this point in the history
  • Loading branch information
PetrDlouhy committed Jan 12, 2018
1 parent 4060d77 commit 3ee6710
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions tests/test_parser.py
Expand Up @@ -141,8 +141,6 @@
("""<a href="&hulla;ailto:" >""", """<a href="ailto:">"""),
("""<a href="&#109;ailto:" >""", """<a href="mailto:">"""),
("""<a href="&#x6D;ailto:" >""", """<a href="mailto:">"""),
# note that \u8156 is not valid encoding and therefore gets removed
("""<a href="&#8156;ailto:" >""", """<a href="ailto:">"""),
# non-ascii characters
("""<Üzgür> fahr </langsamer> ¹²³¼½¬{""",
"""<Üzgür> fahr </langsamer> ¹²³¼½¬{"""),
Expand Down Expand Up @@ -217,6 +215,20 @@ def test_parse (self):
self.htmlparser.feed(_in)
self.check_results(self.htmlparser, _in, _out, out)

def test_parse_invalid_encoding (self):
    """
    Check how a character reference with an invalid encoding is printed.

    The &#8156; reference is expected to be dropped from the output on
    Python 2 and to be passed through unchanged on Python 3, where
    strings from the parser do not need to be decoded.
    """
    source = """<a href="&#8156;ailto:" >"""
    if sys.version_info >= (3, 0):
        # Python 3: the reference survives untouched.
        expected = """<a href="&#8156;ailto:">"""
    else:
        # Python 2: the reference is removed.
        expected = """<a href="ailto:">"""
    buf = StringIO()
    self.htmlparser.handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(buf)
    self.htmlparser.feed(source)
    self.check_results(self.htmlparser, source, expected, buf)

def check_results (self, htmlparser, _in, _out, out):
"""
Check parse results.
Expand Down

0 comments on commit 3ee6710

Please sign in to comment.