From 3ee671022ad409651d9b0dcf6fc21f7ab4bb0404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Dlouh=C3=BD?= Date: Sun, 7 Jan 2018 02:05:50 +0100 Subject: [PATCH] Python3: fix test_parser test for removing invalid entity --- tests/test_parser.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 662d75878b..6095d611cb 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -141,8 +141,6 @@ ("""""", """"""), ("""""", """"""), ("""""", """"""), - # note that \u8156 is not valid encoding and therefore gets removed - ("""""", """"""), # non-ascii characters ("""<Üzgür> fahr ¹²³¼½¬{""", """<Üzgür> fahr ¹²³¼½¬{"""), @@ -217,6 +215,20 @@ def test_parse (self): self.htmlparser.feed(_in) self.check_results(self.htmlparser, _in, _out, out) + def test_parse_invalid_encoding (self): + # Parse the test pattern in one go. + # The \u8156 is not a valid encoding and therefore gets removed in Python 2. + # Python 3 doesn't need to decode strings from the parser, so it doesn't get removed. + if sys.version_info < (3, 0): + _in, _out = """""", """""" + else: + _in, _out = """""", """""" + out = StringIO() + handler = linkcheck.HtmlParser.htmllib.HtmlPrettyPrinter(out) + self.htmlparser.handler = handler + self.htmlparser.feed(_in) + self.check_results(self.htmlparser, _in, _out, out) + def check_results (self, htmlparser, _in, _out, out): """ Check parse results.