Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions src/streamlink/utils/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,8 @@ def parse_html(
"""Wrapper around lxml.etree.HTML with some extras.

Provides these extra features:
- Handles incorrectly encoded HTML
- Wraps errors in custom exception with a snippet of the data in the message
"""
if isinstance(data, str):
data = bytes(data, "utf8")

return _parse(HTML, data, name, exception, schema, *args, **kwargs)


Expand Down
22 changes: 21 additions & 1 deletion tests/utils/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from streamlink.exceptions import PluginError
from streamlink.plugin.api import validate
from streamlink.plugin.api.validate import xml_element
from streamlink.utils.parse import parse_json, parse_qsd, parse_xml
from streamlink.utils.parse import parse_html, parse_json, parse_qsd, parse_xml


class TestUtilsParse(unittest.TestCase):
Expand Down Expand Up @@ -69,6 +69,26 @@ def test_parse_xml_entities(self):
self.assertEqual(expected.tag, actual.tag)
self.assertEqual(expected.attrib, actual.attrib)

def test_parse_xml_encoding(self):
tree = parse_xml("""<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""")
self.assertEqual(tree.xpath(".//text()"), ["ä"])
tree = parse_xml("""<test>ä</test>""")
self.assertEqual(tree.xpath(".//text()"), ["ä"])
tree = parse_xml(b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""")
self.assertEqual(tree.xpath(".//text()"), ["ä"])
tree = parse_xml(b"""<test>\xC3\xA4</test>""")
self.assertEqual(tree.xpath(".//text()"), ["ä"])

def test_parse_html_encoding(self):
    """Check that parse_html yields the body text "ä" for every input flavour.

    Covers str and bytes input (the bytes carry "ä" as the UTF-8
    sequence C3 A4), each with and without a <meta charset="utf-8"/>
    element in the document.
    """
    documents = (
        """<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>ä</body></html>""",
        """<!DOCTYPE html><html><body>ä</body></html>""",
        b"""<!DOCTYPE html><html><meta charset="utf-8"/><body>\xC3\xA4</body></html>""",
        b"""<!DOCTYPE html><html><body>\xC3\xA4</body></html>""",
    )
    # Inputs are checked in order; the first mismatch aborts the test.
    for document in documents:
        root = parse_html(document)
        self.assertEqual(root.xpath(".//body/text()"), ["ä"])

def test_parse_qsd(self):
self.assertEqual(
{"test": "1", "foo": "bar"},
Expand Down