Skip to content

Commit

Permalink
Remove defusedxml dependency
Browse files: browse the repository at this point in the history.
  • Loading branch information
Some User committed Dec 8, 2022
1 parent 1252c6b commit 078fdcd
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 15 deletions.
2 changes: 0 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ def __getattr__(cls, name):
"lxml",
"lxml.html",
"lxml.etree",
"defusedxml",
"defusedxml.lxml",
]
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

Expand Down
1 change: 0 additions & 1 deletion docs/usage/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,3 @@ install Grab with pip. Here is list of Grab dependencies::
user_agent
selection
lxml
defusedxml
13 changes: 7 additions & 6 deletions grab/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
from io import BytesIO, StringIO
from urllib.parse import parse_qs, urljoin, urlsplit

import defusedxml.lxml
from lxml.etree import ParserError, XMLParser # pytype: disable=import-error
from lxml import etree
from lxml.html import CheckboxValues, HTMLParser, MultipleSelectOptions
from selection import XpathSelector

Expand Down Expand Up @@ -583,12 +582,14 @@ def _build_dom(cls, content, mode, charset):
parser = THREAD_STORAGE.html_parsers.setdefault(
charset, HTMLParser(encoding=charset)
)
dom = defusedxml.lxml.parse(io_cls(content), parser=parser)
dom = etree.parse(io_cls(content), parser=parser)
return dom.getroot()
if not hasattr(THREAD_STORAGE, "xml_parser"):
THREAD_STORAGE.xml_parsers = {}
parser = THREAD_STORAGE.xml_parsers.setdefault(charset, XMLParser())
dom = defusedxml.lxml.parse(io_cls(content), parser=parser)
parser = THREAD_STORAGE.xml_parsers.setdefault(
charset, etree.XMLParser(resolve_entities=False)
)
dom = etree.parse(io_cls(content), parser=parser)
return dom.getroot()

def build_html_tree(self):
Expand All @@ -613,7 +614,7 @@ def build_html_tree(self):
except Exception as ex: # pylint: disable=broad-except
# FIXME: write test for this case
if (
isinstance(ex, ParserError) # noqa: SIM114
isinstance(ex, etree.ParserError) # noqa: SIM114
and "Document is empty" in str(ex)
and b"<html" not in body
):
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ dependencies = [
"user_agent",
"selection",
"lxml",
"defusedxml",
"urllib3[socks]",
"certifi",
]
Expand Down
2 changes: 1 addition & 1 deletion runtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
"tests.grab_response_body_processing",
"tests.grab_charset",
"tests.grab_redirect",
"tests.grab_defusedxml",
"tests.grab_document",
# *** Network
"tests.grab_get_request",
Expand Down Expand Up @@ -54,6 +53,7 @@
"tests.grab_error",
"tests.ext_pyquery",
# *** Other things
"tests.xml_security",
"tests.raw_server",
"tests.misc",
"tests.test_util_http",
Expand Down
4 changes: 2 additions & 2 deletions tests/grab_response_body_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ def test_github_html_processing(self):
self.assertTrue("tools-for-open-source" in items[2])

def test_explicit_custom_charset(self):
g = build_grab(
grab = build_grab(
"<html><head></head><body><h1>привет</h1></body></html".encode("cp1251"),
document_charset="cp1251",
)
self.assertEqual("привет", g.doc.select("//h1").text())
self.assertEqual("привет", grab.doc.select("//h1").text())
4 changes: 2 additions & 2 deletions tests/grab_defusedxml.py → tests/xml_security.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# See details here: https://github.com/tiran/defusedxml/blob/master/README.md
# https://lxml.de/FAQ.html#how-do-i-use-lxml-safely-as-a-web-service-endpoint
import os
from io import BytesIO

from lxml.etree import parse # pytype: disable=import-error
from lxml.etree import parse

from tests.util import BaseGrabTestCase, temp_dir

Expand Down

0 comments on commit 078fdcd

Please sign in to comment.