Skip to content
This repository has been archived by the owner on Jan 19, 2022. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
adding defusedxml
  • Loading branch information
peterbe committed Mar 27, 2015
1 parent 04f82d4 commit 01f49e2
Show file tree
Hide file tree
Showing 11 changed files with 941 additions and 0 deletions.
112 changes: 112 additions & 0 deletions lib/python/defusedxml/ElementTree.py
@@ -0,0 +1,112 @@
# defusedxml
#
# Copyright (c) 2013 by Christian Heimes <christian@python.org>
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
"""Defused xml.etree.ElementTree facade
"""
from __future__ import print_function, absolute_import

import sys
from .common import PY3, PY26, PY31
if PY3:
import importlib
else:
from xml.etree.ElementTree import XMLParser as _XMLParser
from xml.etree.ElementTree import iterparse as _iterparse
if PY26:
from xml.parsers.expat import ExpatError as ParseError
else:
from xml.etree.ElementTree import ParseError
_IterParseIterator = None
from xml.etree.ElementTree import TreeBuilder as _TreeBuilder
from xml.etree.ElementTree import parse as _parse
from xml.etree.ElementTree import tostring

from .common import (DTDForbidden, EntitiesForbidden,
ExternalReferenceForbidden, _generate_etree_functions)

# Dotted name of the stdlib module this facade stands in for.
__origin__ = "xml.etree.ElementTree"

def _get_py3_cls():
    """Return parser classes taken from the pure-Python ElementTree module.

    Python 3.3 hides the pure Python code but defusedxml requires it.
    The code is based on test.support.import_fresh_module(): the C
    accelerator is blocked, ``xml.etree.ElementTree`` is imported afresh,
    and the interpreter's module state is put back exactly as it was.

    :returns: tuple ``(_XMLParser, _iterparse, _IterParseIterator,
        ParseError)``; ``_IterParseIterator`` is None when the pure module
        does not provide that class.
    """
    pymodname = "xml.etree.ElementTree"
    cmodname = "_elementtree"

    pymod = sys.modules.pop(pymodname, None)
    cmod = sys.modules.pop(cmodname, None)

    # A None entry makes "import _elementtree" fail, so the fresh import
    # below keeps its pure-Python definitions.
    sys.modules[cmodname] = None
    try:
        pure_pymod = importlib.import_module(pymodname)
    finally:
        # Always restore the previous state, even if the import failed.
        if cmod is not None:
            sys.modules[cmodname] = cmod
        else:
            sys.modules.pop(cmodname, None)

        # Never leave a None entry behind for the public module: that would
        # make every later "import xml.etree.ElementTree" fail.
        if pymod is not None:
            sys.modules[pymodname] = pymod
        else:
            sys.modules.pop(pymodname, None)

        # The fresh import also rebinds the "ElementTree" attribute on the
        # xml.etree package; point it back at the original module.
        etree_pkg = sys.modules.get("xml.etree")
        if etree_pkg is not None:
            if pymod is not None:
                etree_pkg.ElementTree = pymod
            elif hasattr(etree_pkg, "ElementTree"):
                del etree_pkg.ElementTree

    _XMLParser = pure_pymod.XMLParser
    _iterparse = pure_pymod.iterparse
    if PY31:
        _IterParseIterator = None
        from xml.parsers.expat import ExpatError as ParseError
    else:
        # _IterParseIterator is a private class and is not present in every
        # Python 3 release; report None instead of failing at import time.
        _IterParseIterator = getattr(pure_pymod, "_IterParseIterator", None)
        ParseError = pure_pymod.ParseError

    return _XMLParser, _iterparse, _IterParseIterator, ParseError

# On Python 3 the parser classes are taken from the pure-Python module via
# _get_py3_cls() rather than through a plain import.
if PY3:
    _XMLParser, _iterparse, _IterParseIterator, ParseError = _get_py3_cls()


class DefusedXMLParser(_XMLParser):
    """XMLParser that raises instead of processing risky XML constructs.

    Depending on the ``forbid_*`` flags, handlers are installed on the
    low-level parser that raise DTDForbidden, EntitiesForbidden or
    ExternalReferenceForbidden as soon as a doctype declaration, an entity
    declaration or an external entity reference is reported.
    """

    def __init__(self, html=0, target=None, encoding=None,
                 forbid_dtd=False, forbid_entities=True,
                 forbid_external=True):
        if not (PY26 or PY31):
            _XMLParser.__init__(self, html, target, encoding)
        else:
            # On 2.6 and 3.1 the base initialiser is called without encoding.
            _XMLParser.__init__(self, html, target)

        self.forbid_dtd = forbid_dtd
        self.forbid_entities = forbid_entities
        self.forbid_external = forbid_external

        # The low-level parser object is stored under a different attribute
        # name depending on the interpreter version.
        lowlevel = self.parser if (PY3 and not PY31) else self._parser

        if self.forbid_dtd:
            lowlevel.StartDoctypeDeclHandler = self.defused_start_doctype_decl
        if self.forbid_entities:
            lowlevel.EntityDeclHandler = self.defused_entity_decl
            lowlevel.UnparsedEntityDeclHandler = \
                self.defused_unparsed_entity_decl
        if self.forbid_external:
            lowlevel.ExternalEntityRefHandler = \
                self.defused_external_entity_ref_handler

    def defused_start_doctype_decl(self, name, sysid, pubid,
                                   has_internal_subset):
        """Reject a doctype declaration by raising DTDForbidden."""
        raise DTDForbidden(name, sysid, pubid)

    def defused_entity_decl(self, name, is_parameter_entity, value, base,
                            sysid, pubid, notation_name):
        """Reject an entity declaration by raising EntitiesForbidden."""
        raise EntitiesForbidden(name, value, base, sysid, pubid,
                                notation_name)

    def defused_unparsed_entity_decl(self, name, base, sysid, pubid,
                                     notation_name):
        """Reject an unparsed entity declaration (expat 1.2 callback)."""
        # This callback carries no value, hence None in its place.
        raise EntitiesForbidden(name, None, base, sysid, pubid,
                                notation_name)

    def defused_external_entity_ref_handler(self, context, base, sysid,
                                            pubid):
        """Reject an external entity reference."""
        raise ExternalReferenceForbidden(context, base, sysid, pubid)


# Aliases. "XMLParse" is a historical misspelling that is kept for backward
# compatibility; "XMLParser" is the name the stdlib module actually exports,
# so it has to exist here for the defused class to replace the stdlib one
# when the module is monkey patched.
XMLTreeBuilder = XMLParse = XMLParser = DefusedXMLParser

parse, iterparse, fromstring = _generate_etree_functions(
    DefusedXMLParser, _TreeBuilder, _IterParseIterator, _parse, _iterparse)
XML = fromstring
42 changes: 42 additions & 0 deletions lib/python/defusedxml/__init__.py
@@ -0,0 +1,42 @@
# defusedxml
#
# Copyright (c) 2013 by Christian Heimes <christian@python.org>
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
"""Defuse XML bomb denial of service vulnerabilities
"""
from __future__ import print_function, absolute_import

from .common import (DefusedXmlException, DTDForbidden, EntitiesForbidden,
ExternalReferenceForbidden, NotSupportedError,
_apply_defusing)

def defuse_stdlib():
    """Monkey patch and defuse all stdlib XML packages.

    :warning: The monkey patch is an EXPERIMENTAL feature.
    :returns: dict keyed by defused module; the value is the stdlib module
        that was patched, or None for xmlrpc, which patches itself.
    """
    from . import cElementTree
    from . import ElementTree
    from . import minidom
    from . import pulldom
    from . import sax
    from . import expatbuilder
    from . import expatreader
    from . import xmlrpc

    patched = {}

    # xmlrpc brings its own patching routine.
    xmlrpc.monkey_patch()
    patched[xmlrpc] = None

    # Every other facade is applied generically through its __origin__.
    facades = (cElementTree, ElementTree, minidom, pulldom, sax,
               expatbuilder, expatreader)
    for facade in facades:
        patched[facade] = _apply_defusing(facade)

    return patched


# Version string of this package.
__version__ = "0.4.1"

25 changes: 25 additions & 0 deletions lib/python/defusedxml/cElementTree.py
@@ -0,0 +1,25 @@
# defusedxml
#
# Copyright (c) 2013 by Christian Heimes <christian@python.org>
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
"""Defused xml.etree.cElementTree
"""
from __future__ import absolute_import

from xml.etree.cElementTree import TreeBuilder as _TreeBuilder
from xml.etree.cElementTree import parse as _parse
from xml.etree.cElementTree import tostring
# iterparse from ElementTree!
from xml.etree.ElementTree import iterparse as _iterparse

from .ElementTree import DefusedXMLParser, _IterParseIterator
from .common import _generate_etree_functions

# Dotted name of the stdlib module this facade stands in for.
__origin__ = "xml.etree.cElementTree"

# Aliases. "XMLParse" is a historical misspelling that is kept for backward
# compatibility; "XMLParser" is the name the stdlib module actually exports,
# so it has to exist here for the defused class to replace the stdlib one
# when the module is monkey patched.
XMLTreeBuilder = XMLParse = XMLParser = DefusedXMLParser

parse, iterparse, fromstring = _generate_etree_functions(
    DefusedXMLParser, _TreeBuilder, _IterParseIterator, _parse, _iterparse)
XML = fromstring
163 changes: 163 additions & 0 deletions lib/python/defusedxml/common.py
@@ -0,0 +1,163 @@
# defusedxml
#
# Copyright (c) 2013 by Christian Heimes <christian@python.org>
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
"""Common constants, exceptions and helper functions
"""
import sys
from types import MethodType

# True on any Python 3.x interpreter.
PY3 = sys.version_info[0] == 3
# True only on Python 2.6.
PY26 = sys.version_info[:2] == (2, 6)
# True only on Python 3.1.
PY31 = sys.version_info[:2] == (3, 1)


class DefusedXmlException(ValueError):
    """Base exception of the package.

    ``repr()`` of an instance is the same text as ``str()``.
    """

    def __repr__(self):
        return str(self)


class DTDForbidden(DefusedXmlException):
    """Document type definition is forbidden.

    The constructor arguments are stored as the attributes ``name``,
    ``sysid`` and ``pubid``.
    """

    def __init__(self, name, sysid, pubid):
        super(DTDForbidden, self).__init__()
        self.name = name
        self.sysid = sysid
        self.pubid = pubid

    def __str__(self):
        # Explicit field indices: auto-numbered "{}" fields raise ValueError
        # on Python 2.6. The rendered text is unchanged.
        tpl = "DTDForbidden(name='{0}', system_id={1!r}, public_id={2!r})"
        return tpl.format(self.name, self.sysid, self.pubid)


class EntitiesForbidden(DefusedXmlException):
    """Entity definition is forbidden.

    The constructor arguments are stored as the attributes ``name``,
    ``value``, ``base``, ``sysid``, ``pubid`` and ``notation_name``; only
    name, sysid and pubid appear in the string form.
    """

    def __init__(self, name, value, base, sysid, pubid, notation_name):
        super(EntitiesForbidden, self).__init__()
        self.name = name
        self.value = value
        self.base = base
        self.sysid = sysid
        self.pubid = pubid
        self.notation_name = notation_name

    def __str__(self):
        # Explicit field indices keep this usable on Python 2.6.
        tpl = ("EntitiesForbidden(name='{0}', system_id={1!r}, "
               "public_id={2!r})")
        return tpl.format(self.name, self.sysid, self.pubid)


class ExternalReferenceForbidden(DefusedXmlException):
    """Resolving an external reference is forbidden.

    The constructor arguments are stored as the attributes ``context``,
    ``base``, ``sysid`` and ``pubid``; only sysid and pubid appear in the
    string form.
    """

    def __init__(self, context, base, sysid, pubid):
        super(ExternalReferenceForbidden, self).__init__()
        self.context = context
        self.base = base
        self.sysid = sysid
        self.pubid = pubid

    def __str__(self):
        # Explicit field indices keep this usable on Python 2.6.
        tpl = "ExternalReferenceForbidden(system_id='{0}', public_id={1})"
        return tpl.format(self.sysid, self.pubid)


class NotSupportedError(DefusedXmlException):
    """The operation is not supported.
    """


def _apply_defusing(defused_mod):
    """Copy the public names of *defused_mod* over its stdlib counterpart.

    The stdlib module is named by ``defused_mod.__origin__``. Only names
    that do not start with an underscore and that already exist in the
    stdlib module are overwritten.

    :returns: the patched stdlib module
    """
    assert defused_mod is sys.modules[defused_mod.__name__]
    origin = defused_mod.__origin__
    # Make sure the stdlib module is loaded, then fetch it from sys.modules.
    __import__(origin, {}, {}, ["*"])
    target = sys.modules[origin]
    known = set(dir(target))
    for attr, value in vars(defused_mod).items():
        if not attr.startswith("_") and attr in known:
            setattr(target, attr, value)
    return target


def _generate_etree_functions(DefusedXMLParser, _TreeBuilder,
                              _IterParseIterator, _parse, _iterparse):
    """Factory for functions needed by etree, dependent on whether
    cElementTree or ElementTree is used.

    :param DefusedXMLParser: parser class accepting ``target`` and the three
        ``forbid_*`` keyword arguments
    :param _TreeBuilder: zero-argument callable creating the parser target
    :param _IterParseIterator: iterator class used on Python 3, or None
    :param _parse: underlying ``parse(source, parser)`` function
    :param _iterparse: underlying ``iterparse`` function
    :returns: tuple ``(parse, iterparse, fromstring)``
    """

    def _new_parser(forbid_dtd, forbid_entities, forbid_external):
        # Single place that builds a defused parser with a fresh target.
        return DefusedXMLParser(target=_TreeBuilder(),
                                forbid_dtd=forbid_dtd,
                                forbid_entities=forbid_entities,
                                forbid_external=forbid_external)

    def parse(source, parser=None, forbid_dtd=False, forbid_entities=True,
              forbid_external=True):
        # An explicitly supplied parser is used as is; the forbid_* flags
        # only apply to the parser created here.
        if parser is None:
            parser = _new_parser(forbid_dtd, forbid_entities,
                                 forbid_external)
        return _parse(source, parser)

    if PY26 or PY31:
        def bind(xmlparser, funcname, hookname):
            # These versions give no way to hand a parser to iterparse, so
            # the defused handlers are bound onto the parser it created.
            func = getattr(DefusedXMLParser, funcname)
            if PY26:
                # unbound -> function
                func = func.__func__
                method = MethodType(func, xmlparser, xmlparser.__class__)
            else:
                method = MethodType(func, xmlparser)
            # set hook
            setattr(xmlparser._parser, hookname, method)

        def iterparse(source, events=None, forbid_dtd=False,
                      forbid_entities=True, forbid_external=True):
            it = _iterparse(source, events)
            xmlparser = it._parser
            if forbid_dtd:
                bind(xmlparser, "defused_start_doctype_decl",
                     "StartDoctypeDeclHandler")
            if forbid_entities:
                bind(xmlparser, "defused_entity_decl",
                     "EntityDeclHandler")
                bind(xmlparser, "defused_unparsed_entity_decl",
                     "UnparsedEntityDeclHandler")
            if forbid_external:
                bind(xmlparser, "defused_external_entity_ref_handler",
                     "ExternalEntityRefHandler")
            return it
    elif PY3:
        def iterparse(source, events=None, parser=None, forbid_dtd=False,
                      forbid_entities=True, forbid_external=True):
            # "is None" (not truthiness) so that a caller-supplied parser is
            # never silently replaced; same check as in parse().
            if parser is None:
                parser = _new_parser(forbid_dtd, forbid_entities,
                                     forbid_external)
            if _IterParseIterator is None:
                # No private iterator class available: let the underlying
                # iterparse drive the defused parser directly.
                return _iterparse(source, events, parser)
            # The parser is built before the file is opened so that a
            # failing parser constructor cannot leak a file handle.
            close_source = False
            if not hasattr(source, "read"):
                source = open(source, "rb")
                close_source = True
            try:
                return _IterParseIterator(source, events, parser,
                                          close_source)
            except Exception:
                # Do not leave a file open that only this function knows of.
                if close_source:
                    source.close()
                raise
    else:
        # Python 2.7
        def iterparse(source, events=None, parser=None, forbid_dtd=False,
                      forbid_entities=True, forbid_external=True):
            if parser is None:
                parser = _new_parser(forbid_dtd, forbid_entities,
                                     forbid_external)
            return _iterparse(source, events, parser)

    def fromstring(text, forbid_dtd=False, forbid_entities=True,
                   forbid_external=True):
        parser = _new_parser(forbid_dtd, forbid_entities, forbid_external)
        parser.feed(text)
        return parser.close()

    return parse, iterparse, fromstring

0 comments on commit 01f49e2

Please sign in to comment.