Skip to content
Fetching contributors…
Cannot retrieve contributors at this time
555 lines (493 sloc) 18.5 KB
# read-only tree implementation
@cython.internal
cdef class _ReadOnlyProxy:
    u"""A read-only proxy class suitable for PIs/Comments (for internal use only!).

    Wraps a raw ``xmlNode*`` and offers read access to it.  Most accessors
    call ``_assertNode()`` first, which fails as soon as the wrapped
    pointer has been reset to NULL (see ``_freeReadOnlyProxies()``).
    """
    # Set by free_after_use(); when true, _freeReadOnlyProxies() passes the
    # node to xmlFreeNode() while invalidating this proxy.
    cdef bint _free_after_use
    # The wrapped node; NULL means "proxy invalidated".
    cdef xmlNode* _c_node
    # Handed on as 'source_proxy' to every proxy created from this one.
    cdef _ReadOnlyProxy _source_proxy
    # Populated by _initReadOnlyProxy() on the proxy that was created
    # without a source proxy.
    cdef list _dependent_proxies

    def __cinit__(self):
        self._c_node = NULL
        self._free_after_use = 0

    cdef int _assertNode(self) except -1:
        u"""This is our way of saying: this proxy is invalid!
        """
        assert self._c_node is not NULL, u"Proxy invalidated!"
        return 0

    cdef int _raise_unsupported_type(self) except -1:
        u"""Raise TypeError for a node type this proxy cannot represent.

        Declared with ``except -1`` (like ``_assertNode()``) so that the
        exception actually propagates to the caller instead of being
        discarded at the C function boundary.
        """
        raise TypeError("Unsupported node type: %d" % self._c_node.type)

    cdef void free_after_use(self):
        u"""Should the xmlNode* be freed when releasing the proxy?
        """
        self._free_after_use = 1

    property tag:
        u"""Element tag

        The namespaced name for element nodes, otherwise the factory
        object (ProcessingInstruction, Comment or Entity) matching the
        node type.  Raises TypeError for any other node type.
        """
        def __get__(self):
            self._assertNode()
            if self._c_node.type == tree.XML_ELEMENT_NODE:
                return _namespacedName(self._c_node)
            elif self._c_node.type == tree.XML_PI_NODE:
                return ProcessingInstruction
            elif self._c_node.type == tree.XML_COMMENT_NODE:
                return Comment
            elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
                return Entity
            else:
                self._raise_unsupported_type()

    property text:
        u"""Text before the first subelement. This is either a string or
        the value None, if there was no text.

        For PIs and comments this is the node content ('' if there is
        none); for entity references it is the '&name;' form.
        """
        def __get__(self):
            self._assertNode()
            if self._c_node.type == tree.XML_ELEMENT_NODE:
                return _collectText(self._c_node.children)
            elif self._c_node.type in (tree.XML_PI_NODE,
                                       tree.XML_COMMENT_NODE):
                if self._c_node.content is NULL:
                    return ''
                else:
                    return funicode(self._c_node.content)
            elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
                return u'&%s;' % funicode(self._c_node.name)
            else:
                self._raise_unsupported_type()

    property tail:
        u"""Text after this element's end tag, but before the next sibling
        element's start tag. This is either a string or the value None, if
        there was no text.
        """
        def __get__(self):
            self._assertNode()
            return _collectText(self._c_node.next)

    property sourceline:
        u"""Original line number as found by the parser or None if unknown.
        """
        def __get__(self):
            cdef long line
            self._assertNode()
            line = tree.xmlGetLineNo(self._c_node)
            if line > 0:
                return line
            else:
                return None

    def __repr__(self):
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return u"<Element %s at 0x%x>" % (self.tag, id(self))
        elif self._c_node.type == tree.XML_COMMENT_NODE:
            return u"<!--%s-->" % self.text
        elif self._c_node.type in (tree.XML_ENTITY_REF_NODE,
                                   tree.XML_ENTITY_NODE):
            # Entity *references* are what 'tag', 'text' and
            # _newReadOnlyProxy() deal with; XML_ENTITY_NODE is still
            # accepted because the previous code tested for it here.
            return u"&%s;" % funicode(self._c_node.name)
        elif self._c_node.type == tree.XML_PI_NODE:
            # 'target' is provided by the PI proxy subclasses.
            text = self.text
            if text:
                return u"<?%s %s?>" % (self.target, text)
            else:
                return u"<?%s?>" % self.target
        else:
            self._raise_unsupported_type()

    def __getitem__(self, x):
        u"""Returns the subelement at the given position or the requested
        slice.

        Slices yield a list of read-only proxies; an index yields a single
        proxy or raises IndexError if there is no such child.
        """
        cdef xmlNode* c_node = NULL
        cdef Py_ssize_t step = 0, slicelength = 0
        cdef Py_ssize_t c, i
        cdef _node_to_node_function next_element
        cdef list result
        self._assertNode()
        if python.PySlice_Check(x):
            # slicing
            if _isFullSlice(<slice>x):
                return _collectChildren(self)
            _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
            if c_node is NULL:
                return []
            if step > 0:
                next_element = _nextElement
            else:
                # walk backwards with a positive step count
                step = -step
                next_element = _previousElement
            result = []
            c = 0
            while c_node is not NULL and c < slicelength:
                # Exactly one proxy per selected node.  The additional
                # _elementFactory(self._doc, ...) append that used to follow
                # referenced a '_doc' attribute this class does not have and
                # would have duplicated every entry.
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
                c += 1
                for i in range(step):
                    c_node = next_element(c_node)
            return result
        else:
            # indexing
            c_node = _findChild(self._c_node, x)
            if c_node is NULL:
                raise IndexError(u"list index out of range")
            return _newReadOnlyProxy(self._source_proxy, c_node)

    def __len__(self):
        u"""Returns the number of subelements.
        """
        cdef Py_ssize_t c
        cdef xmlNode* c_node
        self._assertNode()
        c = 0
        c_node = self._c_node.children
        while c_node is not NULL:
            # only children accepted by tree._isElement() are counted
            if tree._isElement(c_node):
                c += 1
            c_node = c_node.next
        return c

    def __nonzero__(self):
        u"""True if _findChildBackwards() finds a child at position 0.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _findChildBackwards(self._c_node, 0)
        return c_node != NULL

    def __deepcopy__(self, memo):
        u"__deepcopy__(self, memo)"
        return self.__copy__()

    cpdef __copy__(self):
        u"""__copy__(self)

        Copies the wrapped node into a new document and returns what
        ``_elementFactory()`` / ``getroot()`` produce for that copy.  An
        invalidated proxy is returned unchanged.
        """
        cdef xmlDoc* c_doc
        cdef xmlNode* c_node
        cdef _Document new_doc
        if self._c_node is NULL:
            return self
        c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive
        new_doc = _documentFactory(c_doc, None)
        root = new_doc.getroot()
        if root is not None:
            return root
        # Comment/PI: no root element, so look for the first top-level
        # node of the same type as the wrapped one.
        c_node = c_doc.children
        while c_node is not NULL and c_node.type != self._c_node.type:
            c_node = c_node.next
        if c_node is NULL:
            return None
        return _elementFactory(new_doc, c_node)

    def __iter__(self):
        u"""Iterate over a snapshot list of the children (see getchildren()).
        """
        return iter(self.getchildren())

    def iterchildren(self, tag=None, *, reversed=False):
        u"""iterchildren(self, tag=None, reversed=False)
        Iterate over the children of this element.

        A ``tag`` other than None or '*' restricts the result to children
        whose ``.tag`` compares equal to it.
        """
        children = self.getchildren()
        if tag is not None and tag != '*':
            children = [ el for el in children if el.tag == tag ]
        if reversed:
            children = children[::-1]
        return iter(children)

    cpdef getchildren(self):
        u"""Returns all subelements. The elements are returned in document
        order.
        """
        cdef xmlNode* c_node
        cdef list result
        self._assertNode()
        result = []
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
            c_node = c_node.next
        return result

    def getparent(self):
        u"""Returns the parent of this element or None for the root element.
        """
        cdef xmlNode* c_parent
        self._assertNode()
        c_parent = self._c_node.parent
        if c_parent is NULL or not tree._isElement(c_parent):
            return None
        else:
            return _newReadOnlyProxy(self._source_proxy, c_parent)

    def getnext(self):
        u"""Returns the following sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _nextElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None

    def getprevious(self):
        u"""Returns the preceding sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _previousElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None
@cython.final
@cython.internal
cdef class _ReadOnlyPIProxy(_ReadOnlyProxy):
    u"""A read-only proxy for processing instructions (for internal use only!)"""
    property target:
        u"""The name of the wrapped node, decoded by funicode().
        """
        def __get__(self):
            cdef xmlNode* c_node
            self._assertNode()
            c_node = self._c_node
            return funicode(c_node.name)
@cython.final
@cython.internal
cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy):
    u"""A read-only proxy for entity references (for internal use only!)

    All accessors check ``_assertNode()`` first so that an invalidated
    proxy fails cleanly instead of dereferencing a NULL node pointer.
    """
    property name:
        u"""The entity name, i.e. the name of the wrapped node.

        Assigning a name that contains '&' or ';' raises ValueError.
        """
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            value_utf = _utf8(value)
            # Validate the encoded form so that the check works no matter
            # whether a text or a byte string was passed in, and raise
            # explicitly: an 'assert' can be compiled out.
            if b'&' in value_utf or b';' in value_utf:
                raise ValueError(u"Invalid entity name '%s'" % value)
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))

    property text:
        u"""The entity reference in its '&name;' form.
        """
        def __get__(self):
            self._assertNode()
            return u'&%s;' % funicode(self._c_node.name)
@cython.internal
cdef class _ReadOnlyElementProxy(_ReadOnlyProxy):
    u"""The main read-only Element proxy class (for internal use only!).

    Adds attribute and namespace-prefix access on top of _ReadOnlyProxy.
    The attribute accessors delegate to ``_collectAttributes()``, passing
    1 for keys(), 2 for values() and 3 for items().
    """
    property attrib:
        u"""A new dict built from the attribute items on each access.
        """
        def __get__(self):
            self._assertNode()
            attribute_items = _collectAttributes(self._c_node, 3)
            return dict(attribute_items)

    property prefix:
        u"""Namespace prefix or None.
        """
        def __get__(self):
            self._assertNode()
            # No namespace at all, or a namespace without a prefix.
            if self._c_node.ns is NULL or self._c_node.ns.prefix is NULL:
                return None
            return funicode(self._c_node.ns.prefix)

    def get(self, key, default=None):
        u"""Gets an element attribute.

        Delegates to ``_getNodeAttributeValue()`` with ``default`` as the
        fallback value.
        """
        self._assertNode()
        attribute_value = _getNodeAttributeValue(self._c_node, key, default)
        return attribute_value

    def keys(self):
        u"""Gets a list of attribute names. The names are returned in an
        arbitrary order (just like for an ordinary Python dictionary).
        """
        self._assertNode()
        names = _collectAttributes(self._c_node, 1)
        return names

    def values(self):
        u"""Gets element attributes, as a sequence. The attributes are returned
        in an arbitrary order.
        """
        self._assertNode()
        attribute_values = _collectAttributes(self._c_node, 2)
        return attribute_values

    def items(self):
        u"""Gets element attributes, as a sequence. The attributes are returned
        in an arbitrary order.
        """
        self._assertNode()
        attribute_items = _collectAttributes(self._c_node, 3)
        return attribute_items
cdef _ReadOnlyProxy _newReadOnlyProxy(
    _ReadOnlyProxy source_proxy, xmlNode* c_node):
    u"""Create the read-only proxy that matches the type of ``c_node``.

    Elements get a _ReadOnlyElementProxy, PIs a _ReadOnlyPIProxy, comments
    and entity references a plain _ReadOnlyProxy.  Any other node type
    raises TypeError.  The new proxy is registered through
    ``_initReadOnlyProxy()`` before it is returned.
    """
    cdef _ReadOnlyProxy proxy
    if c_node.type == tree.XML_ELEMENT_NODE:
        proxy = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        proxy = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy)
    elif (c_node.type == tree.XML_COMMENT_NODE or
              c_node.type == tree.XML_ENTITY_REF_NODE):
        proxy = _ReadOnlyProxy.__new__(_ReadOnlyProxy)
    else:
        raise TypeError("Unsupported element type: %d" % c_node.type)
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
                               _ReadOnlyProxy source_proxy):
    u"""Link a freshly created proxy to its source proxy.

    With a source proxy, ``el`` is appended to that proxy's list of
    dependent proxies.  Without one, ``el`` becomes its own source proxy
    and starts a new list that contains only itself.
    """
    if source_proxy is not None:
        el._source_proxy = source_proxy
        source_proxy._dependent_proxies.append(el)
        return
    el._source_proxy = el
    el._dependent_proxies = [el]
cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy):
    u"""Invalidate every proxy registered on ``sourceProxy``.

    Each dependent proxy gets its node pointer reset to NULL; nodes of
    proxies flagged with ``_free_after_use`` are released through
    ``xmlFreeNode()``.  Afterwards the list of dependent proxies is
    emptied in place.  Does nothing if ``sourceProxy`` or its list is None.
    """
    cdef xmlNode* c_released
    cdef _ReadOnlyProxy proxy
    cdef list dependents
    if sourceProxy is None or sourceProxy._dependent_proxies is None:
        return
    dependents = sourceProxy._dependent_proxies
    for proxy in dependents:
        # detach the pointer first, then free it if this proxy owns it
        c_released = proxy._c_node
        proxy._c_node = NULL
        if proxy._free_after_use:
            tree.xmlFreeNode(c_released)
    del dependents[:]
# opaque wrapper around non-element nodes, e.g. the document node
#
# This class does not imply any restrictions on modifiability or
# read-only status of the node, so use with caution.
@cython.internal
cdef class _OpaqueNodeWrapper:
    u"""Holds a bare node pointer without exposing any Python-level API.

    Instances are created from Cython code via ``__new__()``; calling the
    type from Python runs ``__init__()``, which always raises TypeError.
    """
    # the wrapped node
    cdef tree.xmlNode* _c_node

    def __init__(self):
        raise TypeError(u"This type cannot be instatiated from Python")
@cython.final
@cython.internal
cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper):
    u"""Opaque wrapper for a document node that only allows appending
    copies of top-level nodes (one root element, PIs, comments).
    """
    cdef int _assertNode(self) except -1:
        u"""This is our way of saying: this proxy is invalid!
        """
        assert self._c_node is not NULL, u"Proxy invalidated!"
        return 0

    cpdef append(self, other_element):
        u"""Append a copy of an Element to the list of children.

        Raises ValueError when appending an element to a document that
        already has a root element, and TypeError for node types other
        than element, PI or comment.
        """
        cdef xmlNode* c_next
        cdef xmlNode* c_node
        cdef tree.xmlDoc* c_doc
        self._assertNode()
        # the wrapped node is used as the document itself
        c_doc = <tree.xmlDoc*>self._c_node
        c_node = _roNodeOf(other_element)
        if c_node.type == tree.XML_ELEMENT_NODE:
            if tree.xmlDocGetRootElement(c_doc) is not NULL:
                raise ValueError(u"cannot append, document already has a root element")
        elif (c_node.type != tree.XML_PI_NODE and
                  c_node.type != tree.XML_COMMENT_NODE):
            raise TypeError(u"unsupported element type for top-level node: %d" % c_node.type)
        c_node = _copyNodeToDoc(c_node, c_doc)
        # remember the copy's successor before the copy gets linked in
        c_next = c_node.next
        tree.xmlAddChild(self._c_node, c_node)
        _moveTail(c_next, c_node)

    def extend(self, elements):
        u"""Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for item in elements:
            self.append(item)
cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node):
    u"""Wrap ``c_node`` in an opaque wrapper object.

    XML and HTML document nodes get an _OpaqueDocumentWrapper, every
    other node type a plain _OpaqueNodeWrapper.
    """
    cdef _OpaqueNodeWrapper wrapper
    if (c_node.type == tree.XML_DOCUMENT_NODE or
            c_node.type == tree.XML_HTML_DOCUMENT_NODE):
        wrapper = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper)
    else:
        wrapper = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper)
    wrapper._c_node = c_node
    return wrapper
# element proxies that allow restricted modification
@cython.internal
cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy):
    u"""A read-only proxy that allows changing the text content.
    """
    property text:
        u"""The content of the wrapped node.

        Reading returns '' when the node has no content.  Assigning None
        passes a NULL content pointer to ``xmlNodeSetContent()``; any
        other value is encoded with ``_utf8()`` first.
        """
        def __get__(self):
            self._assertNode()
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)

        def __set__(self, value):
            # Declared explicitly so the C pointer type does not depend on
            # type inference (the previously declared, unused 'c_dict'
            # local was dropped).
            cdef const_xmlChar* c_text
            self._assertNode()
            if value is None:
                c_text = <const_xmlChar*>NULL
            else:
                # 'value' keeps the encoded bytes object referenced while
                # c_text points into it.
                value = _utf8(value)
                c_text = _xcstr(value)
            tree.xmlNodeSetContent(self._c_node, c_text)
@cython.final
@cython.internal
cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy):
    u"""A read-only proxy that allows changing the text/target content of a
    processing instruction.
    """
    property target:
        u"""The PI target, stored as the name of the wrapped node.
        """
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            target_utf = _utf8(value)
            tree.xmlNodeSetName(self._c_node, _xcstr(target_utf))
@cython.final
@cython.internal
cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy):
    u"""A proxy for entity references that allows changing the entity name
    (for internal use only!)
    """
    property name:
        u"""The entity name, i.e. the name of the wrapped node.

        Assigning a name that contains '&' or ';' raises ValueError.
        """
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            value_utf = _utf8(value)
            # Check the encoded bytes against byte literals.  The previous
            # code rebound 'value' to the encoded bytes and then tested
            # text literals against it, which fails with a TypeError on
            # Python 3 for every input.
            if b'&' in value_utf or b';' in value_utf:
                raise ValueError(u"Invalid entity name '%s'" % value)
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))
@cython.final
@cython.internal
cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy):
    u"""A read-only element that allows adding children and changing the
    text content (i.e. everything that adds to the subtree).
    """
    cpdef append(self, other_element):
        u"""Append a copy of an Element to the list of children.

        The node behind ``other_element`` is copied into this node's
        document before the copy is added as a child.
        """
        cdef xmlNode* c_next
        cdef xmlNode* c_source
        cdef xmlNode* c_copy
        self._assertNode()
        c_source = _roNodeOf(other_element)
        c_copy = _copyNodeToDoc(c_source, self._c_node.doc)
        # remember the copy's successor before the copy gets linked in
        c_next = c_copy.next
        tree.xmlAddChild(self._c_node, c_copy)
        _moveTail(c_next, c_copy)

    def extend(self, elements):
        u"""Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for item in elements:
            self.append(item)

    property text:
        u"""Text before the first subelement. This is either a string or the
        value None, if there was no text.

        A QName value is resolved via ``_resolveQNameText()`` and decoded
        from UTF-8 before it is stored.
        """
        def __get__(self):
            self._assertNode()
            return _collectText(self._c_node.children)

        def __set__(self, value):
            self._assertNode()
            if isinstance(value, QName):
                resolved = _resolveQNameText(self, value)
                value = python.PyUnicode_FromEncodedObject(
                    resolved, 'UTF-8', 'strict')
            _setNodeText(self._c_node, value)
cdef _ReadOnlyProxy _newAppendOnlyProxy(
    _ReadOnlyProxy source_proxy, xmlNode* c_node):
    u"""Create the restricted-modification proxy matching ``c_node``.

    Elements get an _AppendOnlyElementProxy, PIs a
    _ModifyContentOnlyPIProxy and comments a _ModifyContentOnlyProxy.
    Any other node type raises TypeError.  The new proxy is registered
    through ``_initReadOnlyProxy()`` before it is returned.
    """
    cdef _ReadOnlyProxy proxy
    if c_node.type == tree.XML_ELEMENT_NODE:
        proxy = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        proxy = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy)
    elif c_node.type == tree.XML_COMMENT_NODE:
        proxy = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy)
    else:
        raise TypeError("Unsupported element type: %d" % c_node.type)
    proxy._c_node = c_node
    _initReadOnlyProxy(proxy, source_proxy)
    return proxy
cdef xmlNode* _roNodeOf(element) except NULL:
    u"""Return the node pointer behind ``element`` for read access.

    Accepts _Element, _ReadOnlyProxy and _OpaqueNodeWrapper instances.
    Raises TypeError for any other argument type and for objects whose
    node pointer is NULL.
    """
    cdef xmlNode* c_node
    if not isinstance(element, (_Element, _ReadOnlyProxy, _OpaqueNodeWrapper)):
        raise TypeError(u"invalid argument type %s" % type(element))
    if isinstance(element, _Element):
        c_node = (<_Element>element)._c_node
    elif isinstance(element, _ReadOnlyProxy):
        c_node = (<_ReadOnlyProxy>element)._c_node
    else:
        c_node = (<_OpaqueNodeWrapper>element)._c_node
    if c_node is NULL:
        raise TypeError(u"invalid element")
    return c_node
cdef xmlNode* _nonRoNodeOf(element) except NULL:
    u"""Return the node pointer behind an object that permits modification.

    Accepts _Element, _AppendOnlyElementProxy and _OpaqueNodeWrapper
    instances.  Raises TypeError for any other argument type and for
    objects whose node pointer is NULL.
    """
    cdef xmlNode* c_node
    if not isinstance(element, (_Element, _AppendOnlyElementProxy,
                                _OpaqueNodeWrapper)):
        raise TypeError(u"invalid argument type %s" % type(element))
    if isinstance(element, _Element):
        c_node = (<_Element>element)._c_node
    elif isinstance(element, _AppendOnlyElementProxy):
        c_node = (<_AppendOnlyElementProxy>element)._c_node
    else:
        c_node = (<_OpaqueNodeWrapper>element)._c_node
    if c_node is NULL:
        raise TypeError(u"invalid element")
    return c_node
Jump to Line
Something went wrong with that request. Please try again.