diff --git a/src/lxml/proxy.pxi b/src/lxml/proxy.pxi index 6265afe38..7fcc49fca 100644 --- a/src/lxml/proxy.pxi +++ b/src/lxml/proxy.pxi @@ -16,24 +16,36 @@ cdef inline _Element getProxy(xmlNode* c_node): else: return None -cdef inline int hasProxy(xmlNode* c_node): - return c_node._private is not NULL - +cdef inline bint hasProxy(xmlNode* c_node): + if c_node._private is NULL: + return False + if python.IS_PYPY: + if python.PyWeakref_LockObject(c_node._private) is None: + # proxy has already died => remove weak reference + obj_ptr = c_node._private + c_node._private = NULL + python.Py_XDECREF(obj_ptr) + return False + return True + cdef inline int _registerProxy(_Element proxy, _Document doc, xmlNode* c_node) except -1: u"""Register a proxy and type for the node it's proxying for. """ #print "registering for:", proxy._c_node - assert c_node._private is NULL, u"double registering proxy!" + assert not hasProxy(c_node), u"double registering proxy!" proxy._doc = doc proxy._c_node = c_node if python.IS_PYPY: c_node._private = python.PyWeakref_NewRef(proxy, NULL) + if c_node._private is NULL: + return -1 # manual exception propagation else: c_node._private = proxy # additional INCREF to make sure _Document is GC-ed LAST! proxy._gc_doc = doc python.Py_INCREF(doc) + return 0 cdef inline int _unregisterProxy(_Element proxy) except -1: u"""Unregister a proxy for the node it's proxying for. @@ -41,10 +53,12 @@ cdef inline int _unregisterProxy(_Element proxy) except -1: cdef xmlNode* c_node c_node = proxy._c_node if python.IS_PYPY: - python.Py_XDECREF(c_node._private) + obj_ptr = c_node._private + c_node._private = NULL + python.Py_XDECREF(obj_ptr) else: assert c_node._private is proxy, u"Tried to unregister unknown proxy" - c_node._private = NULL + c_node._private = NULL return 0 cdef inline void _releaseProxy(_Element proxy): @@ -53,15 +67,9 @@ cdef inline void _releaseProxy(_Element proxy): python.Py_XDECREF(proxy._gc_doc) proxy._gc_doc = NULL -cdef inline void _updateProxyDocument(xmlNode* c_node, _Document doc): +cdef inline void _updateProxyDocument(_Element element, _Document doc): u"""Replace the document reference of a proxy. """ - cdef _Document old_doc - cdef _Element element - if python.IS_PYPY: - element = <_Element>python.PyWeakref_LockObject(c_node._private) - else: - element = <_Element>c_node._private if element._doc is not doc: old_doc = element._doc element._doc = doc @@ -172,7 +180,7 @@ cdef xmlNode* getDeallocationTop(xmlNode* c_node): cdef xmlNode* c_current cdef xmlNode* c_top #print "trying to do deallocating:", c_node.type - if c_node._private is not NULL: + if hasProxy(c_node): #print "Not freeing: proxies still exist" return NULL c_current = c_node.parent @@ -184,7 +192,7 @@ cdef xmlNode* getDeallocationTop(xmlNode* c_node): #print "not freeing: still in doc" return NULL # if we're still attached to the document, don't deallocate - if c_current._private is not NULL: + if hasProxy(c_current): #print "Not freeing: proxies still exist" return NULL c_top = c_current @@ -199,7 +207,7 @@ cdef int canDeallocateChildNodes(xmlNode* c_parent): cdef xmlNode* c_node c_node = c_parent.children tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1) - if c_node._private is not NULL: + if hasProxy(c_node): return 0 tree.END_FOR_EACH_ELEMENT_FROM(c_node) return 1 @@ -340,7 +348,7 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1) if tree._isElementOrXInclude(c_element): - if c_element._private is not NULL: + if hasProxy(c_element): proxy_count += 1 # 1) cut out namespaces defined here that are already known by @@ -391,7 +399,11 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, # (and potentially deallocate the source document) if proxy_count > 0: if proxy_count == 1 and c_start_node._private is not NULL: - _updateProxyDocument(c_start_node, doc) + proxy = getProxy(c_start_node) + if proxy is not None: + _updateProxyDocument(proxy, doc) + else: + fixElementDocument(c_start_node, doc, proxy_count) else: fixElementDocument(c_start_node, doc, proxy_count) @@ -401,12 +413,15 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, cdef void fixElementDocument(xmlNode* c_element, _Document doc, size_t proxy_count): cdef xmlNode* c_node = c_element + cdef _Element proxy = None # init-to-None required due to fake-loop below tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1) if c_node._private is not NULL: - _updateProxyDocument(c_node, doc) - proxy_count -= 1 - if proxy_count == 0: - return + proxy = getProxy(c_node) + if proxy is not None: + _updateProxyDocument(proxy, doc) + proxy_count -= 1 + if proxy_count == 0: + return tree.END_FOR_EACH_FROM(c_node) cdef void fixThreadDictNames(xmlNode* c_element, diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py index 5a09634a6..4d8e1b270 100644 --- a/src/lxml/tests/test_etree.py +++ b/src/lxml/tests/test_etree.py @@ -12,6 +12,7 @@ import copy import sys import re +import gc import operator import tempfile import gzip @@ -3176,6 +3177,18 @@ def test_tostring_unicode_pretty(self): result = tostring(a, encoding=_unicode, pretty_print=True) self.assertEquals(result, "\n \n \n\n") + def test_pypy_proxy_collect(self): + root = etree.Element('parent') + etree.SubElement(root, 'child') + + self.assertEquals(len(root), 1) + self.assertEquals(root[0].tag, 'child') + + # in PyPy, GC used to kill the Python proxy instance without cleanup + gc.collect() + self.assertEquals(len(root), 1) + self.assertEquals(root[0].tag, 'child') + # helper methods def _writeElement(self, element, encoding='us-ascii', compression=0):