Permalink
Browse files

work around Element proxy cleanup problems in PyPy

  • Loading branch information...
1 parent 6116832 commit 105c3500a34c56705006b5463fc1f3c4d5561894 @scoder scoder committed Oct 14, 2012
Showing with 50 additions and 22 deletions.
  1. +37 −22 src/lxml/proxy.pxi
  2. +13 −0 src/lxml/tests/test_etree.py
View
@@ -16,35 +16,49 @@ cdef inline _Element getProxy(xmlNode* c_node):
else:
return None
-cdef inline int hasProxy(xmlNode* c_node):
- return c_node._private is not NULL
-
+cdef inline bint hasProxy(xmlNode* c_node):
+ if c_node._private is NULL:
+ return False
+ if python.IS_PYPY:
+ if python.PyWeakref_LockObject(<python.PyObject*>c_node._private) is None:
+ # proxy has already died => remove weak reference
+ obj_ptr = <python.PyObject*>c_node._private
+ c_node._private = NULL
+ python.Py_XDECREF(obj_ptr)
+ return False
+ return True
+
cdef inline int _registerProxy(_Element proxy, _Document doc,
xmlNode* c_node) except -1:
u"""Register a proxy and type for the node it's proxying for.
"""
#print "registering for:", <int>proxy._c_node
- assert c_node._private is NULL, u"double registering proxy!"
+ assert not hasProxy(c_node), u"double registering proxy!"
proxy._doc = doc
proxy._c_node = c_node
if python.IS_PYPY:
c_node._private = <void*>python.PyWeakref_NewRef(proxy, NULL)
+ if c_node._private is NULL:
+ return -1 # manual exception propagation
else:
c_node._private = <void*>proxy
# additional INCREF to make sure _Document is GC-ed LAST!
proxy._gc_doc = <python.PyObject*>doc
python.Py_INCREF(doc)
+ return 0
cdef inline int _unregisterProxy(_Element proxy) except -1:
u"""Unregister a proxy for the node it's proxying for.
"""
cdef xmlNode* c_node
c_node = proxy._c_node
if python.IS_PYPY:
- python.Py_XDECREF(<python.PyObject*>c_node._private)
+ obj_ptr = <python.PyObject*>c_node._private
+ c_node._private = NULL
+ python.Py_XDECREF(obj_ptr)
else:
assert c_node._private is <void*>proxy, u"Tried to unregister unknown proxy"
- c_node._private = NULL
+ c_node._private = NULL
return 0
cdef inline void _releaseProxy(_Element proxy):
@@ -53,15 +67,9 @@ cdef inline void _releaseProxy(_Element proxy):
python.Py_XDECREF(proxy._gc_doc)
proxy._gc_doc = NULL
-cdef inline void _updateProxyDocument(xmlNode* c_node, _Document doc):
+cdef inline void _updateProxyDocument(_Element element, _Document doc):
u"""Replace the document reference of a proxy.
"""
- cdef _Document old_doc
- cdef _Element element
- if python.IS_PYPY:
- element = <_Element>python.PyWeakref_LockObject(<python.PyObject*>c_node._private)
- else:
- element = <_Element>c_node._private
if element._doc is not doc:
old_doc = element._doc
element._doc = doc
@@ -172,7 +180,7 @@ cdef xmlNode* getDeallocationTop(xmlNode* c_node):
cdef xmlNode* c_current
cdef xmlNode* c_top
#print "trying to do deallocating:", c_node.type
- if c_node._private is not NULL:
+ if hasProxy(c_node):
#print "Not freeing: proxies still exist"
return NULL
c_current = c_node.parent
@@ -184,7 +192,7 @@ cdef xmlNode* getDeallocationTop(xmlNode* c_node):
#print "not freeing: still in doc"
return NULL
# if we're still attached to the document, don't deallocate
- if c_current._private is not NULL:
+ if hasProxy(c_current):
#print "Not freeing: proxies still exist"
return NULL
c_top = c_current
@@ -199,7 +207,7 @@ cdef int canDeallocateChildNodes(xmlNode* c_parent):
cdef xmlNode* c_node
c_node = c_parent.children
tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
- if c_node._private is not NULL:
+ if hasProxy(c_node):
return 0
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
return 1
@@ -340,7 +348,7 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
if tree._isElementOrXInclude(c_element):
- if c_element._private is not NULL:
+ if hasProxy(c_element):
proxy_count += 1
# 1) cut out namespaces defined here that are already known by
@@ -391,7 +399,11 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
# (and potentially deallocate the source document)
if proxy_count > 0:
if proxy_count == 1 and c_start_node._private is not NULL:
- _updateProxyDocument(c_start_node, doc)
+ proxy = getProxy(c_start_node)
+ if proxy is not None:
+ _updateProxyDocument(proxy, doc)
+ else:
+ fixElementDocument(c_start_node, doc, proxy_count)
else:
fixElementDocument(c_start_node, doc, proxy_count)
@@ -401,12 +413,15 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
cdef void fixElementDocument(xmlNode* c_element, _Document doc,
size_t proxy_count):
cdef xmlNode* c_node = c_element
+ cdef _Element proxy = None # init-to-None required due to fake-loop below
tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
if c_node._private is not NULL:
- _updateProxyDocument(c_node, doc)
- proxy_count -= 1
- if proxy_count == 0:
- return
+ proxy = getProxy(c_node)
+ if proxy is not None:
+ _updateProxyDocument(proxy, doc)
+ proxy_count -= 1
+ if proxy_count == 0:
+ return
tree.END_FOR_EACH_FROM(c_node)
cdef void fixThreadDictNames(xmlNode* c_element,
@@ -12,6 +12,7 @@
import copy
import sys
import re
+import gc
import operator
import tempfile
import gzip
@@ -3176,6 +3177,18 @@ def test_tostring_unicode_pretty(self):
result = tostring(a, encoding=_unicode, pretty_print=True)
self.assertEquals(result, "<a>\n <b/>\n <c/>\n</a>\n")
+ def test_pypy_proxy_collect(self):
+ root = etree.Element('parent')
+ etree.SubElement(root, 'child')
+
+ self.assertEquals(len(root), 1)
+ self.assertEquals(root[0].tag, 'child')
+
+ # in PyPy, GC used to kill the Python proxy instance without cleanup
+ gc.collect()
+ self.assertEquals(len(root), 1)
+ self.assertEquals(root[0].tag, 'child')
+
# helper methods
def _writeElement(self, element, encoding='us-ascii', compression=0):

0 comments on commit 105c350

Please sign in to comment.