Skip to content

Commit

Permalink
work around Element proxy cleanup problems in PyPy
Browse files Browse the repository at this point in the history
  • Loading branch information
scoder committed Oct 14, 2012
1 parent 6116832 commit 105c350
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 22 deletions.
59 changes: 37 additions & 22 deletions src/lxml/proxy.pxi
Expand Up @@ -16,35 +16,49 @@ cdef inline _Element getProxy(xmlNode* c_node):
else:
return None

cdef inline int hasProxy(xmlNode* c_node):
return c_node._private is not NULL

cdef inline bint hasProxy(xmlNode* c_node):
if c_node._private is NULL:
return False
if python.IS_PYPY:
if python.PyWeakref_LockObject(<python.PyObject*>c_node._private) is None:
# proxy has already died => remove weak reference
obj_ptr = <python.PyObject*>c_node._private
c_node._private = NULL
python.Py_XDECREF(obj_ptr)
return False
return True

cdef inline int _registerProxy(_Element proxy, _Document doc,
xmlNode* c_node) except -1:
u"""Register a proxy and type for the node it's proxying for.
"""
#print "registering for:", <int>proxy._c_node
assert c_node._private is NULL, u"double registering proxy!"
assert not hasProxy(c_node), u"double registering proxy!"
proxy._doc = doc
proxy._c_node = c_node
if python.IS_PYPY:
c_node._private = <void*>python.PyWeakref_NewRef(proxy, NULL)
if c_node._private is NULL:
return -1 # manual exception propagation
else:
c_node._private = <void*>proxy
# additional INCREF to make sure _Document is GC-ed LAST!
proxy._gc_doc = <python.PyObject*>doc
python.Py_INCREF(doc)
return 0

cdef inline int _unregisterProxy(_Element proxy) except -1:
u"""Unregister a proxy for the node it's proxying for.
"""
cdef xmlNode* c_node
c_node = proxy._c_node
if python.IS_PYPY:
python.Py_XDECREF(<python.PyObject*>c_node._private)
obj_ptr = <python.PyObject*>c_node._private
c_node._private = NULL
python.Py_XDECREF(obj_ptr)
else:
assert c_node._private is <void*>proxy, u"Tried to unregister unknown proxy"
c_node._private = NULL
c_node._private = NULL
return 0

cdef inline void _releaseProxy(_Element proxy):
Expand All @@ -53,15 +67,9 @@ cdef inline void _releaseProxy(_Element proxy):
python.Py_XDECREF(proxy._gc_doc)
proxy._gc_doc = NULL

cdef inline void _updateProxyDocument(xmlNode* c_node, _Document doc):
cdef inline void _updateProxyDocument(_Element element, _Document doc):
u"""Replace the document reference of a proxy.
"""
cdef _Document old_doc
cdef _Element element
if python.IS_PYPY:
element = <_Element>python.PyWeakref_LockObject(<python.PyObject*>c_node._private)
else:
element = <_Element>c_node._private
if element._doc is not doc:
old_doc = element._doc
element._doc = doc
Expand Down Expand Up @@ -172,7 +180,7 @@ cdef xmlNode* getDeallocationTop(xmlNode* c_node):
cdef xmlNode* c_current
cdef xmlNode* c_top
#print "trying to do deallocating:", c_node.type
if c_node._private is not NULL:
if hasProxy(c_node):
#print "Not freeing: proxies still exist"
return NULL
c_current = c_node.parent
Expand All @@ -184,7 +192,7 @@ cdef xmlNode* getDeallocationTop(xmlNode* c_node):
#print "not freeing: still in doc"
return NULL
# if we're still attached to the document, don't deallocate
if c_current._private is not NULL:
if hasProxy(c_current):
#print "Not freeing: proxies still exist"
return NULL
c_top = c_current
Expand All @@ -199,7 +207,7 @@ cdef int canDeallocateChildNodes(xmlNode* c_parent):
cdef xmlNode* c_node
c_node = c_parent.children
tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
if c_node._private is not NULL:
if hasProxy(c_node):
return 0
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
return 1
Expand Down Expand Up @@ -340,7 +348,7 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,

tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
if tree._isElementOrXInclude(c_element):
if c_element._private is not NULL:
if hasProxy(c_element):
proxy_count += 1

# 1) cut out namespaces defined here that are already known by
Expand Down Expand Up @@ -391,7 +399,11 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
# (and potentially deallocate the source document)
if proxy_count > 0:
if proxy_count == 1 and c_start_node._private is not NULL:
_updateProxyDocument(c_start_node, doc)
proxy = getProxy(c_start_node)
if proxy is not None:
_updateProxyDocument(proxy, doc)
else:
fixElementDocument(c_start_node, doc, proxy_count)
else:
fixElementDocument(c_start_node, doc, proxy_count)

Expand All @@ -401,12 +413,15 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
cdef void fixElementDocument(xmlNode* c_element, _Document doc,
size_t proxy_count):
cdef xmlNode* c_node = c_element
cdef _Element proxy = None # init-to-None required due to fake-loop below
tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
if c_node._private is not NULL:
_updateProxyDocument(c_node, doc)
proxy_count -= 1
if proxy_count == 0:
return
proxy = getProxy(c_node)
if proxy is not None:
_updateProxyDocument(proxy, doc)
proxy_count -= 1
if proxy_count == 0:
return
tree.END_FOR_EACH_FROM(c_node)

cdef void fixThreadDictNames(xmlNode* c_element,
Expand Down
13 changes: 13 additions & 0 deletions src/lxml/tests/test_etree.py
Expand Up @@ -12,6 +12,7 @@
import copy
import sys
import re
import gc
import operator
import tempfile
import gzip
Expand Down Expand Up @@ -3176,6 +3177,18 @@ def test_tostring_unicode_pretty(self):
result = tostring(a, encoding=_unicode, pretty_print=True)
self.assertEquals(result, "<a>\n <b/>\n <c/>\n</a>\n")

def test_pypy_proxy_collect(self):
root = etree.Element('parent')
etree.SubElement(root, 'child')

self.assertEquals(len(root), 1)
self.assertEquals(root[0].tag, 'child')

# in PyPy, GC used to kill the Python proxy instance without cleanup
gc.collect()
self.assertEquals(len(root), 1)
self.assertEquals(root[0].tag, 'child')

# helper methods

def _writeElement(self, element, encoding='us-ascii', compression=0):
Expand Down

0 comments on commit 105c350

Please sign in to comment.