diff --git a/index.js b/index.js index b9b4768d..86fe1f93 100644 --- a/index.js +++ b/index.js @@ -38,3 +38,4 @@ module.exports.SaxPushParser = sax_parser.SaxPushParser; module.exports.memoryUsage = bindings.xmlMemUsed; +module.exports.nodeCount = bindings.xmlNodeCount; diff --git a/src/libxmljs.cc b/src/libxmljs.cc index 9a311fac..69127a58 100644 --- a/src/libxmljs.cc +++ b/src/libxmljs.cc @@ -19,6 +19,9 @@ LibXMLJS LibXMLJS::instance; // track how much memory libxml2 is using int xml_memory_used = 0; +// track how many nodes haven't been freed +int nodeCount = 0; + // wrapper for xmlMemMalloc to update v8's knowledge of memory used // the GC relies on this information void* xmlMemMallocWrap(size_t size) @@ -142,24 +145,40 @@ void deregisterNodeNamespaces(xmlNode* xml_obj) * wrapped, it is necessary to update any wrappers (`XmlNamespace`) which have * been created for attached namespaces. */ + void xmlDeregisterNodeCallback(xmlNode* xml_obj) { - deregisterNodeNamespaces(xml_obj); - if (xml_obj->_private) + if (xml_obj->_private && xml_obj->type != XML_DOCUMENT_NODE && xml_obj->type != XML_HTML_DOCUMENT_NODE) { XmlNode* node = static_cast(xml_obj->_private); - // flag the XmlNode object as freed - node->freed = true; + // keep a copy of the doc so we can free it in the `XmlNode` destructor + node->doc = xml_obj->doc->_private; - // save a reference to the doc so we can still `unref` it - node->doc = xml_obj->doc; + // set xml_obj to NULL so the `XmlNode` destructor knows not to use it + node->xml_obj = NULL; } - return; + + // decrease the node count + if (nodeCount > 0) + nodeCount--; + // NOTE: nodeCount can go below 0 in some cases involving HTML documents + // creating certain document nodes that don't call xmlRegisterNode + + deregisterNodeNamespaces(xml_obj); +} + +// this is called for any created nodes +void xmlRegisterNodeCallback(xmlNode* xml_obj) +{ + nodeCount++; } LibXMLJS::LibXMLJS() { + // set the callback for when a node is created + xmlRegisterNodeDefault(xmlRegisterNodeCallback); + // set the callback for when a node is about to be freed xmlDeregisterNodeDefault(xmlDeregisterNodeCallback); @@ -228,6 +247,12 @@ NAN_METHOD(XmlMemUsed) return info.GetReturnValue().Set(Nan::New(xmlMemUsed())); } +NAN_METHOD(XmlNodeCount) +{ + Nan::HandleScope scope; + return info.GetReturnValue().Set(Nan::New(nodeCount)); +} + NAN_MODULE_INIT(init) { Nan::HandleScope scope; @@ -249,6 +274,8 @@ NAN_MODULE_INIT(init) Nan::Set(target, Nan::New("libxml").ToLocalChecked(), target); Nan::SetMethod(target, "xmlMemUsed", XmlMemUsed); + + Nan::SetMethod(target, "xmlNodeCount", XmlNodeCount); } NODE_MODULE(xmljs, init) diff --git a/src/xml_document.h b/src/xml_document.h index aa0aac55..610a8f6c 100644 --- a/src/xml_document.h +++ b/src/xml_document.h @@ -26,14 +26,21 @@ class XmlDocument : public Nan::ObjectWrap { // given xmlDoc object, intended for use in c++ space static v8::Local New(xmlDoc* doc); + // expose ObjectWrap::Ref void ref() { Ref(); } + // expose ObjectWrap::Unref void unref() { Unref(); } + // expose ObjectWrap::refs_ (for testing) + int refs() { + return refs_; + } + protected: // initialize a new document diff --git a/src/xml_element.cc b/src/xml_element.cc index db202d85..7136417b 100644 --- a/src/xml_element.cc +++ b/src/xml_element.cc @@ -333,11 +333,6 @@ XmlElement::get_attrs() { return scope.Escape(attributes); } -void -XmlElement::add_child(xmlNode* child) { - xmlAddChild(xml_obj, child); -} - void XmlElement::add_cdata(xmlNode* cdata) { xmlAddChild(xml_obj, cdata); @@ -473,16 +468,6 @@ XmlElement::XmlElement(xmlNode* node) { } -void -XmlElement::add_prev_sibling(xmlNode* element) { - xmlAddPrevSibling(xml_obj, element); -} - -void -XmlElement::add_next_sibling(xmlNode* element) { - xmlAddNextSibling(xml_obj, element); -} - void XmlElement::replace_element(xmlNode* element) { xmlReplaceNode(xml_obj, element); @@ -494,12 +479,6 @@ XmlElement::replace_text(const char* content) { xmlReplaceNode(xml_obj, txt); } -xmlNode* -XmlElement::import_element(XmlElement *element) { - return (xml_obj->doc == element->xml_obj->doc) ? - element->xml_obj : xmlDocCopyNode(element->xml_obj, xml_obj->doc, 1); -} - void XmlElement::Initialize(v8::Handle target) { diff --git a/src/xml_element.h b/src/xml_element.h index cfda87bc..a941a15b 100644 --- a/src/xml_element.h +++ b/src/xml_element.h @@ -47,17 +47,13 @@ class XmlElement : public XmlNode { v8::Local get_attr(const char* name); v8::Local get_attrs(); void set_attr(const char* name, const char* value); - void add_child(xmlNode* child); void add_cdata(xmlNode* cdata); void set_content(const char* content); v8::Local get_content(); v8::Local get_next_element(); v8::Local get_prev_element(); - void add_prev_sibling(xmlNode* element); - void add_next_sibling(xmlNode* element); void replace_element(xmlNode* element); void replace_text(const char* content); - xmlNode *import_element(XmlElement* element); }; } // namespace libxmljs diff --git a/src/xml_node.cc b/src/xml_node.cc index 82f25afa..ad609381 100644 --- a/src/xml_node.cc +++ b/src/xml_node.cc @@ -230,68 +230,137 @@ XmlNode::New(xmlNode* node) } XmlNode::XmlNode(xmlNode* node) : xml_obj(node) { - this->freed = false; xml_obj->_private = this; - // this will prevent the document from being cleaned up - // we keep the document if any of the nodes attached to it are still alive + // increase the ref count of each parent + this->ref(); + XmlDocument* doc = static_cast(xml_obj->doc->_private); doc->ref(); - - // check that there's a parent node with a _private pointer - // also check that the parent node isn't the doc since we already Ref() the doc once - if (xml_obj->parent != NULL && - xml_obj->parent->_private != NULL && - (void*)xml_obj->doc != (void*)xml_obj->parent) - { - static_cast(xml_obj->parent->_private)->Ref(); - } } XmlNode::~XmlNode() { - // check if `xml_obj` has been freed so we don't access bad memory - if (this->freed) - { - - // unref the doc using the doc reference we saved in the `flagNode` callback - if (this->doc) - { - XmlDocument* doc = static_cast(this->doc->_private); - doc->unref(); - } + // if we already freed xml_obj, + // then unref() the doc with this->doc - // set doc to null for good measure? - // this->doc = NULL; - - // return so we don't attempt to use `xml_obj` + if (xml_obj == NULL) { + XmlDocument* doc = static_cast(this->doc); + doc->unref(); return; } - xml_obj->_private = NULL; - // release the hold and allow the document to be freed + // this->unref() may cause `xml_obj` to be freed + // so we create a reference to the document now XmlDocument* doc = static_cast(xml_obj->doc->_private); + + // decrease the ref count of each parent + this->unref(); + + // doc->unref() may cause `xml_obj` to be freed + // so we unref the document _after_ we unref each parent doc->unref(); - // We do not free the xmlNode here if it is linked to a document - // It will be freed when the doc is freed - if (xml_obj->parent == NULL) - xmlFreeNode(xml_obj); - - // if there's a parent then Unref() it - else if (xml_obj->parent->_private != NULL && - (void*)xml_obj->doc != (void*)xml_obj->parent) - { - XmlNode* parent = static_cast(xml_obj->parent->_private); - - // make sure Unref() is necessary - if (parent->refs_ > 0) - { - parent->Unref(); - } + // unref() may have already set _private to NULL + // so we check that `xml_obj` is still available + if (xml_obj != NULL) + xml_obj->_private = NULL; +} + +// recursively ref each parent +void XmlNode::ref(int count) { + // make sure count > 0 + if (count < 1) + count = 1; + + // loop through each parent and increase the ref count + xmlNode* node = xml_obj; + while (node->parent != NULL) { + node = node->parent; + if (node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE) + break; + node->refs += count; + } +} + +// recursively unref each parent +void XmlNode::unref(int count) { + // make sure count > 0 + if (count < 1) + count = 1; + + // loop through each parent and decrease the ref count + xmlNode* node = xml_obj; + while (node->parent != NULL) { + node = node->parent; + if (node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE) + break; + node->refs -= count; + } + /* + * At this point `node` is now the top-most parent. + * + * If `node` is NOT a document, then we know that it is + * detached from the document since it has no parent. + * + */ + + /* + * Here we free the top-most parent node IF: + * 1. it's not a document + * 2. it has no refs + * + */ + if (node->type != XML_DOCUMENT_NODE && node->type != XML_HTML_DOCUMENT_NODE && node->refs == 0) { + xmlFreeNode(node); } } +void +XmlNode::add_child(xmlNode* child) { + xmlAddChild(xml_obj, child); + // if the child is wrapped then we have to ref its parents + if (child->_private != NULL) { + XmlNode* node = static_cast(child->_private); + node->ref(child->refs); + } +} + +void +XmlNode::add_prev_sibling(xmlNode* node) { + xmlAddPrevSibling(xml_obj, node); + if (node->_private != NULL) + ref(node->refs); +} + +void +XmlNode::add_next_sibling(xmlNode* node) { + xmlAddNextSibling(xml_obj, node); + if (node->_private != NULL) + ref(node->refs); +} + +xmlNode* +XmlNode::import_element(XmlNode *element) { + /* + * We must clone the node if it has a parent (even if it is in the same document) + * + * Unless the node is a free-standing orphan without a document + * then we must clone it or there will be refcount issues. + * + */ + xmlNode* node = element->xml_obj; + if (xml_obj->doc == node->doc && + node->parent == NULL && + node->type != XML_DOCUMENT_NODE && + node->type != XML_HTML_DOCUMENT_NODE) { + return node; + }else{ + node = xmlDocCopyNode(node, xml_obj->doc, 1); + return node; + } +} + v8::Local XmlNode::get_doc() { Nan::EscapableHandleScope scope; @@ -430,7 +499,16 @@ XmlNode::to_string(int options) { void XmlNode::remove() { - xmlUnlinkNode(xml_obj); + /* + * Here we unref each parent before removing the node + * + * We must unref each parent with the number of refs + * the current node has, because the current node may + * have children that need unref'ed as well. + * + */ + this->unref(xml_obj->refs); + xmlUnlinkNode(xml_obj); } v8::Local diff --git a/src/xml_node.h b/src/xml_node.h index def21ed1..f011bcaa 100644 --- a/src/xml_node.h +++ b/src/xml_node.h @@ -12,11 +12,15 @@ class XmlNode : public Nan::ObjectWrap { xmlNode* xml_obj; - // boolean value to check if `xml_obj` was already freed - bool freed; - // backup reference to the doc in case `xml_obj` was already freed - xmlDoc* doc; + void* doc; + + // recursive referencing functions + void ref(int count = 1); + void unref(int count = 1); + int refs() { + return refs_; + }; explicit XmlNode(xmlNode* node); virtual ~XmlNode(); @@ -55,6 +59,10 @@ class XmlNode : public Nan::ObjectWrap { v8::Local get_type(); v8::Local to_string(int options = 0); void remove(); + xmlNode *import_element(XmlNode* element); + void add_child(xmlNode* child); + void add_prev_sibling(xmlNode* element); + void add_next_sibling(xmlNode* element); }; } // namespace libxmljs diff --git a/src/xml_text.cc b/src/xml_text.cc index b50c25bb..c45aa45a 100644 --- a/src/xml_text.cc +++ b/src/xml_text.cc @@ -179,16 +179,6 @@ XmlText::XmlText(xmlNode* node) { } -void -XmlText::add_prev_sibling(xmlNode* element) { - xmlAddPrevSibling(xml_obj, element); -} - -void -XmlText::add_next_sibling(xmlNode* element) { - xmlAddNextSibling(xml_obj, element); -} - void XmlText::replace_element(xmlNode* element) { xmlReplaceNode(xml_obj, element); @@ -200,12 +190,6 @@ XmlText::replace_text(const char* content) { xmlReplaceNode(xml_obj, txt); } -xmlNode* -XmlText::import_element(XmlText *element) { - return (xml_obj->doc == element->xml_obj->doc) ? - element->xml_obj : xmlDocCopyNode(element->xml_obj, xml_obj->doc, 1); -} - void XmlText::Initialize(v8::Handle target) { diff --git a/src/xml_text.h b/src/xml_text.h index 7089485b..30d43dec 100644 --- a/src/xml_text.h +++ b/src/xml_text.h @@ -36,9 +36,6 @@ class XmlText : public XmlNode { void set_content(const char* content); void replace_text(const char* content); void replace_element(xmlNode* element); - xmlNode *import_element(XmlText* element); - void add_prev_sibling(xmlNode* element); - void add_next_sibling(xmlNode* element); }; } // namespace libxmljs diff --git a/test/comment.js b/test/comment.js index 6225cc85..50a2a332 100644 --- a/test/comment.js +++ b/test/comment.js @@ -15,4 +15,3 @@ module.exports.text = function(assert) { assert.equal('comment2', comm.text()); assert.done(); }; - diff --git a/test/document.js b/test/document.js index b8bdfd8a..5ab3fc4c 100644 --- a/test/document.js +++ b/test/document.js @@ -253,7 +253,7 @@ module.exports.rngValidate = function(assert) { ''+ ''; - var xml_valid = + var xml_valid = ''+ ''+ 'John Smith'+ @@ -265,7 +265,7 @@ module.exports.rngValidate = function(assert) { ''+ ''; - var xml_invalid = + var xml_invalid = ''+ ''+ 'John Smith'+ @@ -280,7 +280,7 @@ module.exports.rngValidate = function(assert) { var rngDoc = libxml.parseXml(rng); var xmlDocValid = libxml.parseXml(xml_valid); var xmlDocInvalid = libxml.parseXml(xml_invalid); - + assert.equal(xmlDocValid.rngValidate(rngDoc), true); assert.equal(xmlDocValid.validationErrors.length, 0); @@ -404,4 +404,3 @@ module.exports.validate_rng_memory_usage = function(assert) { assert.ok(process.memoryUsage().rss - initialMemory.rss < maxRssDelta); assert.done(); }; - diff --git a/test/element.js b/test/element.js index 0a436569..a69af0c9 100644 --- a/test/element.js +++ b/test/element.js @@ -103,6 +103,33 @@ module.exports.addChild = function(assert) { assert.done(); }; +module.exports.clones_added_nodes = function(assert) { + var doc = libxml.parseXml(''); + var child1 = doc.get('child1'); + var child2 = doc.get('child2'); + var inner = libxml.Element(doc, 'inner', 'original'); + child1.addChild(inner); + child2.addChild(inner); // addChild should clone inner + inner.text('modified'); + assert.ok(doc.get('//child1').text() == 'modified'); + assert.ok(doc.get('//child2').text() == 'original'); + + var next = libxml.Element(doc, 'nextSibling', 'original'); + child1.addNextSibling(next); + child2.addNextSibling(next); // addNextSibling should clone next + next.text('modified'); + assert.ok(child1.nextSibling().text() == 'modified'); + assert.ok(child2.nextSibling().text() == 'original'); + + var prev = libxml.Element(doc, 'prevSibling', 'original'); + child1.addPrevSibling(prev); + child2.addPrevSibling(prev); // addNextSibling should clone next + prev.text('modified'); + assert.ok(child1.prevSibling().text() == 'modified'); + assert.ok(child2.prevSibling().text() == 'original'); + assert.done(); +}; + module.exports.add_prev_sibling = function(assert) { var doc = libxml.Document(); var elem = doc.node('name1'); diff --git a/test/memory_management.js b/test/memory_management.js index 0902b881..f26d4121 100644 --- a/test/memory_management.js +++ b/test/memory_management.js @@ -9,6 +9,49 @@ module.exports.setUp = function(done) { done(); }; +module.exports.inaccessible_document_freed = function(assert) { + var xml_memory_before_document = libxml.memoryUsage(); + for (var i=0; i<10; i++) { + makeDocument(); + } + collectGarbage(); + assert.ok(libxml.memoryUsage() <= xml_memory_before_document); + assert.done(); +}; + +module.exports.inaccessible_document_freed_when_node_freed = function(assert) { + var xml_memory_before_document = libxml.memoryUsage(); + var nodes = []; + for (var i=0; i<10; i++) { + nodes.push(makeDocument().get('//center')); + } + nodes = null; + collectGarbage(); + assert.ok(libxml.memoryUsage() <= xml_memory_before_document); + assert.done(); +}; + +module.exports.inaccessible_document_freed_after_middle_nodes_proxied = function(assert) { + var xml_memory_before_document = libxml.memoryUsage(); + var doc = makeDocument(); + var middle = doc.get('//middle'); + var inner = doc.get('//inner'); + inner.remove(); // v0.14.3, v0.15: proxy ref'd parent but can't unref when destroyed + doc = middle = inner = null; + collectGarbage(); + assert.ok(libxml.memoryUsage() <= xml_memory_before_document); + assert.done(); +}; + +module.exports.inaccessible_tree_freed = function(assert) { + var doc = makeDocument(); + var xml_memory_after_document = libxml.memoryUsage(); + doc.get('//middle').remove();; + collectGarbage(); + assert.ok(libxml.memoryUsage() < xml_memory_after_document); + assert.done(); +}; + module.exports.namespace_list_freed = function(assert) { var doc = makeDocument(); var el = doc.get('//center'); @@ -48,4 +91,3 @@ function collectGarbage(minCycles, maxCycles) { return usage; } - diff --git a/test/ref_integrity.js b/test/ref_integrity.js index b3cff28c..bfbba185 100644 --- a/test/ref_integrity.js +++ b/test/ref_integrity.js @@ -59,5 +59,3 @@ module.exports.freed_namespace_unwrappable = function(assert) { global.gc(); assert.done(); }; - - diff --git a/test/text_node.js b/test/text_node.js index dc03f5b7..93b58dd0 100644 --- a/test/text_node.js +++ b/test/text_node.js @@ -24,4 +24,3 @@ module.exports.cdata = function(assert) { assert.equal(undefined, doc.child(0).name()); assert.done(); }; - diff --git a/test/traversal.js b/test/traversal.js index bebdaed1..a6f55e4a 100644 --- a/test/traversal.js +++ b/test/traversal.js @@ -96,4 +96,3 @@ module.exports.parsed_children = function(assert) { assert.done(); }; - diff --git a/test/z_memory_leak.js b/test/z_memory_leak.js new file mode 100644 index 00000000..42eb3bca --- /dev/null +++ b/test/z_memory_leak.js @@ -0,0 +1,18 @@ +var libxml = require('../index'); + +if (!global.gc) { + throw new Error('must run with --expose_gc for memory management tests'); +} + +/* + * TODO: Possibly create a custom `nodeunit` test reporter (https://github.com/caolan/nodeunit#command-line-options) + * that will run this check after each test. This would allow us to see exactly which test is causing a leak. + * + */ + +// run this test last to check for any unfreed nodes +module.exports.detect_leaks = function(assert) { + global.gc(); + assert.ok(libxml.nodeCount() < 1); + assert.done(); +} diff --git a/vendor/libxml/include/libxml/tree.h b/vendor/libxml/include/libxml/tree.h index da12daae..96de624c 100644 --- a/vendor/libxml/include/libxml/tree.h +++ b/vendor/libxml/include/libxml/tree.h @@ -504,6 +504,8 @@ struct _xmlNode { void *psvi; /* for type/PSVI informations */ unsigned short line; /* line number */ unsigned short extra; /* extra data for XPath/XSLT */ + + int refs; /* reference count for v8 garbage collection */ }; /**