Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Memory management: recursively reference each parent xmlNode #360

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ module.exports.SaxPushParser = sax_parser.SaxPushParser;

module.exports.memoryUsage = bindings.xmlMemUsed;

module.exports.nodeCount = bindings.xmlNodeCount;
41 changes: 34 additions & 7 deletions src/libxmljs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ LibXMLJS LibXMLJS::instance;
// track how much memory libxml2 is using
int xml_memory_used = 0;

// track how many nodes haven't been freed
int nodeCount = 0;

// wrapper for xmlMemMalloc to update v8's knowledge of memory used
// the GC relies on this information
void* xmlMemMallocWrap(size_t size)
Expand Down Expand Up @@ -142,24 +145,40 @@ void deregisterNodeNamespaces(xmlNode* xml_obj)
* wrapped, it is necessary to update any wrappers (`XmlNamespace`) which have
* been created for attached namespaces.
*/

void xmlDeregisterNodeCallback(xmlNode* xml_obj)
{
deregisterNodeNamespaces(xml_obj);
if (xml_obj->_private)
if (xml_obj->_private && xml_obj->type != XML_DOCUMENT_NODE && xml_obj->type != XML_HTML_DOCUMENT_NODE)
{
XmlNode* node = static_cast<XmlNode*>(xml_obj->_private);

// flag the XmlNode object as freed
node->freed = true;
// keep a copy of the doc so we can free it in the `XmlNode` destructor
node->doc = xml_obj->doc->_private;

// save a reference to the doc so we can still `unref` it
node->doc = xml_obj->doc;
// set xml_obj to NULL so the `XmlNode` destructor knows not to use it
node->xml_obj = NULL;
}
return;

// decrease the node count
if (nodeCount > 0)
nodeCount--;
// NOTE: nodeCount can go below 0 in some cases involving HTML documents
// creating certain document nodes that don't call xmlRegisterNode

deregisterNodeNamespaces(xml_obj);
}

// this is called for any created nodes
void xmlRegisterNodeCallback(xmlNode* xml_obj)
{
nodeCount++;
}

LibXMLJS::LibXMLJS()
{
// set the callback for when a node is created
xmlRegisterNodeDefault(xmlRegisterNodeCallback);

// set the callback for when a node is about to be freed
xmlDeregisterNodeDefault(xmlDeregisterNodeCallback);

Expand Down Expand Up @@ -228,6 +247,12 @@ NAN_METHOD(XmlMemUsed)
return info.GetReturnValue().Set(Nan::New<v8::Int32>(xmlMemUsed()));
}

NAN_METHOD(XmlNodeCount)
{
Nan::HandleScope scope;
return info.GetReturnValue().Set(Nan::New<v8::Int32>(nodeCount));
}

NAN_MODULE_INIT(init)
{
Nan::HandleScope scope;
Expand All @@ -249,6 +274,8 @@ NAN_MODULE_INIT(init)
Nan::Set(target, Nan::New<v8::String>("libxml").ToLocalChecked(), target);

Nan::SetMethod(target, "xmlMemUsed", XmlMemUsed);

Nan::SetMethod(target, "xmlNodeCount", XmlNodeCount);
}

NODE_MODULE(xmljs, init)
Expand Down
7 changes: 7 additions & 0 deletions src/xml_document.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,21 @@ class XmlDocument : public Nan::ObjectWrap {
// given xmlDoc object, intended for use in c++ space
static v8::Local<v8::Object> New(xmlDoc* doc);

// expose ObjectWrap::Ref
void ref() {
Ref();
}

// expose ObjectWrap::Unref
void unref() {
Unref();
}

// expose ObjectWrap::refs_ (for testing)
int refs() {
return refs_;
}

protected:

// initialize a new document
Expand Down
21 changes: 0 additions & 21 deletions src/xml_element.cc
Original file line number Diff line number Diff line change
Expand Up @@ -333,11 +333,6 @@ XmlElement::get_attrs() {
return scope.Escape(attributes);
}

void
XmlElement::add_child(xmlNode* child) {
xmlAddChild(xml_obj, child);
}

void
XmlElement::add_cdata(xmlNode* cdata) {
xmlAddChild(xml_obj, cdata);
Expand Down Expand Up @@ -473,16 +468,6 @@ XmlElement::XmlElement(xmlNode* node)
{
}

void
XmlElement::add_prev_sibling(xmlNode* element) {
xmlAddPrevSibling(xml_obj, element);
}

void
XmlElement::add_next_sibling(xmlNode* element) {
xmlAddNextSibling(xml_obj, element);
}

void
XmlElement::replace_element(xmlNode* element) {
xmlReplaceNode(xml_obj, element);
Expand All @@ -494,12 +479,6 @@ XmlElement::replace_text(const char* content) {
xmlReplaceNode(xml_obj, txt);
}

xmlNode*
XmlElement::import_element(XmlElement *element) {
return (xml_obj->doc == element->xml_obj->doc) ?
element->xml_obj : xmlDocCopyNode(element->xml_obj, xml_obj->doc, 1);
}

void
XmlElement::Initialize(v8::Handle<v8::Object> target)
{
Expand Down
4 changes: 0 additions & 4 deletions src/xml_element.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,13 @@ class XmlElement : public XmlNode {
v8::Local<v8::Value> get_attr(const char* name);
v8::Local<v8::Value> get_attrs();
void set_attr(const char* name, const char* value);
void add_child(xmlNode* child);
void add_cdata(xmlNode* cdata);
void set_content(const char* content);
v8::Local<v8::Value> get_content();
v8::Local<v8::Value> get_next_element();
v8::Local<v8::Value> get_prev_element();
void add_prev_sibling(xmlNode* element);
void add_next_sibling(xmlNode* element);
void replace_element(xmlNode* element);
void replace_text(const char* content);
xmlNode *import_element(XmlElement* element);
};

} // namespace libxmljs
Expand Down
168 changes: 123 additions & 45 deletions src/xml_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,68 +230,137 @@ XmlNode::New(xmlNode* node)
}

XmlNode::XmlNode(xmlNode* node) : xml_obj(node) {
this->freed = false;
xml_obj->_private = this;

// this will prevent the document from being cleaned up
// we keep the document if any of the nodes attached to it are still alive
// increase the ref count of each parent
this->ref();

XmlDocument* doc = static_cast<XmlDocument*>(xml_obj->doc->_private);
doc->ref();

// check that there's a parent node with a _private pointer
// also check that the parent node isn't the doc since we already Ref() the doc once
if (xml_obj->parent != NULL &&
xml_obj->parent->_private != NULL &&
(void*)xml_obj->doc != (void*)xml_obj->parent)
{
static_cast<XmlNode*>(xml_obj->parent->_private)->Ref();
}
}

XmlNode::~XmlNode() {

// check if `xml_obj` has been freed so we don't access bad memory
if (this->freed)
{

// unref the doc using the doc reference we saved in the `flagNode` callback
if (this->doc)
{
XmlDocument* doc = static_cast<XmlDocument*>(this->doc->_private);
doc->unref();
}
// if we already freed xml_obj,
// then unref() the doc with this->doc

// set doc to null for good measure?
// this->doc = NULL;

// return so we don't attempt to use `xml_obj`
if (xml_obj == NULL) {
XmlDocument* doc = static_cast<XmlDocument*>(this->doc);
doc->unref();
return;
}

xml_obj->_private = NULL;
// release the hold and allow the document to be freed
// this->unref() may cause `xml_obj` to be freed
// so we create a reference to the document now
XmlDocument* doc = static_cast<XmlDocument*>(xml_obj->doc->_private);

// decrease the ref count of each parent
this->unref();

// doc->unref() may cause `xml_obj` to be freed
// so we unref the document _after_ we unref each parent
doc->unref();

// We do not free the xmlNode here if it is linked to a document
// It will be freed when the doc is freed
if (xml_obj->parent == NULL)
xmlFreeNode(xml_obj);

// if there's a parent then Unref() it
else if (xml_obj->parent->_private != NULL &&
(void*)xml_obj->doc != (void*)xml_obj->parent)
{
XmlNode* parent = static_cast<XmlNode*>(xml_obj->parent->_private);

// make sure Unref() is necessary
if (parent->refs_ > 0)
{
parent->Unref();
}
// unref() may have already set _private to NULL
// so we check that `xml_obj` is still available
if (xml_obj != NULL)
xml_obj->_private = NULL;
}

// recursively ref each parent
void XmlNode::ref(int count) {
// make sure count > 0
if (count < 1)
count = 1;

// loop through each parent and increase the ref count
xmlNode* node = xml_obj;
while (node->parent != NULL) {
node = node->parent;
if (node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
break;
node->refs += count;
}
}

// recursively unref each parent
void XmlNode::unref(int count) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little worried about this approach. Rather than explicitly counting references, why not hold an object which tracks referencing, similar to a smart pointer. This would be much safer than explicitly counting

// make sure count > 0
if (count < 1)
count = 1;

// loop through each parent and decrease the ref count
xmlNode* node = xml_obj;
while (node->parent != NULL) {
node = node->parent;
if (node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
break;
node->refs -= count;
}
/*
* At this point `node` is now the top-most parent.
*
* If `node` is NOT a document, then we know that it is
* detached from the document since it has no parent.
*
*/

/*
* Here we free the top-most parent node IF:
* 1. it's not a document
* 2. it has no refs
*
*/
if (node->type != XML_DOCUMENT_NODE && node->type != XML_HTML_DOCUMENT_NODE && node->refs == 0) {
xmlFreeNode(node);
}
}

void
XmlNode::add_child(xmlNode* child) {
xmlAddChild(xml_obj, child);
// if the child is wrapped then we have to ref its parents
if (child->_private != NULL) {
XmlNode* node = static_cast<XmlNode*>(child->_private);
node->ref(child->refs);
}
}

void
XmlNode::add_prev_sibling(xmlNode* node) {
xmlAddPrevSibling(xml_obj, node);
if (node->_private != NULL)
ref(node->refs);
}

void
XmlNode::add_next_sibling(xmlNode* node) {
xmlAddNextSibling(xml_obj, node);
if (node->_private != NULL)
ref(node->refs);
}

xmlNode*
XmlNode::import_element(XmlNode *element) {
/*
* We must clone the node if it has a parent (even if it is in the same document)
*
* Unless the node is a free-standing orphan without a document
* then we must clone it or there will be refcount issues.
*
*/
xmlNode* node = element->xml_obj;
if (xml_obj->doc == node->doc &&
node->parent == NULL &&
node->type != XML_DOCUMENT_NODE &&
node->type != XML_HTML_DOCUMENT_NODE) {
return node;
}else{
node = xmlDocCopyNode(node, xml_obj->doc, 1);
return node;
}
}

v8::Local<v8::Value>
XmlNode::get_doc() {
Nan::EscapableHandleScope scope;
Expand Down Expand Up @@ -430,7 +499,16 @@ XmlNode::to_string(int options) {

void
XmlNode::remove() {
xmlUnlinkNode(xml_obj);
/*
* Here we unref each parent before removing the node
*
* We must unref each parent with the number of refs
* the current node has, because the current node may
* have children that need unref'ed as well.
*
*/
this->unref(xml_obj->refs);
xmlUnlinkNode(xml_obj);
}

v8::Local<v8::Value>
Expand Down
16 changes: 12 additions & 4 deletions src/xml_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@ class XmlNode : public Nan::ObjectWrap {

xmlNode* xml_obj;

// boolean value to check if `xml_obj` was already freed
bool freed;

// backup reference to the doc in case `xml_obj` was already freed
xmlDoc* doc;
void* doc;

// recursive referencing functions
void ref(int count = 1);
void unref(int count = 1);
int refs() {
return refs_;
};

explicit XmlNode(xmlNode* node);
virtual ~XmlNode();
Expand Down Expand Up @@ -55,6 +59,10 @@ class XmlNode : public Nan::ObjectWrap {
v8::Local<v8::Value> get_type();
v8::Local<v8::Value> to_string(int options = 0);
void remove();
xmlNode *import_element(XmlNode* element);
void add_child(xmlNode* child);
void add_prev_sibling(xmlNode* element);
void add_next_sibling(xmlNode* element);
};

} // namespace libxmljs
Expand Down