Skip to content

Commit

Permalink
Merge pull request #1834 from sparklemotion/1063-flavorjones-dup-docu…
Browse files Browse the repository at this point in the history
…ment-fragment-attempt-2

fix #1063: poor memory performance when `dup`ing DocumentFragment
  • Loading branch information
flavorjones committed Dec 10, 2018
2 parents ed6afeb + 5b69bdd commit e2a4f6b
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 8 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
* `XML::Attr#value=` allows HTML node attribute values to be set to either a blank string or an empty boolean attribute. [#1800]
* Introduce `XML::Node#wrap` which does what `XML::NodeSet#wrap` has always done, but for a single node. [#1531] (Thanks, @ethirajsrinivasan!)
* [MRI] Improve installation experience on macOS High Sierra (Darwin). [#1812, #1813] (Thanks, @gpakosz and @nurse!)
* [MRI] Node#dup supports copying a node directly to a new document. See the method documentation for details.
* [MRI] DocumentFragment#dup is now more memory-efficient, avoiding making unnecessary copies. [#1063]
* [JRuby] NodeSet has been rewritten to improve performance! [#1795]


Expand Down
33 changes: 25 additions & 8 deletions ext/nokogiri/xml_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ static void relink_namespace(xmlNodePtr reparented)
&& ns != reparented->ns
&& xmlStrEqual(ns->prefix, reparented->ns->prefix)
&& xmlStrEqual(ns->href, reparented->ns->href)
) {
) {
xmlSetNs(reparented, ns);
}
}
Expand Down Expand Up @@ -532,22 +532,39 @@ static VALUE internal_subset(VALUE self)
/*
* call-seq:
* dup
* dup(depth)
* dup(depth, new_parent_doc)
*
* Copy this node. An optional depth may be passed in, but it defaults
* to a deep copy. 0 is a shallow copy, 1 is a deep copy.
* Copy this node.
* An optional depth may be passed in. 0 is a shallow copy, 1 (the default) is a deep copy.
* An optional new_parent_doc may also be passed in, which will be the new
* node's parent document. Defaults to the current node's document.
* current document.
*/
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
{
VALUE level;
VALUE r_level, r_new_parent_doc;
int level;
int n_args;
xmlDocPtr new_parent_doc;
xmlNodePtr node, dup;

if(rb_scan_args(argc, argv, "01", &level) == 0) {
level = INT2NUM((long)1);
Data_Get_Struct(self, xmlNode, node);

n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);

if (n_args < 1) {
r_level = INT2NUM((long)1);
}
level = (int)NUM2INT(r_level);

Data_Get_Struct(self, xmlNode, node);
if (n_args < 2) {
new_parent_doc = node->doc;
} else {
Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc);
}

dup = xmlDocCopyNode(node, node->doc, (int)NUM2INT(level));
dup = xmlDocCopyNode(node, new_parent_doc, level);
if(dup == NULL) { return Qnil; }

nokogiri_root_node(dup);
Expand Down
11 changes: 11 additions & 0 deletions lib/nokogiri/xml/document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,17 @@ def initialize document, tags = nil, ctx = nil
children.each { |child| child.parent = self }
end

if Nokogiri.uses_libxml?
def dup
new_document = document.dup
new_fragment = XML::DocumentFragment.new(new_document)
children.each do |child|
child.dup(1, new_document).parent = new_fragment
end
new_fragment
end
end

###
# return the name for DocumentFragment
def name
Expand Down
14 changes: 14 additions & 0 deletions test/xml/test_document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,20 @@ def test_issue_1077_parsing_of_frozen_strings
Nokogiri::XML::DocumentFragment.parse(input) # assert_nothing_raised
end

def test_dup_creates_tree_with_identical_structure
original = Nokogiri::XML::DocumentFragment.parse("<div><p>hello</p></div>")
duplicate = original.dup
assert_equal original.to_html, duplicate.to_html
end

def test_dup_creates_mutable_tree
original = Nokogiri::XML::DocumentFragment.parse("<div><p>hello</p></div>")
duplicate = original.dup
duplicate.at_css("div").add_child("<b>hello there</b>")
assert_nil original.at_css("b")
assert_not_nil duplicate.at_css("b")
end

if Nokogiri.uses_libxml?
def test_for_libxml_in_context_fragment_parsing_bug_workaround
10.times do
Expand Down
38 changes: 38 additions & 0 deletions test/xml/test_node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,44 @@ def test_parse_with_unparented_html_fragment_text_context_node
assert_equal x.first.name, "span"
end

def test_dup_is_deep_copy_by_default
doc = XML::Document.parse "<root><div><p>hello</p></div></root>"
div = doc.at_css "div"
node = div.dup
assert_equal 1, node.children.length
assert_equal "<p>hello</p>", node.children.first.to_html
end

def test_dup_deep_copy
doc = XML::Document.parse "<root><div><p>hello</p></div></root>"
div = doc.at_css "div"
node = div.dup(1)
assert_equal 1, node.children.length
assert_equal "<p>hello</p>", node.children.first.to_html
end

def test_dup_shallow_copy
doc = XML::Document.parse "<root><div><p>hello</p></div></root>"
div = doc.at_css "div"
node = div.dup(0)
assert_equal 0, node.children.length
end

if Nokogiri.uses_libxml?
def test_dup_to_another_document
doc1 = HTML::Document.parse "<root><div><p>hello</p></div></root>"
doc2 = HTML::Document.parse "<div></div>"

div = doc1.at_css "div"
duplicate_div = div.dup(1, doc2)

assert_not_nil doc1.at_css("div")
assert_equal doc2, duplicate_div.document
assert_equal 1, duplicate_div.children.length
assert_equal "<p>hello</p>", duplicate_div.children.first.to_html
end
end

def test_subclass_dup
subclass = Class.new(Nokogiri::XML::Node)
node = subclass.new('foo', @xml).dup
Expand Down

0 comments on commit e2a4f6b

Please sign in to comment.