Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Initial fix for #794. I think the solution can be drastically simplified. Details below.

This patch changes the behavior of node.content so that it recursively
aggregates the text of all child nodes, unless the node is itself a Text
node, in which case its own content is returned.

In order to support recursively appending the contents of child
elements, and to fix the broken append behavior demonstrated by the
test in the previous commit, I had to remove the caching (I'm not sure
why it was there in the first place). Also, since we now recursively
append text, keeping the cache would require invalidating it whenever
a change occurs deeper in the document tree.

I also removed the content() override from XML::Text, since it is no
longer needed.
  • Loading branch information...
commit 1928899ac9f257e6e8daf631a8a28aa47e015611 1 parent 31dd404
@jvshahid authored
View
44 ext/java/nokogiri/XmlNode.java
@@ -809,23 +809,46 @@ private boolean isErrorIncreased(RubyArray baseErrors, RubyArray createdErrors)
@JRubyMethod(name = {"content", "text", "inner_text"})
public IRubyObject content(ThreadContext context) {
- if (content != null && content.isNil()) return content;
+ if (!node.hasChildNodes() && node.getNodeValue() == null &&
+ (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE))
+ return context.nil;
String textContent;
- if (content != null) textContent = rubyStringToString(content);
- else if (this instanceof XmlDocument) {
+ if (this instanceof XmlDocument) {
Node node = ((Document)this.node).getDocumentElement();
if (node == null) {
textContent = "";
} else {
- textContent = ((Document)this.node).getDocumentElement().getTextContent();
+ Node documentElement = ((Document)this.node).getDocumentElement();
+ StringBuffer buffer = new StringBuffer();
+ getTextContentRecursively(context, buffer, documentElement);
+ textContent = buffer.toString();
}
} else {
- textContent = this.node.getTextContent();
- }
- textContent = NokogiriHelpers.convertEncodingByNKFIfNecessary(context.getRuntime(), (XmlDocument)document(context), textContent);
- String decodedText = null;
- if (textContent != null) decodedText = NokogiriHelpers.decodeJavaString(textContent);
- return stringOrNil(context.getRuntime(), decodedText);
+ StringBuffer buffer = new StringBuffer();
+ getTextContentRecursively(context, buffer, node);
+ textContent = buffer.toString();
+ }
+ NokogiriHelpers.convertEncodingByNKFIfNecessary(context.getRuntime(), (XmlDocument)document(context), textContent);
+ return stringOrNil(context.getRuntime(), textContent);
+ }
+
+ private void getTextContentRecursively(ThreadContext context, StringBuffer buffer, Node currentNode) {
+ String textContent = currentNode.getNodeValue();
+ if (textContent != null && NokogiriHelpers.shouldDecode(currentNode))
+ textContent = NokogiriHelpers.decodeJavaString(textContent);
+ if (textContent != null)
+ buffer.append(textContent);
+ NodeList children = currentNode.getChildNodes();
+ for (int i = 0; i < children.getLength(); i++) {
+ Node child = children.item(i);
+ if (hasTextContent(child))
+ getTextContentRecursively(context, buffer, child);
+ }
+ }
+
+ private boolean hasTextContent(Node child) {
+ return child.getNodeType() != Node.COMMENT_NODE &&
+ child.getNodeType() != Node.PROCESSING_INSTRUCTION_NODE;
}
@JRubyMethod
@@ -1059,7 +1082,6 @@ public IRubyObject namespaced_key_p(ThreadContext context, IRubyObject elementLN
}
protected void setContent(IRubyObject content) {
- this.content = content;
String javaContent = rubyStringToString(content);
node.setTextContent(javaContent);
if (javaContent.length() == 0) return;
View
16 ext/java/nokogiri/XmlText.java
@@ -34,13 +34,11 @@
import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate;
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
-import static nokogiri.internals.NokogiriHelpers.stringOrNil;
import nokogiri.internals.SaveContextVisitor;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.anno.JRubyClass;
-import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Document;
@@ -88,17 +86,7 @@ protected IRubyObject getNodeName(ThreadContext context) {
if (name == null) name = context.getRuntime().newString("text");
return name;
}
-
- @Override
- @JRubyMethod(name = {"content", "text", "inner_text"})
- public IRubyObject content(ThreadContext context) {
- if (content == null || content.isNil()) {
- return stringOrNil(context.getRuntime(), node.getTextContent());
- } else {
- return content;
- }
- }
-
+
@Override
public void accept(ThreadContext context, SaveContextVisitor visitor) {
visitor.enter((Text)node);
@@ -114,6 +102,6 @@ public void accept(ThreadContext context, SaveContextVisitor visitor) {
}
child = child.getNextSibling();
}
- visitor.leave((Text)node);
+ visitor.leave(node);
}
}
View
16 ext/java/nokogiri/internals/NokogiriHelpers.java
@@ -45,7 +45,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
-import java.util.SortedMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -685,9 +684,9 @@ private static String guessEncoding() {
if (name == null) name = "UTF-8";
return name;
}
-
- private static Set<String> charsetNames = ((SortedMap<String, Charset>)Charset.availableCharsets()).keySet();
-
+
+ private static Set<String> charsetNames = Charset.availableCharsets().keySet();
+
private static String ignoreInvalidEncoding(Ruby runtime, IRubyObject encoding) {
String givenEncoding = rubyStringToString(encoding);
if (charsetNames.contains(givenEncoding)) return givenEncoding;
@@ -807,4 +806,13 @@ public static String nkf(Ruby runtime, String ruby_encoding, String thing) {
private static Charset shift_jis = Charset.forName("Shift_JIS");
private static Charset jis = Charset.forName("ISO-2022-JP");
private static Charset euc_jp = Charset.forName("EUC-JP");
+
+ public static boolean shouldEncode(Node text) {
+ return text.getUserData(NokogiriHelpers.ENCODED_STRING) == null ||
+ !((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING));
+ }
+
+ public static boolean shouldDecode(Node text) {
+ return !shouldEncode(text);
+ }
}
View
34 ext/java/nokogiri/internals/SaveContextVisitor.java
@@ -72,14 +72,26 @@
*/
public class SaveContextVisitor {
- private StringBuffer buffer;
- private Stack<String> indentation;
- private String encoding, indentString;
- private boolean format, noDecl, noEmpty, noXhtml, asXhtml, asXml, asHtml, asBuilder, htmlDoc, fragment;
- private boolean canonical, incl_ns, with_comments, subsets, exclusive;
- private List<Node> c14nNodeList;
- private Deque<Attr[]> c14nNamespaceStack;
- private Deque<Attr[]> c14nAttrStack;
+ private final StringBuffer buffer;
+ private final Stack<String> indentation;
+ private String encoding;
+ private final String indentString;
+ private boolean format;
+ private final boolean noDecl;
+ private final boolean noEmpty;
+ private final boolean noXhtml;
+ private final boolean asXhtml;
+ private boolean asXml;
+ private final boolean asHtml;
+ private final boolean asBuilder;
+ private boolean htmlDoc;
+ private final boolean fragment;
+ private final boolean canonical, incl_ns, with_comments;
+ private boolean subsets;
+ private boolean exclusive;
+ private final List<Node> c14nNodeList;
+ private final Deque<Attr[]> c14nNamespaceStack;
+ private final Deque<Attr[]> c14nAttrStack;
private List<String> c14nExclusiveInclusivePrefixes = null;
/*
* U can't touch this.
@@ -181,7 +193,7 @@ public boolean enter(Node node) {
return enter((Entity)node);
}
if (node instanceof EntityReference) {
- return enter((EntityReference)node);
+ return enter(node);
}
if (node instanceof Notation) {
return enter((Notation)node);
@@ -225,7 +237,7 @@ public void leave(Node node) {
return;
}
if (node instanceof EntityReference) {
- leave((EntityReference)node);
+ leave(node);
return;
}
if (node instanceof Notation) {
@@ -730,7 +742,7 @@ public boolean enter(Text text) {
}
}
- if (text.getUserData(NokogiriHelpers.ENCODED_STRING) == null || !((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING))) {
+ if (NokogiriHelpers.shouldEncode(text)) {
textContent = encodeJavaString(textContent);
}
Please sign in to comment.
Something went wrong with that request. Please try again.