Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Initial fix for #794. I think the solution can be drastically simplif…

…ied. Details below.

This patch changes the behavior of node.content to recursively
aggregate text from children nodes, unless this is a Text node in
which case the content is returned.

In order to support recursively appending contents of children
elements and to fix the broken append behavior demonstrated by the
test in the previous commit, I had to remove the caching (which I'm
not sure why it was there in the first place) Also since we
recursively append text, we'll have to invalidate the cache when
changes deeper in the document tree occur.

Also I removed the content() from XML::Text since we don't need it
anymore.
  • Loading branch information...
commit 1928899ac9f257e6e8daf631a8a28aa47e015611 1 parent 31dd404
John Shahid authored
44 ext/java/nokogiri/XmlNode.java
@@ -809,23 +809,46 @@ private boolean isErrorIncreased(RubyArray baseErrors, RubyArray createdErrors)
809 809
810 810 @JRubyMethod(name = {"content", "text", "inner_text"})
811 811 public IRubyObject content(ThreadContext context) {
812   - if (content != null && content.isNil()) return content;
  812 + if (!node.hasChildNodes() && node.getNodeValue() == null &&
  813 + (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE))
  814 + return context.nil;
813 815 String textContent;
814   - if (content != null) textContent = rubyStringToString(content);
815   - else if (this instanceof XmlDocument) {
  816 + if (this instanceof XmlDocument) {
816 817 Node node = ((Document)this.node).getDocumentElement();
817 818 if (node == null) {
818 819 textContent = "";
819 820 } else {
820   - textContent = ((Document)this.node).getDocumentElement().getTextContent();
  821 + Node documentElement = ((Document)this.node).getDocumentElement();
  822 + StringBuffer buffer = new StringBuffer();
  823 + getTextContentRecursively(context, buffer, documentElement);
  824 + textContent = buffer.toString();
821 825 }
822 826 } else {
823   - textContent = this.node.getTextContent();
824   - }
825   - textContent = NokogiriHelpers.convertEncodingByNKFIfNecessary(context.getRuntime(), (XmlDocument)document(context), textContent);
826   - String decodedText = null;
827   - if (textContent != null) decodedText = NokogiriHelpers.decodeJavaString(textContent);
828   - return stringOrNil(context.getRuntime(), decodedText);
  827 + StringBuffer buffer = new StringBuffer();
  828 + getTextContentRecursively(context, buffer, node);
  829 + textContent = buffer.toString();
  830 + }
  831 + NokogiriHelpers.convertEncodingByNKFIfNecessary(context.getRuntime(), (XmlDocument)document(context), textContent);
  832 + return stringOrNil(context.getRuntime(), textContent);
  833 + }
  834 +
  835 + private void getTextContentRecursively(ThreadContext context, StringBuffer buffer, Node currentNode) {
  836 + String textContent = currentNode.getNodeValue();
  837 + if (textContent != null && NokogiriHelpers.shouldDecode(currentNode))
  838 + textContent = NokogiriHelpers.decodeJavaString(textContent);
  839 + if (textContent != null)
  840 + buffer.append(textContent);
  841 + NodeList children = currentNode.getChildNodes();
  842 + for (int i = 0; i < children.getLength(); i++) {
  843 + Node child = children.item(i);
  844 + if (hasTextContent(child))
  845 + getTextContentRecursively(context, buffer, child);
  846 + }
  847 + }
  848 +
  849 + private boolean hasTextContent(Node child) {
  850 + return child.getNodeType() != Node.COMMENT_NODE &&
  851 + child.getNodeType() != Node.PROCESSING_INSTRUCTION_NODE;
829 852 }
830 853
831 854 @JRubyMethod
@@ -1059,7 +1082,6 @@ public IRubyObject namespaced_key_p(ThreadContext context, IRubyObject elementLN
1059 1082 }
1060 1083
1061 1084 protected void setContent(IRubyObject content) {
1062   - this.content = content;
1063 1085 String javaContent = rubyStringToString(content);
1064 1086 node.setTextContent(javaContent);
1065 1087 if (javaContent.length() == 0) return;
16 ext/java/nokogiri/XmlText.java
@@ -34,13 +34,11 @@
34 34
35 35 import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate;
36 36 import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
37   -import static nokogiri.internals.NokogiriHelpers.stringOrNil;
38 37 import nokogiri.internals.SaveContextVisitor;
39 38
40 39 import org.jruby.Ruby;
41 40 import org.jruby.RubyClass;
42 41 import org.jruby.anno.JRubyClass;
43   -import org.jruby.anno.JRubyMethod;
44 42 import org.jruby.runtime.ThreadContext;
45 43 import org.jruby.runtime.builtin.IRubyObject;
46 44 import org.w3c.dom.Document;
@@ -88,17 +86,7 @@ protected IRubyObject getNodeName(ThreadContext context) {
88 86 if (name == null) name = context.getRuntime().newString("text");
89 87 return name;
90 88 }
91   -
92   - @Override
93   - @JRubyMethod(name = {"content", "text", "inner_text"})
94   - public IRubyObject content(ThreadContext context) {
95   - if (content == null || content.isNil()) {
96   - return stringOrNil(context.getRuntime(), node.getTextContent());
97   - } else {
98   - return content;
99   - }
100   - }
101   -
  89 +
102 90 @Override
103 91 public void accept(ThreadContext context, SaveContextVisitor visitor) {
104 92 visitor.enter((Text)node);
@@ -114,6 +102,6 @@ public void accept(ThreadContext context, SaveContextVisitor visitor) {
114 102 }
115 103 child = child.getNextSibling();
116 104 }
117   - visitor.leave((Text)node);
  105 + visitor.leave(node);
118 106 }
119 107 }
16 ext/java/nokogiri/internals/NokogiriHelpers.java
@@ -45,7 +45,6 @@
45 45 import java.util.ArrayList;
46 46 import java.util.List;
47 47 import java.util.Set;
48   -import java.util.SortedMap;
49 48 import java.util.regex.Matcher;
50 49 import java.util.regex.Pattern;
51 50
@@ -685,9 +684,9 @@ private static String guessEncoding() {
685 684 if (name == null) name = "UTF-8";
686 685 return name;
687 686 }
688   -
689   - private static Set<String> charsetNames = ((SortedMap<String, Charset>)Charset.availableCharsets()).keySet();
690   -
  687 +
  688 + private static Set<String> charsetNames = Charset.availableCharsets().keySet();
  689 +
691 690 private static String ignoreInvalidEncoding(Ruby runtime, IRubyObject encoding) {
692 691 String givenEncoding = rubyStringToString(encoding);
693 692 if (charsetNames.contains(givenEncoding)) return givenEncoding;
@@ -807,4 +806,13 @@ public static String nkf(Ruby runtime, String ruby_encoding, String thing) {
807 806 private static Charset shift_jis = Charset.forName("Shift_JIS");
808 807 private static Charset jis = Charset.forName("ISO-2022-JP");
809 808 private static Charset euc_jp = Charset.forName("EUC-JP");
  809 +
  810 + public static boolean shouldEncode(Node text) {
  811 + return text.getUserData(NokogiriHelpers.ENCODED_STRING) == null ||
  812 + !((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING));
  813 + }
  814 +
  815 + public static boolean shouldDecode(Node text) {
  816 + return !shouldEncode(text);
  817 + }
810 818 }
34 ext/java/nokogiri/internals/SaveContextVisitor.java
@@ -72,14 +72,26 @@
72 72 */
73 73 public class SaveContextVisitor {
74 74
75   - private StringBuffer buffer;
76   - private Stack<String> indentation;
77   - private String encoding, indentString;
78   - private boolean format, noDecl, noEmpty, noXhtml, asXhtml, asXml, asHtml, asBuilder, htmlDoc, fragment;
79   - private boolean canonical, incl_ns, with_comments, subsets, exclusive;
80   - private List<Node> c14nNodeList;
81   - private Deque<Attr[]> c14nNamespaceStack;
82   - private Deque<Attr[]> c14nAttrStack;
  75 + private final StringBuffer buffer;
  76 + private final Stack<String> indentation;
  77 + private String encoding;
  78 + private final String indentString;
  79 + private boolean format;
  80 + private final boolean noDecl;
  81 + private final boolean noEmpty;
  82 + private final boolean noXhtml;
  83 + private final boolean asXhtml;
  84 + private boolean asXml;
  85 + private final boolean asHtml;
  86 + private final boolean asBuilder;
  87 + private boolean htmlDoc;
  88 + private final boolean fragment;
  89 + private final boolean canonical, incl_ns, with_comments;
  90 + private boolean subsets;
  91 + private boolean exclusive;
  92 + private final List<Node> c14nNodeList;
  93 + private final Deque<Attr[]> c14nNamespaceStack;
  94 + private final Deque<Attr[]> c14nAttrStack;
83 95 private List<String> c14nExclusiveInclusivePrefixes = null;
84 96 /*
85 97 * U can't touch this.
@@ -181,7 +193,7 @@ public boolean enter(Node node) {
181 193 return enter((Entity)node);
182 194 }
183 195 if (node instanceof EntityReference) {
184   - return enter((EntityReference)node);
  196 + return enter(node);
185 197 }
186 198 if (node instanceof Notation) {
187 199 return enter((Notation)node);
@@ -225,7 +237,7 @@ public void leave(Node node) {
225 237 return;
226 238 }
227 239 if (node instanceof EntityReference) {
228   - leave((EntityReference)node);
  240 + leave(node);
229 241 return;
230 242 }
231 243 if (node instanceof Notation) {
@@ -730,7 +742,7 @@ public boolean enter(Text text) {
730 742 }
731 743 }
732 744
733   - if (text.getUserData(NokogiriHelpers.ENCODED_STRING) == null || !((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING))) {
  745 + if (NokogiriHelpers.shouldEncode(text)) {
734 746 textContent = encodeJavaString(textContent);
735 747 }
736 748

0 comments on commit 1928899

Please sign in to comment.
Something went wrong with that request. Please try again.