Skip to content

Commit

Permalink
Report error and stop parsing instead of silently ignoring it.
Browse files Browse the repository at this point in the history
  • Loading branch information
jvshahid committed Jun 28, 2013
1 parent 42da60e commit 37828ec
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
18 changes: 16 additions & 2 deletions ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,16 @@ public void fatalError(SAXParseException ex) throws SAXException {
// found in the prolog, instead it will keep calling this method and we'll
// keep inserting the error in the document errors array until we run
// out of memory
errors.add(ex);
String message = ex.getMessage();
if (message != null && message.toLowerCase().contains("in prolog")) {

// The problem with Xerces is that some errors will cause the
// parser not to advance the reader and it will keep reporting
// the same error over and over, which will cause the parser
// to enter an infinite loop unless we throw the exception.
if (message != null && isFatal(message)) {
throw ex;
}
errors.add(ex);
}

public void error(String domain, String key, XMLParseException e) {
Expand All @@ -80,4 +85,13 @@ public void warning(String domain, String key, XMLParseException e) {
errors.add(e);
}

/**
 * Determine whether this is a fatal error that should cause the parsing
 * to stop, or an error that can be ignored.
 *
 * The decision is a case-insensitive substring match of the message
 * against two known fragments. Lower-casing is done with Locale.ROOT so
 * the result does not depend on the JVM default locale (for example,
 * under a Turkish default locale an upper-case 'I' lower-cases to a
 * dotless 'i', which would make the match fail).
 *
 * @param msg the error message to inspect; may be null
 * @return true if the message contains one of the fatal fragments,
 *         false otherwise (including when msg is null)
 */
private static boolean isFatal(String msg) {
    if (msg == null) {
        return false;
    }
    // Normalize once rather than once per fragment.
    String normalized = msg.toLowerCase(java.util.Locale.ROOT);
    return
        normalized.contains("in prolog") ||
        normalized.contains("preceding the root element must be well-formed");
}
}
6 changes: 6 additions & 0 deletions test/xml/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,12 @@ def test_encoding
end
end

def test_memory_explosion_on_invalid_xml
  # Parsing malformed input must still return a document object, and that
  # document must report at least one error.
  malformed = "<<<"
  parsed = Nokogiri::XML(malformed)
  refute_nil parsed
  refute_empty parsed.errors
end

def test_document_has_errors
doc = Nokogiri::XML(<<-eoxml)
<foo><bar></foo>
Expand Down

0 comments on commit 37828ec

Please sign in to comment.