Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Add SAXParserContext#recovery. If it's set to true, parser will recov…

…er from fatal errors (such as spurious <?xml?> in the middle of the document).
  • Loading branch information...
commit 16250b7d2eb0099c1a12a968bad93609a5c8ed90 1 parent 6f373ca
Eugene Pimenov authored
33 ext/java/nokogiri/XmlSaxParserContext.java
View
@@ -75,11 +75,14 @@
"http://xml.org/sax/features/namespace-prefixes";
protected static final String FEATURE_LOAD_EXTERNAL_DTD =
"http://apache.org/xml/features/nonvalidating/load-external-dtd";
+ protected static final String FEATURE_CONTINUE_AFTER_FATAL_ERROR =
+ "http://apache.org/xml/features/continue-after-fatal-error";
protected AbstractSAXParser parser;
protected NokogiriHandler handler = null;
private IRubyObject replaceEntities;
+ private IRubyObject recovery;
public XmlSaxParserContext(final Ruby ruby, RubyClass rubyClass) {
super(ruby, rubyClass);
@@ -87,6 +90,7 @@ public XmlSaxParserContext(final Ruby ruby, RubyClass rubyClass) {
protected void initialize(Ruby runtime) {
replaceEntities = runtime.getTrue();
+ recovery = runtime.getFalse();
try {
parser = createParser();
} catch (SAXException se) {
@@ -199,6 +203,13 @@ protected void preParse(ThreadContext context,
IRubyObject handlerRuby,
NokogiriHandler handler) {
((XmlSaxParser) parser).setXmlDeclHandler(handler);
+ if(recovery.isTrue()) {
+ try {
+ ((XmlSaxParser) parser).setFeature(FEATURE_CONTINUE_AFTER_FATAL_ERROR, true);
+ } catch(Exception e) {
+ throw RaiseException.createNativeRaiseException(context.getRuntime(), e);
+ }
+ }
}
protected void postParse(ThreadContext context,
@@ -290,6 +301,28 @@ public IRubyObject get_replace_entities(ThreadContext context) {
return replaceEntities;
}
+ /**
+ * Can take a boolean assignment.
+ *
+ * @param context
+ * @param value
+ * @return
+ */
+ @JRubyMethod(name = "recovery=")
+ public IRubyObject set_recovery(ThreadContext context,
+ IRubyObject value) {
+ if (!value.isTrue()) recovery = context.getRuntime().getFalse();
+ else recovery = context.getRuntime().getTrue();
+
+ return this;
+ }
+
+ @JRubyMethod(name="recovery") // bound to the Ruby-side method name "recovery"
+ public IRubyObject get_recovery(ThreadContext context) {
+ return recovery; // the Ruby boolean last stored by initialize or set_recovery
+ }
+
+
/**
* If the handler's document is a FragmentHandler, attempt to trim
40 ext/nokogiri/xml_sax_parser_context.c
View
@@ -178,6 +178,44 @@ static VALUE column(VALUE self)
return Qnil;
}
+/*
+ * call-seq:
+ * recovery=(boolean)
+ *
+ * Should this parser recover from structural errors? It will not stop processing
+ * file on structural errors if set to true. Both false and nil disable recovery.
+ */
+static VALUE set_recovery(VALUE self, VALUE value)
+{
+ xmlParserCtxtPtr ctxt;
+ Data_Get_Struct(self, xmlParserCtxt, ctxt);
+
+ /* RTEST applies Ruby truthiness, so nil is treated like false instead of
+ * silently enabling recovery. */
+ ctxt->recovery = RTEST(value) ? 1 : 0;
+
+ return value;
+}
+
+/*
+ * call-seq:
+ * recovery
+ *
+ * Reports whether this parser is set to recover from structural errors:
+ * true when the recovery flag of the wrapped parser context is non-zero,
+ * false otherwise.
+ */
+static VALUE get_recovery(VALUE self)
+{
+ xmlParserCtxtPtr parser_ctxt;
+
+ Data_Get_Struct(self, xmlParserCtxt, parser_ctxt);
+ return parser_ctxt->recovery ? Qtrue : Qfalse;
+}
+
void init_xml_sax_parser_context()
{
VALUE nokogiri = rb_define_module("Nokogiri");
@@ -194,6 +232,8 @@ void init_xml_sax_parser_context()
rb_define_method(klass, "parse_with", parse_with, 1);
rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
rb_define_method(klass, "replace_entities", get_replace_entities, 0);
+ rb_define_method(klass, "recovery=", set_recovery, 1);
+ rb_define_method(klass, "recovery", get_recovery, 0);
rb_define_method(klass, "line", line, 0);
rb_define_method(klass, "column", column, 0);
}
16 test/xml/sax/test_parser.rb
View
@@ -332,6 +332,22 @@ def test_parser_attributes
assert_equal [['root', []], ['foo', [['a', '&b'], ['c', '>d']]]], @parser.document.start_elements
end
+
+ def test_recovery_from_incorrect_xml # recovery mode should parse past a stray XML declaration
+ xml = <<-eoxml
+<?xml version="1.0" ?><Root><Data><?xml version='1.0'?><Item>hey</Item></Data><Data><Item>hey yourself</Item></Data></Root>
+ eoxml
+
+ block_called = false
+ @parser.parse(xml) { |ctx|
+ block_called = true
+ ctx.recovery = true # enable recovery on the context yielded by parse
+ }
+
+ assert block_called
+
+ assert_equal [['Root', []], ['Data', []], ['Item', []], ['Data', []], ['Item', []]], @parser.document.start_elements # both Data/Item pairs are expected despite the stray declaration
+ end
end
end
end
9 test/xml/sax/test_parser_context.rb
View
@@ -65,6 +65,15 @@ def test_replace_entities
assert_equal true, pc.replace_entities
end
+ def test_recovery
+ pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'
+ pc.recovery = false
+ assert_equal false, pc.recovery
+
+ pc.recovery = true
+ assert_equal true, pc.recovery
+ end
+
def test_from_io
assert_nothing_raised do
ParserContext.new StringIO.new('fo'), 'UTF-8'
Please sign in to comment.
Something went wrong with that request. Please try again.