Skip to content

Commit

Permalink
Merge pull request #453 from libc/master
Browse files Browse the repository at this point in the history
Add Nokogiri::XML::SAX::ParserContext#recovery
  • Loading branch information
jvshahid committed Nov 13, 2013
2 parents 68e1e98 + 804df68 commit 4b7a7fb
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 0 deletions.
33 changes: 33 additions & 0 deletions ext/java/nokogiri/XmlSaxParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,22 @@ public class XmlSaxParserContext extends ParserContext {
"http://xml.org/sax/features/namespace-prefixes";
protected static final String FEATURE_LOAD_EXTERNAL_DTD =
"http://apache.org/xml/features/nonvalidating/load-external-dtd";
protected static final String FEATURE_CONTINUE_AFTER_FATAL_ERROR =
"http://apache.org/xml/features/continue-after-fatal-error";

protected AbstractSAXParser parser;

protected NokogiriHandler handler = null;
private IRubyObject replaceEntities;
private IRubyObject recovery;

/**
 * Creates a SAX parser context for the given runtime and Ruby class.
 * All construction work is delegated to the ParserContext superclass.
 *
 * @param ruby the JRuby runtime passed through to the superclass
 * @param rubyClass the Ruby class passed through to the superclass
 */
public XmlSaxParserContext(final Ruby ruby, RubyClass rubyClass) {
super(ruby, rubyClass);
}

protected void initialize(Ruby runtime) {
replaceEntities = runtime.getTrue();
recovery = runtime.getFalse();
try {
parser = createParser();
} catch (SAXException se) {
Expand Down Expand Up @@ -199,6 +203,13 @@ protected void preParse(ThreadContext context,
IRubyObject handlerRuby,
NokogiriHandler handler) {
((XmlSaxParser) parser).setXmlDeclHandler(handler);
if(recovery.isTrue()) {
try {
((XmlSaxParser) parser).setFeature(FEATURE_CONTINUE_AFTER_FATAL_ERROR, true);
} catch(Exception e) {
throw RaiseException.createNativeRaiseException(context.getRuntime(), e);
}
}
}

protected void postParse(ThreadContext context,
Expand Down Expand Up @@ -290,6 +301,28 @@ public IRubyObject get_replace_entities(ThreadContext context) {
return replaceEntities;
}

/**
 * Turns recovery mode on or off for this parser context.
 *
 * The stored flag is consulted in preParse: when it is true, the
 * continue-after-fatal-error feature is switched on for the parser.
 *
 * @param context the current thread context, used to reach the runtime's
 *                true/false singletons
 * @param value any Ruby object; its truthiness decides the flag
 * @return this parser context
 */
@JRubyMethod(name = "recovery=")
public IRubyObject set_recovery(ThreadContext context,
                                IRubyObject value) {
    final Ruby runtime = context.getRuntime();
    recovery = value.isTrue() ? runtime.getTrue() : runtime.getFalse();
    return this;
}

/**
 * Reports the current recovery flag of this parser context.
 *
 * @param context the current thread context (not used by this method)
 * @return the stored recovery value
 */
@JRubyMethod(name="recovery")
public IRubyObject get_recovery(ThreadContext context) {
return recovery;
}



/**
* If the handler's document is a FragmentHandler, attempt to trim
Expand Down
40 changes: 40 additions & 0 deletions ext/nokogiri/xml_sax_parser_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,44 @@ static VALUE column(VALUE self)
return Qnil;
}

/*
 * call-seq:
 *  recovery=(boolean)
 *
 * Should this parser recover from structural errors? It will not stop
 * processing the file on structural errors if set to a truthy value.
 * Both +false+ and +nil+ turn recovery off.
 */
static VALUE set_recovery(VALUE self, VALUE value)
{
  xmlParserCtxtPtr ctxt;
  Data_Get_Struct(self, xmlParserCtxt, ctxt);

  /* RTEST follows Ruby truthiness, so nil disables recovery just like false. */
  ctxt->recovery = RTEST(value) ? 1 : 0;

  return value;
}

/*
 * call-seq:
 *  recovery
 *
 * Is this parser set to recover from structural errors? Returns +true+ when
 * the context's recovery flag is non-zero, +false+ otherwise.
 */
static VALUE get_recovery(VALUE self)
{
  xmlParserCtxtPtr ctxt;
  Data_Get_Struct(self, xmlParserCtxt, ctxt);

  return ctxt->recovery ? Qtrue : Qfalse;
}

void init_xml_sax_parser_context()
{
VALUE nokogiri = rb_define_module("Nokogiri");
Expand All @@ -217,6 +255,8 @@ void init_xml_sax_parser_context()
rb_define_method(klass, "parse_with", parse_with, 1);
rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
rb_define_method(klass, "replace_entities", get_replace_entities, 0);
rb_define_method(klass, "recovery=", set_recovery, 1);
rb_define_method(klass, "recovery", get_recovery, 0);
rb_define_method(klass, "line", line, 0);
rb_define_method(klass, "column", column, 0);
}
16 changes: 16 additions & 0 deletions test/xml/sax/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,22 @@ def test_parser_attributes

assert_equal [['root', []], ['foo', [['a', '&b'], ['c', '>d']]]], @parser.document.start_elements
end

# The input carries a second XML declaration inside the first <Data> element.
# With recovery switched on from the parse block, every start element is
# still expected to be reported.
def test_recovery_from_incorrect_xml
  xml = <<-eoxml
<?xml version="1.0" ?><Root><Data><?xml version='1.0'?><Item>hey</Item></Data><Data><Item>hey yourself</Item></Data></Root>
  eoxml

  yielded = false
  @parser.parse(xml) do |ctx|
    yielded = true
    ctx.recovery = true
  end

  assert yielded

  expected = [['Root', []], ['Data', []], ['Item', []], ['Data', []], ['Item', []]]
  assert_equal expected, @parser.document.start_elements
end
end
end
end
Expand Down
9 changes: 9 additions & 0 deletions test/xml/sax/test_parser_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ def test_replace_entities
assert_equal true, pc.replace_entities
end

# The recovery flag should read back exactly what was assigned, first false
# and then true.
def test_recovery
  pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'

  [false, true].each do |flag|
    pc.recovery = flag
    assert_equal flag, pc.recovery
  end
end

def test_from_io
ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
assert ctx
Expand Down

0 comments on commit 4b7a7fb

Please sign in to comment.