Skip to content

Commit

Permalink
fix: XML::Reader error handling during xmlTextReaderExpand
Browse files Browse the repository at this point in the history
  • Loading branch information
flavorjones committed Dec 6, 2022
1 parent d33f56e commit 549ac3a
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ This version of Nokogiri uses [`jar-dependencies`](https://github.com/mkristian/
* Prefer `ruby_xmalloc` to `malloc` within the C extension. [[#2480](https://github.com/sparklemotion/nokogiri/issues/2480)] (Thanks, [@Garfield96](https://github.com/Garfield96)!)
* Installation from source on systems missing libiconv will once again generate a helpful error message (broken since v1.11.0). [[#2505](https://github.com/sparklemotion/nokogiri/issues/2505)]
* Empty CSS selectors now raise a clearer `Nokogiri::CSS::SyntaxError` message, "empty CSS selector". Previously the exception raised from the bowels of `racc` was "unexpected '$' after ''". [[#2700](https://github.com/sparklemotion/nokogiri/issues/2700)]
* `XML::Reader` parsing errors encountered during `Reader#attribute_hash` and `Reader#namespaces` now raise an `XML::SyntaxError`. Previously these methods would return `nil` and users would generally experience `NoMethodErrors` from elsewhere in the code.


### Deprecated
Expand Down
54 changes: 40 additions & 14 deletions ext/nokogiri/xml_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,26 +124,37 @@ attributes_eh(VALUE self)
* Get a hash of namespaces for this Node
*/
static VALUE
namespaces(VALUE self)
rb_xml_reader_namespaces(VALUE rb_reader)
{
xmlTextReaderPtr reader;
xmlNodePtr ptr;
VALUE attr ;

Data_Get_Struct(self, xmlTextReader, reader);
VALUE rb_namespaces = rb_hash_new() ;
xmlTextReaderPtr c_reader;
xmlNodePtr c_node;
VALUE rb_errors;

attr = rb_hash_new() ;
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);

if (! has_attributes(reader)) {
return attr ;
if (! has_attributes(c_reader)) {
return rb_namespaces ;
}

ptr = xmlTextReaderExpand(reader);
if (ptr == NULL) { return Qnil; }
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);

Nokogiri_xml_node_namespaces(ptr, attr);
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
c_node = xmlTextReaderExpand(c_reader);
xmlSetStructuredErrorFunc(NULL, NULL);

return attr ;
if (c_node == NULL) {
if (RARRAY_LEN(rb_errors) > 0) {
VALUE rb_error = rb_ary_entry(rb_errors, 0);
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
}
return Qnil;
}

Nokogiri_xml_node_namespaces(c_node, rb_namespaces);

return rb_namespaces ;
}

/*
Expand Down Expand Up @@ -202,14 +213,29 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
xmlTextReaderPtr c_reader;
xmlNodePtr c_node;
xmlAttrPtr c_property;
VALUE rb_errors;

Data_Get_Struct(rb_reader, xmlTextReader, c_reader);

if (!has_attributes(c_reader)) {
return rb_attributes;
}

rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);

xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
c_node = xmlTextReaderExpand(c_reader);
xmlSetStructuredErrorFunc(NULL, NULL);

if (c_node == NULL) {
if (RARRAY_LEN(rb_errors) > 0) {
VALUE rb_error = rb_ary_entry(rb_errors, 0);
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
}
return Qnil;
}

c_property = c_node->properties;
while (c_property != NULL) {
VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
Expand Down Expand Up @@ -756,7 +782,7 @@ noko_init_xml_reader()
rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
rb_define_method(cNokogiriXmlReader, "name", name, 0);
rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
rb_define_method(cNokogiriXmlReader, "namespaces", namespaces, 0);
rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
Expand Down
32 changes: 32 additions & 0 deletions test/xml/test_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,38 @@ def test_nonexistent_attribute
reader.read # el
assert_nil(reader.attribute("other"))
end

def test_broken_markup_attribute_hash
xml = <<~XML
<root><foo bar="asdf">
XML
reader = Nokogiri::XML::Reader(xml)
reader.read # root
reader.read # foo

assert_equal("foo", reader.name)
assert_equal(0, reader.errors.length)
assert_raises(Nokogiri::XML::SyntaxError) do
reader.attribute_hash
end
assert_equal(1, reader.errors.length)
end

def test_broken_markup_namespaces
xml = <<~XML
<root><foo bar="asdf">
XML
reader = Nokogiri::XML::Reader(xml)
reader.read # root
reader.read # foo

assert_equal("foo", reader.name)
assert_equal(0, reader.errors.length)
assert_raises(Nokogiri::XML::SyntaxError) do
reader.namespaces
end
assert_equal(1, reader.errors.length)
end
end
end
end

0 comments on commit 549ac3a

Please sign in to comment.