Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clean up XML::Schema #2316

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ end
### Fixed

* [JRuby] Fix NPE in Schema parsing when an imported resource doesn't have a `systemId`. [[#2296](https://github.com/sparklemotion/nokogiri/issues/2296)] (Thanks, [@pepijnve](https://github.com/pepijnve)!)
* [JRuby] Fix `Schema#validate` to only return the most recent Document's errors. Previously, if multiple documents were validated, this method returned the accumulated errors of all previous documents. [[#1282](https://github.com/sparklemotion/nokogiri/issues/1282)]
* [JRuby] Fix `Schema#validate` to not clobber the `@errors` instance variable. [[#1282](https://github.com/sparklemotion/nokogiri/issues/1282)]


## 1.12.3 / 2021-08-10
Expand Down
2 changes: 1 addition & 1 deletion ext/java/nokogiri/XmlSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ public class XmlSchema extends RubyObject
IRubyObject
validate_document_or_file(ThreadContext context, XmlDocument xmlDocument)
{
RubyArray errors = (RubyArray) this.getInstanceVariable("@errors");
RubyArray errors = (RubyArray)context.runtime.newEmptyArray();
ErrorHandler errorHandler = new SchemaErrorHandler(context.runtime, errors);
setErrorHandler(errorHandler);

Expand Down
53 changes: 35 additions & 18 deletions ext/nokogiri/xml_relax_ng.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,9 @@ dealloc(xmlRelaxNGPtr schema)
NOKOGIRI_DEBUG_END(schema);
}

/*
* call-seq:
* validate_document(document)
*
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
*/

static VALUE
validate_document(VALUE self, VALUE document)
rb_xml_relax_ng_validate_document(VALUE self, VALUE document)
{
xmlDocPtr doc;
xmlRelaxNGPtr schema;
Expand Down Expand Up @@ -51,14 +46,25 @@ validate_document(VALUE self, VALUE document)
return errors;
}


/*
* call-seq:
* read_memory(string)
* :call-seq:
* read_memory(input) → Nokogiri::XML::RelaxNG
* read_memory(input, parse_options) → Nokogiri::XML::RelaxNG
*
* Parse a RELAX NG schema definition and create a new Schema object.
*
* Create a new RelaxNG from the contents of +string+
* 💡 Note that the limitation of this method relative to RelaxNG.new is that +input+ must be type
* String, whereas RelaxNG.new also supports IO types.
*
* [Parameters]
* - +input+ (String) RELAX NG schema definition
* - +parse_options+ (Nokogiri::XML::ParseOptions) Defaults to ParseOptions::DEFAULT_SCHEMA
*
* [Returns] Nokogiri::XML::RelaxNG
*/
static VALUE
read_memory(int argc, VALUE *argv, VALUE klass)
rb_xml_relax_ng_s_read_memory(int argc, VALUE *argv, VALUE klass)
{
VALUE content;
VALUE parse_options;
Expand Down Expand Up @@ -109,14 +115,25 @@ read_memory(int argc, VALUE *argv, VALUE klass)
return rb_schema;
}


/*
* call-seq:
* from_document(doc)
* :call-seq:
* from_document(document) → Nokogiri::XML::RelaxNG
* from_document(document, parse_options) → Nokogiri::XML::RelaxNG
*
* Create a Schema from an already-parsed RELAX NG schema definition document.
*
* [Parameters]
* - +document+ (XML::Document) An XML::Document object representing the parsed RELAX NG
* - +parse_options+ (Nokogiri::XML::ParseOptions) ⚠ Unused
*
* [Returns] Nokogiri::XML::RelaxNG
*
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
* ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for
* future functionality.
*/
static VALUE
from_document(int argc, VALUE *argv, VALUE klass)
rb_xml_relax_ng_s_from_document(int argc, VALUE *argv, VALUE klass)
{
VALUE document;
VALUE parse_options;
Expand Down Expand Up @@ -178,8 +195,8 @@ noko_init_xml_relax_ng()
assert(cNokogiriXmlSchema);
cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);

rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1);
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", rb_xml_relax_ng_s_read_memory, -1);
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", rb_xml_relax_ng_s_from_document, -1);

rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1);
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", rb_xml_relax_ng_validate_document, 1);
}
70 changes: 40 additions & 30 deletions ext/nokogiri/xml_schema.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,8 @@ dealloc(xmlSchemaPtr schema)
NOKOGIRI_DEBUG_END(schema);
}

/*
* call-seq:
* validate_document(document)
*
* Validate a Nokogiri::XML::Document against this Schema.
*/
static VALUE
validate_document(VALUE self, VALUE document)
rb_xml_schema_validate_document(VALUE self, VALUE document)
{
xmlDocPtr doc;
xmlSchemaPtr schema;
Expand Down Expand Up @@ -51,14 +45,8 @@ validate_document(VALUE self, VALUE document)
return errors;
}

/*
* call-seq:
* validate_file(filename)
*
* Validate a file against this Schema.
*/
static VALUE
validate_file(VALUE self, VALUE rb_filename)
rb_xml_schema_validate_file(VALUE self, VALUE rb_filename)
{
xmlSchemaPtr schema;
xmlSchemaValidCtxtPtr valid_ctxt;
Expand Down Expand Up @@ -93,13 +81,24 @@ validate_file(VALUE self, VALUE rb_filename)
}

/*
* call-seq:
* read_memory(string)
* :call-seq:
* read_memory(input) → Nokogiri::XML::Schema
* read_memory(input, parse_options) → Nokogiri::XML::Schema
*
* Parse an XSD schema definition and create a new Schema object.
*
* 💡 Note that the limitation of this method relative to Schema.new is that +input+ must be type
* String, whereas Schema.new also supports IO types.
*
* [Parameters]
* - +input+ (String) XSD schema definition
* - +parse_options+ (Nokogiri::XML::ParseOptions)
* Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA
*
* Create a new Schema from the contents of +string+
* [Returns] Nokogiri::XML::Schema
*/
static VALUE
read_memory(int argc, VALUE *argv, VALUE klass)
rb_xml_schema_s_read_memory(int argc, VALUE *argv, VALUE klass)
{
VALUE content;
VALUE parse_options;
Expand Down Expand Up @@ -162,6 +161,7 @@ read_memory(int argc, VALUE *argv, VALUE klass)
return rb_schema;
}


/* Schema creation will remove and deallocate "blank" nodes.
* If those blank nodes have been exposed to Ruby, they could get freed
* out from under the VALUE pointer. This function checks to see if any of
Expand All @@ -188,14 +188,23 @@ has_blank_nodes_p(VALUE cache)
return 0;
}


/*
* call-seq:
* from_document(doc)
* :call-seq:
* from_document(document) → Nokogiri::XML::Schema
* from_document(document, parse_options) → Nokogiri::XML::Schema
*
* Create a Schema from an already-parsed XSD schema definition document.
*
* Create a new Schema from the Nokogiri::XML::Document +doc+
* [Parameters]
* - +document+ (XML::Document) A document object representing the parsed XSD
* - +parse_options+ (Nokogiri::XML::ParseOptions)
* Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA
*
* [Returns] Nokogiri::XML::Schema
*/
static VALUE
from_document(int argc, VALUE *argv, VALUE klass)
rb_xml_schema_s_from_document(int argc, VALUE *argv, VALUE klass)
{
VALUE document;
VALUE parse_options;
Expand All @@ -206,7 +215,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
VALUE errors;
VALUE rb_schema;
int scanned_args = 0;
xmlExternalEntityLoader old_loader = 0;
xmlExternalEntityLoader saved_loader = 0;

scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);

Expand Down Expand Up @@ -236,14 +245,14 @@ from_document(int argc, VALUE *argv, VALUE klass)
#endif

if (parse_options_int & XML_PARSE_NONET) {
old_loader = xmlGetExternalEntityLoader();
saved_loader = xmlGetExternalEntityLoader();
xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
}

schema = xmlSchemaParse(ctx);

if (old_loader) {
xmlSetExternalEntityLoader(old_loader);
if (saved_loader) {
xmlSetExternalEntityLoader(saved_loader);
}

xmlSetStructuredErrorFunc(NULL, NULL);
Expand All @@ -269,16 +278,17 @@ from_document(int argc, VALUE *argv, VALUE klass)
return Qnil;
}


void
noko_init_xml_schema()
{
cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);

rb_undef_alloc_func(cNokogiriXmlSchema);

rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1);
rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1);
rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", rb_xml_schema_s_read_memory, -1);
rb_define_singleton_method(cNokogiriXmlSchema, "from_document", rb_xml_schema_s_from_document, -1);

rb_define_private_method(cNokogiriXmlSchema, "validate_document", validate_document, 1);
rb_define_private_method(cNokogiriXmlSchema, "validate_file", validate_file, 1);
rb_define_private_method(cNokogiriXmlSchema, "validate_document", rb_xml_schema_validate_document, 1);
rb_define_private_method(cNokogiriXmlSchema, "validate_file", rb_xml_schema_validate_file, 1);
}
70 changes: 49 additions & 21 deletions lib/nokogiri/xml/relax_ng.rb
Original file line number Diff line number Diff line change
@@ -1,38 +1,66 @@
# coding: utf-8
# frozen_string_literal: true

module Nokogiri
module XML
class << self
###
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
# See Nokogiri::XML::RelaxNG for an example.
def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
RelaxNG.new(string_or_io, options)
#
# :call-seq:
# RelaxNG(input) → Nokogiri::XML::RelaxNG
# RelaxNG(input, parse_options) → Nokogiri::XML::RelaxNG
#
# Parse a RELAX NG schema definition and create a new Schema object. This is a convenience
# method for Nokogiri::XML::RelaxNG.new
#
# See related: Nokogiri::XML::RelaxNG.new
#
# [Parameters]
# - +input+ (String, IO) RELAX NG schema definition
# - +parse_options+ (Nokogiri::XML::ParseOptions)
# Defaults to ParseOptions::DEFAULT_SCHEMA
#
# [Returns] Nokogiri::XML::RelaxNG
#
def RelaxNG(input, parse_options = ParseOptions::DEFAULT_SCHEMA)
RelaxNG.new(input, parse_options)
end
end

###
# Nokogiri::XML::RelaxNG is used for validating XML against a
# RelaxNG schema.
# Nokogiri::XML::RelaxNG is used for validating XML against a RELAX NG schema definition.
#
# == Synopsis
# *Example:* Determine whether an XML document is valid.
#
# Validate an XML document against a RelaxNG schema. Loop over the errors
# that are returned and print them out:
# schema = Nokogiri::XML::RelaxNG(File.read(RELAX_NG_FILE))
# doc = Nokogiri::XML(File.read(XML_FILE))
# schema.valid?(doc) # Boolean
#
# schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
# doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
# *Example:* Validate an XML document against a RelaxNG schema, and capture any errors that are found.
#
# schema.validate(doc).each do |error|
# puts error.message
# end
# schema = Nokogiri::XML::RelaxNG(File.open(RELAX_NG_FILE))
# doc = Nokogiri::XML(File.open(XML_FILE))
# errors = schema.validate(doc) # Array<SyntaxError>
#
# The errors in the list are Nokogiri::XML::SyntaxError objects.
#
# NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
# underlying parsing libraries to access network resources. This is counter to Nokogiri's
# "untrusted by default" security policy, but is a limitation of the underlying libraries.
# ⚠ RELAX NG input is always treated as *trusted*, meaning that the underlying parsing libraries
# *will access network resources*. This is counter to Nokogiri's "untrusted by default" security
# policy, but is an unfortunate limitation of the underlying libraries. Please do not use this
# class for untrusted schema documents.
class RelaxNG < Nokogiri::XML::Schema
# :call-seq:
# new(input) → Nokogiri::XML::RelaxNG
# new(input, parse_options) → Nokogiri::XML::RelaxNG
#
# Parse a RELAX NG schema definition and create a new Schema object.
#
# [Parameters]
# - +input+ (String, IO) RELAX NG schema definition
# - +parse_options+ (Nokogiri::XML::ParseOptions)
# Defaults to ParseOptions::DEFAULT_SCHEMA
#
# [Returns] Nokogiri::XML::RelaxNG
#
def self.new(input, parse_options = ParseOptions::DEFAULT_SCHEMA)
from_document(Nokogiri::XML(input), parse_options)
end
end
end
end