Skip to content

Commit

Permalink
adding relaxng and xml schema support. GH sparklemotion#4
Browse files Browse the repository at this point in the history
  • Loading branch information
tenderlove committed Mar 27, 2009
1 parent 01d355f commit 469d528
Show file tree
Hide file tree
Showing 18 changed files with 593 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rdoc
Expand Up @@ -12,6 +12,8 @@
* Nokogiri::HTML::Document#meta_encoding for getting the meta encoding
* Nokogiri::HTML::Document#meta_encoding= for setting the meta encoding
* Nokogiri::XML::Document#encoding= to set the document encoding
* Nokogiri::XML::Schema for validating documents against XSD schema
* Nokogiri::XML::RelaxNG for validating documents against RelaxNG schema

* Bugfixes

Expand Down
12 changes: 12 additions & 0 deletions Manifest.txt
Expand Up @@ -38,10 +38,14 @@ ext/nokogiri/xml_processing_instruction.c
ext/nokogiri/xml_processing_instruction.h
ext/nokogiri/xml_reader.c
ext/nokogiri/xml_reader.h
ext/nokogiri/xml_relax_ng.c
ext/nokogiri/xml_relax_ng.h
ext/nokogiri/xml_sax_parser.c
ext/nokogiri/xml_sax_parser.h
ext/nokogiri/xml_sax_push_parser.c
ext/nokogiri/xml_sax_push_parser.h
ext/nokogiri/xml_schema.c
ext/nokogiri/xml_schema.h
ext/nokogiri/xml_syntax_error.c
ext/nokogiri/xml_syntax_error.h
ext/nokogiri/xml_text.c
Expand Down Expand Up @@ -95,10 +99,12 @@ lib/nokogiri/xml/node_set.rb
lib/nokogiri/xml/notation.rb
lib/nokogiri/xml/processing_instruction.rb
lib/nokogiri/xml/reader.rb
lib/nokogiri/xml/relax_ng.rb
lib/nokogiri/xml/sax.rb
lib/nokogiri/xml/sax/document.rb
lib/nokogiri/xml/sax/parser.rb
lib/nokogiri/xml/sax/push_parser.rb
lib/nokogiri/xml/schema.rb
lib/nokogiri/xml/syntax_error.rb
lib/nokogiri/xml/text.rb
lib/nokogiri/xml/xpath.rb
Expand All @@ -112,9 +118,13 @@ test/css/test_nthiness.rb
test/css/test_parser.rb
test/css/test_tokenizer.rb
test/css/test_xpath_visitor.rb
test/files/address_book.rlx
test/files/address_book.xml
test/files/dont_hurt_em_why.xml
test/files/exslt.xml
test/files/exslt.xslt
test/files/po.xml
test/files/po.xsd
test/files/staff.xml
test/files/staff.xslt
test/files/tlm.html
Expand Down Expand Up @@ -167,6 +177,8 @@ test/xml/test_node_encoding.rb
test/xml/test_node_set.rb
test/xml/test_processing_instruction.rb
test/xml/test_reader_encoding.rb
test/xml/test_relax_ng.rb
test/xml/test_schema.rb
test/xml/test_text.rb
test/xml/test_unparented_node.rb
test/xml/test_xpath.rb
2 changes: 2 additions & 0 deletions ext/nokogiri/native.c
Expand Up @@ -70,4 +70,6 @@ void Init_native()
init_xslt_stylesheet();
init_xml_syntax_error();
init_html_entity_lookup();
init_xml_schema();
init_xml_relax_ng();
}
3 changes: 3 additions & 0 deletions ext/nokogiri/native.h
Expand Up @@ -9,6 +9,7 @@
#include <libxml/xpathInternals.h>
#include <libxml/xmlreader.h>
#include <libxml/xmlsave.h>
#include <libxml/xmlschemas.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>

Expand Down Expand Up @@ -88,6 +89,8 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
#include <html_sax_parser.h>
#include <xslt_stylesheet.h>
#include <xml_syntax_error.h>
#include <xml_schema.h>
#include <xml_relax_ng.h>

extern VALUE mNokogiri ;
extern VALUE mNokogiriXml ;
Expand Down
102 changes: 102 additions & 0 deletions ext/nokogiri/xml_relax_ng.c
@@ -0,0 +1,102 @@
#include <xml_relax_ng.h>

/*
 * Release the libxml2 RelaxNG schema owned by a wrapped Ruby object.
 * read_memory registers this as the free function in Data_Wrap_Struct.
 */
static void dealloc(xmlRelaxNGPtr schema)
{
  /* Hand the debug macros the pointer being freed; no `doc` exists here. */
  NOKOGIRI_DEBUG_START(schema);
  xmlRelaxNGFree(schema);
  NOKOGIRI_DEBUG_END(schema);
}

/*
 * call-seq:
 *  validate_document(document)
 *
 * Check a Nokogiri::XML::Document against this RelaxNG schema. Returns an
 * Array that the structured error callback fills while validating. Raises
 * RuntimeError when no validation context can be created.
 */
static VALUE validate_document(VALUE self, VALUE document)
{
  xmlRelaxNGPtr rng;
  xmlDocPtr xml_doc;
  xmlRelaxNGValidCtxtPtr vctx;
  VALUE error_list;

  /* Unwrap the native pointers held by the two Ruby objects. */
  Data_Get_Struct(self, xmlRelaxNG, rng);
  Data_Get_Struct(document, xmlDoc, xml_doc);

  error_list = rb_ary_new();

  vctx = xmlRelaxNGNewValidCtxt(rng);
  if(!vctx) {
    rb_raise(rb_eRuntimeError, "Could not create a validation context");
  }

  /* Route every validation error into error_list. */
  xmlRelaxNGSetValidStructuredErrors(vctx, Nokogiri_error_array_pusher,
      (void *)error_list);

  xmlRelaxNGValidateDoc(vctx, xml_doc);
  xmlRelaxNGFreeValidCtxt(vctx);

  return error_list;
}

/*
 * call-seq:
 *  read_memory(string)
 *
 * Create a new RelaxNG from the contents of +string+
 *
 * Errors reported while parsing are collected into an Array that is stored
 * on the returned object as @errors. If no schema can be built, the last
 * libxml2 error is raised as a wrapped syntax error; when libxml2 recorded
 * no error, RuntimeError is raised instead.
 */
static VALUE read_memory(VALUE klass, VALUE content)
{
  /*
   * Coerce first, measure second. StringValuePtr may replace +content+ with
   * the result of a to_str conversion, and C does not specify the order in
   * which function arguments are evaluated, so both must not be arguments
   * of the same call.
   */
  const char *buffer = StringValuePtr(content);
  int length = (int)RSTRING_LEN(content);

  /*
   * Forget any error left behind by an earlier libxml2 operation so that
   * the xmlGetLastError() check below only sees errors from this parse.
   */
  xmlResetLastError();

  xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(buffer, length);

  VALUE errors = rb_ary_new();
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

  xmlRelaxNGSetParserStructuredErrors(
      ctx,
      Nokogiri_error_array_pusher,
      (void *)errors
  );

  xmlRelaxNGPtr schema = xmlRelaxNGParse(ctx);

  /* Uninstall the global handler and drop the parser context before any raise. */
  xmlSetStructuredErrorFunc(NULL, NULL);
  xmlRelaxNGFreeParserCtxt(ctx);

  if(NULL == schema) {
    xmlErrorPtr error = xmlGetLastError();
    if(error) {
      rb_funcall(rb_mKernel, rb_intern("raise"), 1,
          Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
      );
    } else {
      rb_raise(rb_eRuntimeError, "Could not parse document");
    }

    return Qnil;
  }

  VALUE rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
  rb_iv_set(rb_schema, "@errors", errors);

  return rb_schema;
}

/* Ruby class object for Nokogiri::XML::RelaxNG; assigned in init_xml_relax_ng. */
VALUE cNokogiriXmlRelaxNG;

/*
 * Define Nokogiri::XML::RelaxNG with cNokogiriXmlSchema as its superclass
 * and attach the native methods implemented in this file.
 */
void init_xml_relax_ng()
{
  VALUE mod_nokogiri = rb_define_module("Nokogiri");
  VALUE mod_xml = rb_define_module_under(mod_nokogiri, "XML");

  cNokogiriXmlRelaxNG =
    rb_define_class_under(mod_xml, "RelaxNG", cNokogiriXmlSchema);

  rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory",
      read_memory, 1);
  rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document",
      validate_document, 1);
}
9 changes: 9 additions & 0 deletions ext/nokogiri/xml_relax_ng.h
@@ -0,0 +1,9 @@
/* Public declarations for the Nokogiri::XML::RelaxNG native binding. */
#ifndef NOKOGIRI_XML_RELAX_NG
#define NOKOGIRI_XML_RELAX_NG

#include <native.h>

/* Defines the Nokogiri::XML::RelaxNG class and registers its native methods. */
void init_xml_relax_ng();

/* Ruby class object for Nokogiri::XML::RelaxNG, set by init_xml_relax_ng. */
extern VALUE cNokogiriXmlRelaxNG;
#endif
103 changes: 103 additions & 0 deletions ext/nokogiri/xml_schema.c
@@ -0,0 +1,103 @@
#include <xml_schema.h>

/*
 * Release the libxml2 XML Schema owned by a wrapped Ruby object.
 * read_memory registers this as the free function in Data_Wrap_Struct.
 */
static void dealloc(xmlSchemaPtr schema)
{
  /* Hand the debug macros the pointer being freed; no `doc` exists here. */
  NOKOGIRI_DEBUG_START(schema);
  xmlSchemaFree(schema);
  NOKOGIRI_DEBUG_END(schema);
}

/*
 * call-seq:
 *  validate_document(document)
 *
 * Check a Nokogiri::XML::Document against this Schema. Returns an Array
 * that the structured error callback fills while validating. Raises
 * RuntimeError when no validation context can be created.
 */
static VALUE validate_document(VALUE self, VALUE document)
{
  xmlSchemaPtr xsd;
  xmlDocPtr xml_doc;
  xmlSchemaValidCtxtPtr vctx;
  VALUE error_list;

  /* Unwrap the native pointers held by the two Ruby objects. */
  Data_Get_Struct(self, xmlSchema, xsd);
  Data_Get_Struct(document, xmlDoc, xml_doc);

  error_list = rb_ary_new();

  vctx = xmlSchemaNewValidCtxt(xsd);
  if(!vctx) {
    rb_raise(rb_eRuntimeError, "Could not create a validation context");
  }

  /* Route every validation error into error_list. */
  xmlSchemaSetValidStructuredErrors(vctx, Nokogiri_error_array_pusher,
      (void *)error_list);

  xmlSchemaValidateDoc(vctx, xml_doc);
  xmlSchemaFreeValidCtxt(vctx);

  return error_list;
}

/*
 * call-seq:
 *  read_memory(string)
 *
 * Create a new Schema from the contents of +string+
 *
 * Errors reported while parsing are collected into an Array that is stored
 * on the returned object as @errors. If no schema can be built, the last
 * libxml2 error is raised as a wrapped syntax error; when libxml2 recorded
 * no error, RuntimeError is raised instead.
 */
static VALUE read_memory(VALUE klass, VALUE content)
{
  /*
   * Coerce first, measure second. StringValuePtr may replace +content+ with
   * the result of a to_str conversion, and C does not specify the order in
   * which function arguments are evaluated, so both must not be arguments
   * of the same call.
   */
  const char *buffer = StringValuePtr(content);
  int length = (int)RSTRING_LEN(content);

  /*
   * Forget any error left behind by an earlier libxml2 operation so that
   * the xmlGetLastError() check below only sees errors from this parse.
   */
  xmlResetLastError();

  xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt(buffer, length);

  VALUE errors = rb_ary_new();
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

  xmlSchemaSetParserStructuredErrors(
      ctx,
      Nokogiri_error_array_pusher,
      (void *)errors
  );

  xmlSchemaPtr schema = xmlSchemaParse(ctx);

  /* Uninstall the global handler and drop the parser context before any raise. */
  xmlSetStructuredErrorFunc(NULL, NULL);
  xmlSchemaFreeParserCtxt(ctx);

  if(NULL == schema) {
    xmlErrorPtr error = xmlGetLastError();
    if(error) {
      rb_funcall(rb_mKernel, rb_intern("raise"), 1,
          Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
      );
    } else {
      rb_raise(rb_eRuntimeError, "Could not parse document");
    }

    return Qnil;
  }

  VALUE rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
  rb_iv_set(rb_schema, "@errors", errors);

  return rb_schema;
}

/* Ruby class object for Nokogiri::XML::Schema; assigned in init_xml_schema. */
VALUE cNokogiriXmlSchema;

/*
 * Define Nokogiri::XML::Schema (superclass Object) and attach the native
 * methods implemented in this file.
 */
void init_xml_schema()
{
  VALUE mod_nokogiri = rb_define_module("Nokogiri");
  VALUE mod_xml = rb_define_module_under(mod_nokogiri, "XML");

  cNokogiriXmlSchema = rb_define_class_under(mod_xml, "Schema", rb_cObject);

  rb_define_singleton_method(cNokogiriXmlSchema, "read_memory",
      read_memory, 1);
  rb_define_private_method(cNokogiriXmlSchema, "validate_document",
      validate_document, 1);
}
9 changes: 9 additions & 0 deletions ext/nokogiri/xml_schema.h
@@ -0,0 +1,9 @@
/* Public declarations for the Nokogiri::XML::Schema native binding. */
#ifndef NOKOGIRI_XML_SCHEMA
#define NOKOGIRI_XML_SCHEMA

#include <native.h>

/* Defines the Nokogiri::XML::Schema class and registers its native methods. */
void init_xml_schema();

/* Ruby class object for Nokogiri::XML::Schema, set by init_xml_schema. */
extern VALUE cNokogiriXmlSchema;
#endif
2 changes: 2 additions & 0 deletions lib/nokogiri/xml.rb
Expand Up @@ -18,6 +18,8 @@
require 'nokogiri/xml/notation'
require 'nokogiri/xml/element'
require 'nokogiri/xml/entity_declaration'
require 'nokogiri/xml/schema'
require 'nokogiri/xml/relax_ng'

module Nokogiri
class << self
Expand Down
29 changes: 29 additions & 0 deletions lib/nokogiri/xml/relax_ng.rb
@@ -0,0 +1,29 @@
module Nokogiri
  module XML
    ###
    # Convenience constructor: forwards +string_or_io+ unchanged to
    # RelaxNG.new and returns the result.
    def self.RelaxNG(string_or_io)
      RelaxNG.new(string_or_io)
    end

    ###
    # Nokogiri::XML::RelaxNG validates XML documents against a RelaxNG
    # schema.
    #
    # == Synopsis
    #
    # Validate an XML document against a RelaxNG schema, then loop over the
    # returned errors and print each one:
    #
    #   schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
    #   doc    = Nokogiri::XML(File.read(ADDRESS_XML_FILE))
    #
    #   schema.validate(doc).each do |error|
    #     puts error.message
    #   end
    #
    # Each entry in the list of errors is a Nokogiri::XML::SyntaxError.
    class RelaxNG < Nokogiri::XML::Schema
    end
  end
end

0 comments on commit 469d528

Please sign in to comment.