Skip to content

Commit

Permalink
Merge pull request #225 from JoshData/parseXml_options_argument
Browse files Browse the repository at this point in the history
add an options argument to parseXml
  • Loading branch information
defunctzombie committed Apr 11, 2014
2 parents 7ea6858 + 0e82b95 commit 914ace1
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 4 deletions.
4 changes: 2 additions & 2 deletions lib/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ module.exports.fromHtml = function(string, opts) {
/// parse a string into an xml document
/// @param string xml string to parse
/// @param options optional object of boolean parser flags, e.g. { recover: true }
/// @return a Document
module.exports.fromXml = function(string) {
return bindings.fromXml(string);
module.exports.fromXml = function(string, options) {
return bindings.fromXml(string, options || {});
};

43 changes: 41 additions & 2 deletions src/xml_document.cc
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,43 @@ XmlDocument::FromHtml(const v8::Arguments& args)
return scope.Close(doc_handle);
}

// Translate one property of the options object into a parser flag.
//
// Reads the property named `key` from `props` and converts it to a boolean.
// Returns `value` when that boolean is true, and 0 otherwise, so the result
// can be OR-ed straight into an option bitmask.
int getXmlParserOption2(v8::Local<v8::Object> props, const char *key, int value) {
    v8::Local<v8::Value> property = props->Get(v8::String::New(key));
    if (property->ToBoolean()->BooleanValue()) {
        return value;
    }
    return 0;
}

// Build a libxml2 parser option bitmask from a JavaScript options object.
//
// Every property name listed in the table below is looked up on `props`; when
// the property converts to boolean true, the matching XML_PARSE_* flag is
// OR-ed into the result. Properties that are absent or falsy contribute
// nothing, so an object with none of these properties set yields 0.
//
// @param props  options object whose properties select parser flags
// @return       the combined flags, cast to xmlParserOption
//
// Flag reference: http://xmlsoft.org/html/libxml-parser.html#xmlParserOption
xmlParserOption getXmlParserOption(v8::Local<v8::Object> props) {
    // An empty handle has no object behind it, and calling Get() through it
    // would dereference a null pointer. Treat it as "no options requested".
    // NOTE(review): an empty handle is presumably what ToObject() yields for
    // an argument that cannot be converted (e.g. undefined) -- confirm
    // whether the caller should also report that as an error.
    if (props.IsEmpty()) {
        return static_cast<xmlParserOption>(0);
    }

    struct OptionFlag {
        const char *key;  // property name on the options object
        int flag;         // libxml2 flag enabled when the property is truthy
    };

    // Terminated by a NULL key so the loop needs no separate element count.
    static const OptionFlag optionFlags[] = {
        { "recover",    XML_PARSE_RECOVER },     // recover on errors
        { "noent",      XML_PARSE_NOENT },       // substitute entities
        { "dtdload",    XML_PARSE_DTDLOAD },     // load the external subset
        { "dtdattr",    XML_PARSE_DTDATTR },     // default DTD attributes
        { "dtdvalid",   XML_PARSE_DTDVALID },    // validate with the DTD
        { "noerror",    XML_PARSE_NOERROR },     // suppress error reports
        { "nowarning",  XML_PARSE_NOWARNING },   // suppress warning reports
        { "pedantic",   XML_PARSE_PEDANTIC },    // pedantic error reporting
        { "noblanks",   XML_PARSE_NOBLANKS },    // remove blank nodes
        { "sax1",       XML_PARSE_SAX1 },        // use the SAX1 interface internally
        { "xinclude",   XML_PARSE_XINCLUDE },    // implement XInclude substitution
        { "nonet",      XML_PARSE_NONET },       // forbid network access
        { "nodict",     XML_PARSE_NODICT },      // do not reuse the context dictionary
        { "nsclean",    XML_PARSE_NSCLEAN },     // remove redundant namespace declarations
        { "nocdata",    XML_PARSE_NOCDATA },     // merge CDATA as text nodes
        { "noxincnode", XML_PARSE_NOXINCNODE },  // do not generate XINCLUDE START/END nodes
        { "compact",    XML_PARSE_COMPACT },     // compact small text nodes; no modification of
                                                 // the tree allowed afterwards (will possibly
                                                 // crash if you try to modify the tree)
        { "old10",      XML_PARSE_OLD10 },       // parse using XML-1.0 before update 5
        { "nobasefix",  XML_PARSE_NOBASEFIX },   // do not fixup XINCLUDE xml:base uris
        { "huge",       XML_PARSE_HUGE },        // relax any hardcoded limit from the parser
        { "oldsax",     XML_PARSE_OLDSAX },      // parse using SAX2 interface before 2.7.0
        { "ignore_enc", XML_PARSE_IGNORE_ENC },  // ignore internal document encoding hint
        { "big_lines",  XML_PARSE_BIG_LINES },   // store big line numbers in text PSVI field
        { NULL, 0 }
    };

    int ret = 0;
    for (const OptionFlag *entry = optionFlags; entry->key != NULL; ++entry) {
        ret |= getXmlParserOption2(props, entry->key, entry->flag);
    }

    return static_cast<xmlParserOption>(ret);
}

v8::Handle<v8::Value>
XmlDocument::FromXml(const v8::Arguments& args)
{
Expand All @@ -242,17 +279,19 @@ XmlDocument::FromXml(const v8::Arguments& args)
xmlSetStructuredErrorFunc(reinterpret_cast<void *>(*errors),
XmlSyntaxError::PushToArray);

xmlParserOption opts = getXmlParserOption(args[1]->ToObject());

xmlDocPtr doc;
if (!node::Buffer::HasInstance(args[0])) {
// Parse a string
v8::String::Utf8Value str(args[0]->ToString());
doc = xmlReadMemory(*str, str.length(), NULL, "UTF-8", 0);
doc = xmlReadMemory(*str, str.length(), NULL, "UTF-8", opts);
}
else {
// Parse a buffer
v8::Local<v8::Object> buf = args[0]->ToObject();
doc = xmlReadMemory(node::Buffer::Data(buf), node::Buffer::Length(buf),
NULL, NULL, 0);
NULL, NULL, opts);
}

xmlSetStructuredErrorFunc(NULL, NULL);
Expand Down
16 changes: 16 additions & 0 deletions test/xml_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,19 @@ module.exports.fatal_error = function(assert) {
assert.done();
};

// Checks that parser options passed to parseXml change the parsed document,
// by comparing the serialized result against an expected string.
module.exports.parse_options = function(assert) {
    // Parse `xml` with `parserOptions`, serialize it, drop the XML declaration
    // line and the trailing newline, then compare with `wanted`.
    function check(xml, parserOptions, wanted) {
        var serialized = libxml.parseXml(xml, parserOptions).toString()
            .replace(/^<\?xml version="1.0" encoding="UTF-8"\?>\n/, '')
            .replace(/\n$/, '');
        assert.equal(serialized, wanted);
    }

    // Each case is [input, options, expected]; cases run in order.
    var cases = [
        // without this option, this document would raise an exception during parsing
        ["<x>&</x>", { recover: true }, "<x/>"],
        // foo => bar
        ["<!DOCTYPE x [ <!ENTITY foo 'bar'> ]> <x>&foo;</x>", { noent: true }, '<!DOCTYPE x [\n<!ENTITY foo "bar">\n]>\n<x>bar</x>'],
        // no indentation even though the toString() default called for formatting
        ["<x> <a>123</a> </x>", { }, "<x> <a>123</a> </x>"],
        // ah, now we have indentation!
        ["<x> <a>123</a> </x>", { noblanks: true }, "<x>\n <a>123</a>\n</x>"],
        // normally CDATA stays as CDATA
        ["<x><![CDATA[hi]]></x>", { }, "<x><![CDATA[hi]]></x>"],
        // but here CDATA is removed!
        ["<x><![CDATA[hi]]></x>", { nocdata: true }, "<x>hi</x>"]
    ];

    cases.forEach(function(testCase) {
        check(testCase[0], testCase[1], testCase[2]);
    });

    assert.done();
};

0 comments on commit 914ace1

Please sign in to comment.