Skip to content
Browse files

Support for parsing XML and HTML from Buffers

For XML, passing a Buffer lets libxml detect the document encoding
automatically from the XML declaration in the file; string input is
still always parsed as UTF-8.

Parsing from a Buffer should also be a bit faster than parsing from a
string, because the buffer's bytes can be passed directly to libxml
instead of first being converted from a V8 string into a UTF-8 copy.
  • Loading branch information...
1 parent 375b51c commit 1f3f5e551c0f8cae84e9dba016b0ec3b42471cb1 @dpw dpw committed
Showing with 51 additions and 9 deletions.
  1. +25 −4 src/xml_document.cc
  2. BIN test/fixtures/parser-utf16.xml
  3. +15 −5 test/html_parser.js
  4. +11 −0 test/xml_parser.js
View
29 src/xml_document.cc
@@ -1,6 +1,7 @@
// Copyright 2009, Squish Tech, LLC.
#include <node.h>
+#include <node_buffer.h>
#include <libxml/HTMLparser.h>
#include <libxml/xmlschemas.h>
@@ -139,8 +140,18 @@ XmlDocument::FromHtml(const v8::Arguments& args)
xmlSetStructuredErrorFunc(reinterpret_cast<void *>(*errors),
XmlSyntaxError::PushToArray);
- v8::String::Utf8Value str(args[0]->ToString());
- htmlDocPtr doc = htmlReadMemory(*str, str.length(), NULL, NULL, 0);
+ htmlDocPtr doc;
+ if (!node::Buffer::HasInstance(args[0])) {
+ // Parse a string
+ v8::String::Utf8Value str(args[0]->ToString());
+ doc = htmlReadMemory(*str, str.length(), NULL, NULL, 0);
+ }
+ else {
+ // Parse a buffer
+ v8::Local<v8::Object> buf = args[0]->ToObject();
+ doc = htmlReadMemory(node::Buffer::Data(buf), node::Buffer::Length(buf),
+ NULL, NULL, 0);
+ }
xmlSetStructuredErrorFunc(NULL, NULL);
@@ -170,8 +181,18 @@ XmlDocument::FromXml(const v8::Arguments& args)
xmlSetStructuredErrorFunc(reinterpret_cast<void *>(*errors),
XmlSyntaxError::PushToArray);
- v8::String::Utf8Value str(args[0]->ToString());
- xmlDocPtr doc = xmlReadMemory(*str, str.length(), NULL, "UTF-8", 0);
+ xmlDocPtr doc;
+ if (!node::Buffer::HasInstance(args[0])) {
+ // Parse a string
+ v8::String::Utf8Value str(args[0]->ToString());
+ doc = xmlReadMemory(*str, str.length(), NULL, "UTF-8", 0);
+ }
+ else {
+ // Parse a buffer
+ v8::Local<v8::Object> buf = args[0]->ToObject();
+ doc = xmlReadMemory(node::Buffer::Data(buf), node::Buffer::Length(buf),
+ NULL, NULL, 0);
+ }
xmlSetStructuredErrorFunc(NULL, NULL);
View
BIN test/fixtures/parser-utf16.xml
Binary file not shown.
View
20 test/html_parser.js
@@ -13,12 +13,22 @@ function make_error(object) {
module.exports.parse = function(assert) {
var filename = __dirname + '/fixtures/parser.html';
- var str = fs.readFileSync(filename, 'utf8');
- var doc = libxml.parseHtml(str);
- assert.equal('html', doc.root().name());
- assert.equal('Test HTML document', doc.get('head/title').text());
- assert.equal('HTML content!', doc.get('body/span').text());
+ function attempt_parse(encoding) {
+ var str = fs.readFileSync(filename, encoding);
+
+ var doc = libxml.parseHtml(str);
+ assert.equal('html', doc.root().name());
+ assert.equal('Test HTML document', doc.get('head/title').text());
+ assert.equal('HTML content!', doc.get('body/span').text());
+ }
+
+ // Parse via a string
+ attempt_parse('utf-8');
+
+ // Parse via a Buffer
+ attempt_parse(null);
+
assert.done();
};
View
11 test/xml_parser.js
@@ -19,6 +19,17 @@ module.exports.parse = function(assert) {
assert.done();
};
+module.exports.parse_buffer = function(assert) {
+ var filename = __dirname + '/fixtures/parser-utf16.xml';
+ var buf = fs.readFileSync(filename);
+
+ var doc = libxml.parseXml(buf);
+ assert.equal('1.0', doc.version());
+ assert.equal('UTF-16', doc.encoding());
+ assert.equal('root', doc.root().name());
+ assert.done();
+};
+
module.exports.parse_synonym = function(assert) {
assert.strictEqual(libxml.parseXml, libxml.parseXmlString);
assert.done();

0 comments on commit 1f3f5e5

Please sign in to comment.
Something went wrong with that request. Please try again.