Permalink
Browse files

Support for parsing XML and HTML from Buffers

For XML, this gives access to automatic encoding detection based on
the XML declaration in the file.

Parsing from a Buffer should also be a bit faster than parsing from a
string: a string must first be converted to a UTF-8 copy, whereas the
data in a Buffer can be passed directly to libxml.
  • Loading branch information...
dpw committed Jul 7, 2012
1 parent 375b51c commit 1f3f5e551c0f8cae84e9dba016b0ec3b42471cb1
Showing with 51 additions and 9 deletions.
  1. +25 −4 src/xml_document.cc
  2. BIN test/fixtures/parser-utf16.xml
  3. +15 −5 test/html_parser.js
  4. +11 −0 test/xml_parser.js
View
@@ -1,6 +1,7 @@
// Copyright 2009, Squish Tech, LLC.
#include <node.h>
+#include <node_buffer.h>
#include <libxml/HTMLparser.h>
#include <libxml/xmlschemas.h>
@@ -139,8 +140,18 @@ XmlDocument::FromHtml(const v8::Arguments& args)
xmlSetStructuredErrorFunc(reinterpret_cast<void *>(*errors),
XmlSyntaxError::PushToArray);
- v8::String::Utf8Value str(args[0]->ToString());
- htmlDocPtr doc = htmlReadMemory(*str, str.length(), NULL, NULL, 0);
+ htmlDocPtr doc;
+ if (!node::Buffer::HasInstance(args[0])) {
+ // Parse a string
+ v8::String::Utf8Value str(args[0]->ToString());
+ doc = htmlReadMemory(*str, str.length(), NULL, NULL, 0);
+ }
+ else {
+ // Parse a buffer
+ v8::Local<v8::Object> buf = args[0]->ToObject();
+ doc = htmlReadMemory(node::Buffer::Data(buf), node::Buffer::Length(buf),
+ NULL, NULL, 0);
+ }
xmlSetStructuredErrorFunc(NULL, NULL);
@@ -170,8 +181,18 @@ XmlDocument::FromXml(const v8::Arguments& args)
xmlSetStructuredErrorFunc(reinterpret_cast<void *>(*errors),
XmlSyntaxError::PushToArray);
- v8::String::Utf8Value str(args[0]->ToString());
- xmlDocPtr doc = xmlReadMemory(*str, str.length(), NULL, "UTF-8", 0);
+ xmlDocPtr doc;
+ if (!node::Buffer::HasInstance(args[0])) {
+ // Parse a string
+ v8::String::Utf8Value str(args[0]->ToString());
+ doc = xmlReadMemory(*str, str.length(), NULL, "UTF-8", 0);
+ }
+ else {
+ // Parse a buffer
+ v8::Local<v8::Object> buf = args[0]->ToObject();
+ doc = xmlReadMemory(node::Buffer::Data(buf), node::Buffer::Length(buf),
+ NULL, NULL, 0);
+ }
xmlSetStructuredErrorFunc(NULL, NULL);
Binary file not shown.
View
@@ -13,12 +13,22 @@ function make_error(object) {
module.exports.parse = function(assert) {
var filename = __dirname + '/fixtures/parser.html';
- var str = fs.readFileSync(filename, 'utf8');
- var doc = libxml.parseHtml(str);
- assert.equal('html', doc.root().name());
- assert.equal('Test HTML document', doc.get('head/title').text());
- assert.equal('HTML content!', doc.get('body/span').text());
+ function attempt_parse(encoding) {
+ var str = fs.readFileSync(filename, encoding);
+
+ var doc = libxml.parseHtml(str);
+ assert.equal('html', doc.root().name());
+ assert.equal('Test HTML document', doc.get('head/title').text());
+ assert.equal('HTML content!', doc.get('body/span').text());
+ }
+
+ // Parse via a string
+ attempt_parse('utf-8');
+
+ // Parse via a Buffer
+ attempt_parse(null);
+
assert.done();
};
View
@@ -19,6 +19,17 @@ module.exports.parse = function(assert) {
assert.done();
};
+module.exports.parse_buffer = function(assert) {
+ var filename = __dirname + '/fixtures/parser-utf16.xml';
+ var buf = fs.readFileSync(filename);
+
+ var doc = libxml.parseXml(buf);
+ assert.equal('1.0', doc.version());
+ assert.equal('UTF-16', doc.encoding());
+ assert.equal('root', doc.root().name());
+ assert.done();
+};
+
module.exports.parse_synonym = function(assert) {
assert.strictEqual(libxml.parseXml, libxml.parseXmlString);
assert.done();

0 comments on commit 1f3f5e5

Please sign in to comment.