scaryguy edited this page Aug 23, 2015 · 30 revisions


LibXML bindings for node.js

// Parse an XML string into a document, then read values back out of it
// with an XPath query and with direct child/attribute access.
var libxml = require("libxmljs");
var xml =  '<?xml version="1.0" encoding="UTF-8"?>' +
           '<root>' +
               '<child foo="bar">' +
                   '<grandchild baz="fizbuzz">grandchild content</grandchild>' +
               '</child>' +
               '<sibling>with content!</sibling>' +
           '</root>';

var xmlDoc = libxml.parseXmlString(xml);

// xpath queries
var gchild = xmlDoc.get('//grandchild');

console.log(gchild.text());  // prints "grandchild content"

// the string above has no whitespace between elements, so the first
// child node of <root> is the <child> element
var children = xmlDoc.root().childNodes();
var child = children[0];

console.log(child.attr('foo').value()); // prints "bar"



// Two alternative call forms (use one or the other).
// 1) Parse without passing any options:
var doc = libxml.parseXmlString(xmlString);
// 2) Parse with the optional options object, here enabling the noblanks flag:
var doc = libxml.parseXmlString(xmlString, { noblanks: true });

The optional second argument is an object in which you can set recover, noent, noblanks, and nocdata properties to true to set the corresponding libxml parsing flag. noblanks removes non-significant whitespace and is particularly useful when round-tripping XML content with indentation, since the presence of non-significant whitespace in a document turns off automatic formatting in toString. (For other options see the source.)

SAX Parsing

SAX parsing objects are event emitters and callbacks can be connected in typical node.js fashion.

var parser = new libxml.SaxParser();

// register a handler for each SAX event you are interested in
parser.on('startDocument', function () { /* ... */ });
parser.on('startElement', function () { /* ... */ });

// parse a complete document
parser.parseString(xmlString);

SAX Push Parsing

Push parsers are created the same way DOM parsers are, but take input a chunk at a time:

var parser = new libxml.SaxPushParser();

// connect any callbacks here
parser
  .on('startDocument', function () { /* ... */ })
  .on('startElement', function () { /* ... */ });

// feed the parser one chunk at a time
while (xmlChunk) {
  parser.push(xmlChunk);
  // load the next chunk into xmlChunk here; the loop ends once it is falsy
}


To build an XML document, simply create all the required nodes. Indentation is meant to represent location in the tree, and you can capture any intermediate node in a variable to reuse it:

// Build the document node by node. node() returns the newly created element,
// so parent() is used to climb back up before adding a sibling.
var doc = new libxml.Document();
doc.node('root')
    .node('child').attr({foo: 'bar'})
      .node('grandchild', 'grandchild content').attr({baz: 'fizbuzz'})
      .parent()  // grandchild -> child
    .parent()    // child -> root
    .node('sibling', 'with content!');

Calling doc.toString() will yield the following XML:

<?xml version="1.0" encoding="UTF-8"?>
<root>
    <child foo="bar">
        <grandchild baz="fizbuzz">grandchild content</grandchild>
    </child>
    <sibling>with content!</sibling>
</root>

Note that you must use toString() in order to create XML output. If you inspect the doc variable above without calling toString() on it, you will only receive an empty object.

Validating against XSD schema

// Schema: a <comment> element that must contain an <author> and a <content>
// child, both of type xs:string.
var xsd =
  '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">' +
    '<xs:element name="comment">' +
      '<xs:complexType>' +
        '<xs:all>' +
          '<xs:element name="author" type="xs:string"/>' +
          '<xs:element name="content" type="xs:string"/>' +
        '</xs:all>' +
      '</xs:complexType>' +
    '</xs:element>' +
  '</xs:schema>';

// A document with both child elements present.
var xml_valid =
  '<?xml version="1.0"?>' +
  '<comment>' +
    '<author>author</author>' +
    '<content>nothing</content>' +
  '</comment>';

// A document whose <comment> holds plain text instead of the child elements.
var xml_invalid =
  '<?xml version="1.0"?>' +
  '<comment>A comment</comment>';

// The schema itself is parsed as an ordinary XML document.
var xsdDoc = libxml.parseXml(xsd);
var xmlDocValid = libxml.parseXml(xml_valid);
var xmlDocInvalid = libxml.parseXml(xml_invalid);

// validate() yields true here and validationErrors stays empty
assert.equal(xmlDocValid.validate(xsdDoc), true);
assert.equal(xmlDocValid.validationErrors.length, 0);

// validate() yields false here and the failures are collected on validationErrors
assert.equal(xmlDocInvalid.validate(xsdDoc), false);
assert.equal(xmlDocInvalid.validationErrors.length, 2);

Logging xmlDocInvalid.validationErrors (for example with console.log) prints something like this:

[ { [Error: Element 'comment': Character content other than whitespace is not allowed because the content type is 'element-only'.
  ] domain: 17, code: 1843, level: 2, column: 0, line: 2 },
  { [Error: Element 'comment': Missing child element(s). Expected is one of ( author, content ).
  ] domain: 17, code: 1871, level: 2, column: 0, line: 2 } ]