Permalink
Browse files

Adding optional support for Processing Instructions

  • Loading branch information...
1 parent 3ea4327 commit 1044e77d783128e44732ef435113e505fe498cc8 @teknopaul committed May 22, 2012
Showing with 85 additions and 6 deletions.
  1. +23 −0 example/test-pis.js
  2. +62 −6 lib/dom-js.js
View
@@ -0,0 +1,23 @@
+var util = require('util');
+var DomJS = require("dom-js").DomJS;
+
+var domjs = new DomJS();
+/**
+ * Test for processing instructions, aka PIs.
+ */
+
+console.log("\nExample XML");
+var string = '<?xml version="1.0" encoding="UTF-8"?>' +
+ '<?xml-stylesheet href="classic.xsl" type="text/xml"?>' +
+ '<xml>' +
+ '<a>a</a>' +
+ '<!-- the comment --><elem someAtt="fat &amp; red">Hello &quot;World&quot;</elem><elem otherAtt="val1"/><elem lastAtt="val1"/></xml>';
+console.log(string);
+domjs.parseProcessingInstructions = true;
+domjs.parse(string, function(err, dom) {
+ console.log(util.inspect(dom, false, 23));
+ console.log("serializes to : " + dom.toXml());
+
+});
+
+domjs.reset(); //before reuse
View
@@ -1,7 +1,7 @@
var sax = require("sax");
/**
- * Really simple XML DOM implementation based on sax that works with Strings.
+ * Simple XML DOM implementation based on sax that works with Strings.
*
* If you have an XML string and want a DOM this utility is convenient.
*
@@ -13,10 +13,11 @@ var sax = require("sax");
* If you want to compile C there are versions based on libxml2
* and jsdom is full featured but complicated.
*
- * This is "lightweight" meaning really simple and serves my purpose, it does not support namespaces or all
+ * This is "lightweight", meaning it is simple and serves my purpose. It does not support namespaces or all
* of the features of XML 1.0; it just takes a string and returns a JavaScript object graph.
*
- * There are only three types of object supported Element, Text and Comment.
+ * Only five types of object are supported:
+ * Element, Text, ProcessingInstruction, CDATASection and Comment.
*
* e.g.
*
@@ -31,17 +32,33 @@ var sax = require("sax");
* ]
* }
*
- * The object returned can be serialized back out with obj.toXml();
+ * The object returned is an instance of Element and can be serialized back out with obj.toXml();
*
*
* @constructor DomJS
*/
var DomJS = function() {
+ /**
+ * The root element of the XML document currently being parsed.
+ * Assigned when the first opening tag is reported by the parser.
+ */
this.root = null;
// Elements that are currently open, innermost last: pushed on every opening
// tag and popped on every closing tag.
this.stack = new Array();
// Element that newly parsed child nodes are appended to; null until the
// first opening tag has been seen.
this.currElement = null;
+ /**
+ * Flag that is set to true if there was a Sax error parsing the XML.
+ */
this.error = false;
+ /**
+ * Use strict parsing, this value is passed to the sax parser.
+ */
this.strict = true;
+ /**
+ * Set to true to parse and write ProcessingInstructions.
+ * Defaults to false for backwards compatibility.
+ */
+ this.parseProcessingInstructions = false;
+ // processingInstructions is intentionally left undefined here; the array is
+ // created lazily when the first processing instruction is parsed with the flag above set.
};
DomJS.prototype.parse = function(string, cb) {
@@ -56,6 +73,7 @@ DomJS.prototype.parse = function(string, cb) {
self.error = true;
cb(true, err);
};
+
parser.ontext = function (text) {
if (self.currElement == null) {
// console.log("Content in the prolog " + text);
@@ -64,30 +82,38 @@ DomJS.prototype.parse = function(string, cb) {
var textNode = new Text(text);
self.currElement.children.push(textNode);
};
+
parser.onopencdata = function () {
var cdataNode = new CDATASection();
self.currElement.children.push(cdataNode);
};
+
parser.oncdata = function (data) {
var cdataNode = self.currElement.children[self.currElement.children.length - 1];
cdataNode.appendData(data);
};
+
// do nothing on parser.onclosecdata
parser.onopentag = function (node) {
var elem = new Element(node.name, node.attributes);
if (self.root == null) {
self.root = elem;
+ if ( self.processingInstructions ) {
+ elem.processingInstructions = self.processingInstructions;
+ }
}
if (self.currElement != null) {
self.currElement.children.push(elem);
}
self.currElement = elem;
self.stack.push(self.currElement);
};
+
parser.onclosetag = function (node) {
self.stack.pop();
self.currElement = self.stack[self.stack.length - 1 ];// self.stack.peek();
};
+
parser.oncomment = function (comment) {
if (self.currElement == null) {
//console.log("Comments in the prolog discarded " + comment);
@@ -96,6 +122,16 @@ DomJS.prototype.parse = function(string, cb) {
var commentNode = new Comment(comment);
self.currElement.children.push(commentNode);
};
+
+ parser.onprocessinginstruction = function (node) {
+ if (self.parseProcessingInstructions === true) {
+ if ( self.processingInstructions === undefined) {
+ self.processingInstructions = new Array();
+ }
+ var pi = new ProcessingInstruction(node.name, node.body);
+ self.processingInstructions.push(pi);
+ }
+ };
parser.onend = function () {
if ( self.error == false) {
@@ -121,16 +157,28 @@ var escape = function(string) {
.replace(/'/g, '&apos;');
};
-
+/**
+ * An XML element: a tag name together with its attributes and child nodes.
+ *
+ * @constructor Element
+ * @param name tag name of the element
+ * @param attributes attributes of the element; an empty array is used when a falsy value is passed
+ * @param children child nodes of the element; an empty array is used when a falsy value is passed
+ */
var Element = function(name, attributes, children ) {
this.name = name;
this.attributes = attributes || [];
this.children = children || [];
-}
+ // processingInstructions stays undefined on a new Element; the parser assigns
+ // it on the root element only when processing instructions were collected.
+};
Element.prototype.toXml = function(sb) {
if (typeof sb == 'undefined') {
sb = {buf:''}; // Strings are pass by value in JS it seems
}
+
+ if (this.processingInstructions) {
+ for (var i = 0 ; i < this.processingInstructions.length ; i++) {
+ sb.buf += '<?' + this.processingInstructions[i].name + ' ' + this.processingInstructions[i].body + '?>\n';
+ }
+ }
+
sb.buf += '<' + this.name;
for (att in this.attributes) {
@@ -177,6 +225,14 @@ Comment.prototype.toXml = function(sb) {
sb.buf += '<!--' + this.comment + '-->';
};
/**
 * A processing instruction, serialized as <?name body?>.
 *
 * @constructor ProcessingInstruction
 * @param name name of the instruction, written right after "<?"
 * @param body remaining text of the instruction, stored as given
 */
+var ProcessingInstruction = function(name, body) {
+ this.name = name;
+ this.body = body;
+};
/**
 * Appends this instruction to sb.buf as '<?' + name + ' ' + body + '?>'.
 * A single space is always written between name and body; neither value is escaped.
 *
 * @param sb buffer object whose `buf` string property is appended to
 */
+ProcessingInstruction.prototype.toXml = function(sb) {
+ sb.buf += '<?' + this.name + ' ' + this.body + '?>';
+};
+
/**
 * A CDATA section node whose character data is kept in `text`.
 *
 * @constructor CDATASection
 * @param data initial content; any falsy value results in an empty string
 */
var CDATASection = function(data){
  var initialText = '';
  if (data) {
    initialText = data;
  }
  this.text = initialText;
};

0 comments on commit 1044e77

Please sign in to comment.