Permalink
Browse files

first commit

  • Loading branch information...
0 parents commit c9c303d4c6f6be7ade61cb554cc39f31283d2648 @teknopaul committed Jul 20, 2011
Showing with 332 additions and 0 deletions.
  1. +37 −0 README
  2. +11 −0 example/readme.js
  3. +62 −0 example/test-dom-js.js
  4. +18 −0 example/test-ns-dom-js.js
  5. +14 −0 example/test-one-dom-js.js
  6. +176 −0 lib/dom-js.js
  7. +14 −0 package.json
37 README
@@ -0,0 +1,37 @@
+dom-js is a node module that creates a DOM from a String using sax-js.
+
+DomJS has a parse() method that takes a string and a callback which is used when the DOM is ready, or if there is an error.
+
+The DOM returned is made of Element, Comment and Text objects (N.B. no ProcessingInstruction or other node types).
+
+An Element has a name, a map of attributes, and an array of children, so you can find everything.
+
+Element also has a couple of convenience methods, text() and firstChild(); their usage is hopefully obvious and they save some boilerplate null checking.
+
+The Element object has a method toXml() which returns a String with whitespace intact.
+
+That's it (for now): no bells, no whistles.
+
+
+var util = require('util'); // needed for util.inspect() below
+var DomJS = require("dom-js").DomJS;
+
+var domjs = new DomJS();
+
+var string = '<xml><!-- the comment --><elem someAtt="fat &amp; red">Hello &amp; World</elem></xml>';
+domjs.parse(string, function(err, dom) {
+    if (err) {
+        // On failure the second argument carries the parser error, not a DOM.
+        console.log("parse failed : " + dom);
+        return;
+    }
+    console.log(util.inspect(dom, false, 23));
+    console.log("serializes to : " + dom.toXml());
+});
+
+
+
+
+Gotchas (that I can fix if it bothers anyone)
+
+An empty tag <a></a> will always be serialized in the short form <a/>.
+
+<?xml version="1.0" encoding="UTF-8"?> is ignored and dropped if there is one
+var xml = '<?xml version="1.0" encoding="UTF-8"?>' + dom.toXml() to fix that one :)
+
+An instance of DomJS should only be used once, but if you must reuse, call reset() before re-calling parse().
+
11 example/readme.js
@@ -0,0 +1,11 @@
+// Minimal usage example: parse a string, dump the DOM, serialize it back to XML.
+var util = require('util');
+var DomJS = require("dom-js").DomJS;
+
+var domjs = new DomJS();
+
+var string = '<xml><!-- the comment --><elem someAtt="fat &amp; red">Hello &amp; World</elem></xml>';
+domjs.parse(string, function(err, dom) {
+    if (err) {
+        // On failure the second argument carries the parser error, not a DOM,
+        // so calling dom.toXml() would throw.
+        console.log("parse failed : " + dom);
+        return;
+    }
+    console.log(util.inspect(dom, false, 23));
+    console.log("serializes to : " + dom.toXml());
+});
+
62 example/test-dom-js.js
@@ -0,0 +1,62 @@
+// Walks one DomJS instance through several sample documents, calling reset()
+// between runs, and prints the parsed DOM plus its serialized form.
+var util = require('util');
+var DomJS = require("dom-js").DomJS;
+
+var domjs = new DomJS();
+
+/**
+ * Callback shared by every example below.
+ *
+ * @param err truthy when parsing failed
+ * @param dom the root Element on success; on failure the parser error
+ */
+function report(err, dom) {
+    if (err) {
+        // dom is the error here, so it must not be inspected/serialized as a DOM.
+        console.log("parse failed : " + dom);
+        return;
+    }
+    console.log(util.inspect(dom, false, 23));
+    console.log("serializes to : " + dom.toXml());
+}
+
+console.log("\nExample XML");
+var string = '<xml><!-- the comment --><elem someAtt="fat&apos;red">Hello &amp; World</elem><elem otherAtt="val1"/><elem lastAtt="val1"/></xml>';
+console.log(string);
+domjs.parse(string, report);
+
+domjs.reset(); //before reuse
+
+console.log("\nMinimal XML");
+string = '<xml/>';
+console.log(string);
+domjs.parse(string, report);
+
+domjs.reset(); //before reuse
+
+console.log("\nLeading comments are dropped");
+string = '<!-- leading comments are dropped--><xml/>';
+console.log(string);
+domjs.parse(string, report);
+
+domjs.reset(); //before reuse
+
+console.log("\nEmbeded comments are kept ");
+string = '<xml><!-- embeded comments are kept --></xml>';
+console.log(string);
+domjs.parse(string, report);
+
+domjs.reset(); //before reuse
+
+console.log("\nWhitespace is respected");
+string = '<xml>\n<node/>\n</xml>';
+console.log(string);
+domjs.parse(string, report);
+
+domjs.reset(); //before reuse
+
+console.log("\nEmpty tags are always compressed to the short form");
+string = '<xml></xml>';
+console.log(string);
+domjs.parse(string, report);
18 example/test-ns-dom-js.js
@@ -0,0 +1,18 @@
+var util = require('util');
+var DomJS = require("dom-js").DomJS;
+
+var domjs = new DomJS();
+
+/*
+ * N.B. namespaces are not really supported, but prefixed names do not break
+ * parsing: the prefix simply stays part of the element name.
+ */
+console.log("\nExample XML With a namespace");
+var string = '<ns:xml><!-- the comment --><ns:elem someAtt="fat &amp; red">Hello &amp; World</ns:elem></ns:xml>';
+console.log(string);
+domjs.parse(string, function(err, dom) {
+    if (err) {
+        // On failure the second argument carries the parser error, not a DOM.
+        console.log("parse failed : " + dom);
+        return;
+    }
+    console.log(util.inspect(dom, false, 23));
+    console.log("serializes to : " + dom.toXml());
+});
+
+domjs.reset(); //before reuse
+
14 example/test-one-dom-js.js
@@ -0,0 +1,14 @@
+// Single example: a document that starts with an XML declaration.
+var util = require('util');
+var DomJS = require("dom-js").DomJS;
+
+var domjs = new DomJS();
+
+console.log("\nExample XML");
+var string = '<?xml version="1.0" encoding="UTF-8"?><xml><!-- the comment --><elem someAtt="fat &amp; red">Hello &quot;World&quot;</elem><elem otherAtt="val1"/><elem lastAtt="val1"/></xml>';
+console.log(string);
+domjs.parse(string, function(err, dom) {
+    if (err) {
+        // On failure the second argument carries the parser error, not a DOM.
+        console.log("parse failed : " + dom);
+        return;
+    }
+    console.log(util.inspect(dom, false, 23));
+    console.log("serializes to : " + dom.toXml());
+});
+
+domjs.reset(); //before reuse
176 lib/dom-js.js
@@ -0,0 +1,176 @@
+var sax = require("sax");
+var strict = true;
+
+/**
+ * Really simple XML DOM implementation based on sax that works with Strings.
+ *
+ * If you have an XML string and want a DOM this utility is convenient.
+ *
+ * var domjs = new DomJS();
+ * domjs.parse(xmlString, function(err, dom) {
+ *
+ * });
+ *
+ * If you want to compile C there are versions based on libxml2
+ * and jsdom is full featured but complicated.
+ *
+ * This is "lightweight" meaning really simple and serves my purpose, it does not support namespaces or all
+ * of the features of XML 1.0 it just takes a string and returns a JavaScript object graph.
+ *
+ * There are only three types of object supported Element, Text and Comment.
+ *
+ * e.g.
+ *
+ * take <xml><elem att="val1"/><elem att="val1"/><elem att="val1"/></xml>
+ *
+ * return { name : "xml",
+ * attributes : {},
+ * children : [
+ * { name : "elem", attributes : {att:'val1'}, children : [] },
+ * { name : "elem", attributes : {att:'val1'}, children : [] },
+ * { name : "elem", attributes : {att:'val1'}, children : [] }
+ * ]
+ * }
+ *
+ * The object returned can be serialized back out with obj.toXml();
+ *
+ *
+ * @constructor DomJS
+ */
+// Declared with var so the constructor stays local to this module instead of
+// becoming an implicit global (which also throws in strict mode).
+var DomJS = function() {
+    this.root = null;        // root Element of the parsed document
+    this.stack = [];         // currently open Elements, innermost last
+    this.currElement = null; // Element that receives new children
+    this.error = false;      // set once the parser has reported an error
+};
+
+/**
+ * Parses an XML string and hands the result to a callback.
+ *
+ * The callback is invoked as cb(false, rootElement) when parsing ends without
+ * an error, or as cb(true, error) for the first error the parser reports.
+ * N.B. the first argument is only a boolean flag; the error object itself
+ * arrives in the second argument.
+ *
+ * Text and comments found outside the root element are dropped.
+ *
+ * @param {String} string the XML text to parse
+ * @param {Function} cb callback of the form function(err, dom)
+ */
+DomJS.prototype.parse = function(string, cb) {
+    var self = this;
+    // Local on purpose: an undeclared assignment would create a global.
+    var parser = sax.parser(strict);
+
+    parser.onerror = function (err) {
+        // This handler is called by the parser, so "this" is not the DomJS
+        // instance; the flag has to be set through self or onend never sees it.
+        if (self.error) {
+            // Already reported; keep the callback to a single error call.
+            return;
+        }
+        self.error = true;
+        cb(true, err);
+    };
+    parser.ontext = function (text) {
+        if (self.currElement == null) {
+            // console.log("Content in the prolog " + text);
+            return;
+        }
+        var textNode = new Text(text);
+        self.currElement.children.push(textNode);
+    };
+    parser.onopentag = function (node) {
+        var elem = new Element(node.name, node.attributes);
+        if (self.root == null) {
+            self.root = elem;
+        }
+        if (self.currElement != null) {
+            self.currElement.children.push(elem);
+        }
+        self.currElement = elem;
+        self.stack.push(self.currElement);
+    };
+    parser.onclosetag = function (node) {
+        self.stack.pop();
+        // Back to the enclosing element, or null once the root has been closed.
+        self.currElement = self.stack.length > 0 ? self.stack[self.stack.length - 1] : null;
+    };
+    parser.oncomment = function (comment) {
+        if (self.currElement == null) {
+            console.log("Comments in the prolog discarded " + comment);
+            return;
+        }
+        var commentNode = new Comment(comment);
+        self.currElement.children.push(commentNode);
+    };
+
+    parser.onend = function () {
+        // Only report success when no error was reported earlier.
+        if ( self.error == false) {
+            cb(false, self.root);
+        }
+    };
+
+    parser.write(string).close();
+};
+
+/**
+ * Puts the instance back into its freshly constructed state so that
+ * parse() can be called again.
+ */
+DomJS.prototype.reset = function() {
+    this.error = false;
+    this.currElement = null;
+    this.stack = [];
+    this.root = null;
+};
+
+/**
+ * Escapes the five XML special characters (& > < " ') as entities.
+ *
+ * Declared with var so it stays local to this module; an undeclared
+ * assignment would overwrite the built-in global escape() function.
+ *
+ * @param {String} string the raw text; other values are converted with String()
+ * @return {String} the text, safe for use in XML content or attribute values
+ */
+var escape = function(string) {
+    // "&" must be replaced first, otherwise the entities produced by the
+    // later replacements would themselves be escaped again.
+    return String(string).replace(/&/g, '&amp;')
+        .replace(/>/g, '&gt;')
+        .replace(/</g, '&lt;')
+        .replace(/"/g, '&quot;')
+        .replace(/'/g, '&apos;');
+};
+
+
+/**
+ * An XML element.
+ *
+ * @param {String} name the tag name
+ * @param {Object} attributes map of attribute name to value, defaults to {}
+ * @param {Array} children child nodes (Element, Text, Comment), defaults to []
+ * @constructor Element
+ */
+var Element = function(name, attributes, children ) {
+    this.name = name;
+    // A map, not an array: attributes are looked up and serialized by name.
+    this.attributes = attributes || {};
+    this.children = children || [];
+
+    /**
+     * Serializes this element and its children.
+     *
+     * @param {Object} sb optional shared buffer of the form {buf:''}; a string
+     *        argument could not be appended to by the children, hence the
+     *        wrapper object. Omit it when calling from outside.
+     * @return {String} everything written to the buffer so far
+     */
+    this.toXml = function(sb) {
+        if (typeof sb == 'undefined') {
+            sb = {buf:''};
+        }
+        sb.buf += '<' + this.name;
+        for (var att in this.attributes) {
+            // Skip anything inherited through the prototype chain.
+            if (Object.prototype.hasOwnProperty.call(this.attributes, att)) {
+                sb.buf += ' ' + att + '="' + escape(this.attributes[att]) + '"';
+            }
+        }
+        if (this.children.length != 0) {
+            sb.buf += '>';
+            for (var i = 0 ; i < this.children.length ; i++) {
+                this.children[i].toXml(sb);
+            }
+            sb.buf += '</' + this.name + '>';
+        }
+        else {
+            // An element without children is always written in the short form.
+            sb.buf += '/>';
+        }
+        return sb.buf;
+    };
+
+    /**
+     * @return the first child node, or null if there are no children
+     */
+    this.firstChild = function() {
+        if ( this.children.length > 0) {
+            return this.children[0];
+        }
+        return null;
+    };
+
+    /**
+     * @return {String} the text of the first child if it is a Text node, otherwise null
+     */
+    this.text = function() {
+        if ( this.children.length > 0) {
+            var first = this.children[0];
+            // Element children carry a text() *method*; only a string
+            // property identifies a Text node.
+            if (typeof first.text == 'string') {
+                return first.text;
+            }
+        }
+        return null;
+    };
+};
+/**
+ * A text node.
+ *
+ * @param {String} data the unescaped character data
+ * @constructor Text
+ */
+var Text = function(data){
+    this.text = data;
+
+    /**
+     * Appends the escaped text to the shared buffer.
+     *
+     * @param {Object} sb buffer object of the form {buf:''}
+     */
+    this.toXml = function(sb) {
+        sb.buf += escape(this.text);
+    };
+};
+/**
+ * A comment node.
+ *
+ * @param {String} comment the comment body, without the surrounding markers
+ * @constructor Comment
+ */
+var Comment = function(comment) {
+    this.comment = comment;
+
+    /**
+     * Appends the comment, wrapped in its markers, to the shared buffer.
+     * The body is written as-is, without escaping.
+     *
+     * @param {Object} sb buffer object of the form {buf:''}
+     */
+    this.toXml = function(sb) {
+        sb.buf += '<!--' + this.comment + '-->';
+    };
+};
+
+
+// Public API: the three node types, the parser and the XML escaping helper.
+exports.Element = Element;
+exports.Text = Text;
+exports.Comment = Comment;
+exports.DomJS = DomJS;
+exports.escape = escape;
+
14 package.json
@@ -0,0 +1,14 @@
+{
+ "name": "dom-js",
+ "version": "0.1",
+ "dependencies": {
+ "sax": ">=0.1.5"
+ },
+ "directories": {
+ "lib": "./lib"
+ },
+ "main": "./lib/dom-js",
+ "engines": {
+ "node": "*"
+ }
+}

0 comments on commit c9c303d

Please sign in to comment.