Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
teknopaul committed Jul 20, 2011
0 parents commit c9c303d
Show file tree
Hide file tree
Showing 7 changed files with 332 additions and 0 deletions.
37 changes: 37 additions & 0 deletions README
@@ -0,0 +1,37 @@
dom-js is a node module that creates a DOM from a String using sax-js.

DomJS has a parse() method that takes a string and a callback which is used when the DOM is ready, or if there is an error.

The DOM returned is made of Element, Comment and Text objects (N.B. no ProcessingInstruction or other node types).

An Element has a name, a map of attributes, and an array of children, so you can find everything.

You get a couple of convenience methods on Element, text() and firstChild(); their usage is hopefully obvious and they save some boilerplate null checking.

The Element object has a method toXml() which returns a String with whitespace intact.

That's it (for now): no bells, no whistles.


var util = require('util'); // needed for util.inspect below
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

var string = '<xml><!-- the comment --><elem someAtt="fat &amp; red">Hello &amp; World</elem></xml>';
domjs.parse(string, function(err, dom) {
    if (err) {
        // On failure the second argument carries the parser error, not a DOM.
        console.error("parse failed : " + dom);
        return;
    }
    console.log(util.inspect(dom, false, 23));
    console.log("serializes to : " + dom.toXml());
});




Gotchas (that I can fix if it bothers anyone)

An empty tag <a></a> will always be serialized in the short form <a/>.

<?xml version="1.0" encoding="UTF-8"?> is ignored and dropped if there is one.
Use var xml = '<?xml version="1.0" encoding="UTF-8"?>' + dom.toXml(); to fix that one :)

An instance of DomJS should only be used once, but if you must reuse, call reset() before re-calling parse().

11 changes: 11 additions & 0 deletions example/readme.js
@@ -0,0 +1,11 @@
/*
 * Minimal usage example: parse a small document, dump the resulting object
 * graph and serialize it back to XML.
 */
var util = require('util');
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

var string = '<xml><!-- the comment --><elem someAtt="fat &amp; red">Hello &amp; World</elem></xml>';
domjs.parse(string, function(err, dom) {
    if (err) {
        // On failure the second argument carries the parser error, not a DOM,
        // so calling dom.toXml() would throw.
        console.error("parse failed : " + dom);
        return;
    }
    console.log(util.inspect(dom, false, 23));
    console.log("serializes to : " + dom.toXml());
});

62 changes: 62 additions & 0 deletions example/test-dom-js.js
@@ -0,0 +1,62 @@
/*
 * Walks through a handful of small documents and prints, for each one, the
 * input, the parsed object graph and the re-serialized XML.
 */
var util = require('util');
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

/**
 * Parses one sample document with the shared DomJS instance and prints the result.
 *
 * @param {String} title heading printed before the sample
 * @param {String} string the XML to parse
 */
function show(title, string) {
    console.log(title);
    console.log(string);
    domjs.parse(string, function(err, dom) {
        if (err) {
            // On failure the second argument carries the parser error, not a DOM.
            console.error("parse failed : " + dom);
            return;
        }
        console.log(util.inspect(dom, false, 23));
        console.log("serializes to : " + dom.toXml());
    });
    // parse() invokes its callback before returning, so the instance can be
    // cleared straight away for the next sample.
    domjs.reset(); //before reuse
}

show("\nExample XML",
    '<xml><!-- the comment --><elem someAtt="fat&apos;red">Hello &amp; World</elem><elem otherAtt="val1"/><elem lastAtt="val1"/></xml>');

show("\nMinimal XML", '<xml/>');

show("\nLeading comments are dropped", '<!-- leading comments are dropped--><xml/>');

show("\nEmbeded comments are kept ", '<xml><!-- embeded comments are kept --></xml>');

show("\nWhitespace is respected", '<xml>\n<node/>\n</xml>');

show("\nEmpty tags are always compressed to the short form", '<xml></xml>');
18 changes: 18 additions & 0 deletions example/test-ns-dom-js.js
@@ -0,0 +1,18 @@
var util = require('util');
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

/*
 * N.B. namespaces are not really supported, but a prefixed name does not
 * break anything: the prefix is simply kept as part of the element name.
 */
console.log("\nExample XML With a namespace");
var string = '<ns:xml><!-- the comment --><ns:elem someAtt="fat &amp; red">Hello &amp; World</ns:elem></ns:xml>';
console.log(string);
domjs.parse(string, function(err, dom) {
    if (err) {
        // On failure the second argument carries the parser error, not a DOM.
        console.error("parse failed : " + dom);
        return;
    }
    console.log(util.inspect(dom, false, 23));
    console.log("serializes to : " + dom.toXml());
});

domjs.reset(); //before reuse

14 changes: 14 additions & 0 deletions example/test-one-dom-js.js
@@ -0,0 +1,14 @@
/*
 * Single-document example that includes an XML declaration, which the
 * parser ignores.
 */
var util = require('util');
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

console.log("\nExample XML");
var string = '<?xml version="1.0" encoding="UTF-8"?><xml><!-- the comment --><elem someAtt="fat &amp; red">Hello &quot;World&quot;</elem><elem otherAtt="val1"/><elem lastAtt="val1"/></xml>';
console.log(string);
domjs.parse(string, function(err, dom) {
    if (err) {
        // On failure the second argument carries the parser error, not a DOM.
        console.error("parse failed : " + dom);
        return;
    }
    console.log(util.inspect(dom, false, 23));
    console.log("serializes to : " + dom.toXml());
});

domjs.reset(); //before reuse
176 changes: 176 additions & 0 deletions lib/dom-js.js
@@ -0,0 +1,176 @@
var sax = require("sax");
// Passed as the first argument to sax.parser() in DomJS.prototype.parse
// (sax's "strict" flag).
var strict = true;

/**
 * Really simple XML DOM implementation based on sax that works with Strings.
 *
 * If you have an XML string and want a DOM this utility is convenient.
 *
 *   var domjs = new DomJS();
 *   domjs.parse(xmlString, function(err, dom) {
 *
 *   });
 *
 * If you want to compile C there are versions based on libxml2,
 * and jsdom is full featured but complicated.
 *
 * This is "lightweight", meaning really simple; it serves my purpose. It does not
 * support namespaces or all of the features of XML 1.0, it just takes a string and
 * returns a JavaScript object graph.
 *
 * There are only three types of object supported: Element, Text and Comment.
 *
 * e.g.
 *
 * take <xml><elem att="val1"/><elem att="val1"/><elem att="val1"/></xml>
 *
 * return { name : "xml",
 *          attributes : {},
 *          children : [
 *              { name : "elem", attributes : {att:'val1'}, children : [] },
 *              { name : "elem", attributes : {att:'val1'}, children : [] },
 *              { name : "elem", attributes : {att:'val1'}, children : [] }
 *          ]
 *        }
 *
 * The object returned can be serialized back out with obj.toXml();
 *
 * Declared as a function declaration so that it stays local to the module
 * instead of leaking into the global object.
 *
 * @constructor DomJS
 */
function DomJS() {
    // Root Element of the document; set when the first open tag is seen.
    this.root = null;
    // Elements that are currently open, innermost last.
    this.stack = [];
    // Element that receives the next child node, or null outside the root.
    this.currElement = null;
    // Set once a parse error has been reported.
    this.error = false;
}

/**
 * Parses an XML string into a tree of Element, Text and Comment nodes.
 *
 * Text and comments that appear before the root element are dropped.
 *
 * @param {String} string the XML document to parse
 * @param {Function} cb called as cb(false, rootElement) when parsing ends
 *        without error, or as cb(true, error) for the first error reported
 *        by sax. It is called at most once per parse() call.
 */
DomJS.prototype.parse = function(string, cb) {
    var self = this;
    var parser = sax.parser(strict);

    parser.onerror = function (err) {
        // Inside this handler `this` is not the DomJS instance, so the flag
        // has to be recorded through `self`; otherwise onend would fire the
        // callback a second time with a "successful" result.
        if (self.error) {
            return;
        }
        self.error = true;
        cb(true, err);
        // NOTE(review): some sax versions rethrow the stored error from a
        // later write()/close() unless parser.error is cleared -- confirm
        // against the sax version in use.
    };
    parser.ontext = function (text) {
        if (self.currElement == null) {
            // Content in the prolog is ignored.
            return;
        }
        self.currElement.children.push(new Text(text));
    };
    parser.onopentag = function (node) {
        var elem = new Element(node.name, node.attributes);
        if (self.root == null) {
            self.root = elem;
        }
        if (self.currElement != null) {
            self.currElement.children.push(elem);
        }
        self.currElement = elem;
        self.stack.push(elem);
    };
    parser.onclosetag = function (node) {
        self.stack.pop();
        // Step back to the enclosing element, or to null once the root closes.
        self.currElement = self.stack.length > 0 ? self.stack[self.stack.length - 1] : null;
    };
    parser.oncomment = function (comment) {
        if (self.currElement == null) {
            console.log("Comments in the prolog discarded " + comment);
            return;
        }
        self.currElement.children.push(new Comment(comment));
    };

    parser.onend = function () {
        if (self.error == false) {
            cb(false, self.root);
        }
    };

    parser.write(string).close();
};

/**
 * Puts the instance back into its freshly-constructed state so that it can
 * be handed to parse() again.
 */
DomJS.prototype.reset = function() {
    var blankStack = [];

    this.root = null;
    this.stack = blankStack;
    this.currElement = null;
    this.error = false;
};

/**
 * Escapes the five XML special characters (& > < " ') as entity references.
 *
 * Declared locally so that it no longer overwrites the built-in global
 * escape() function.
 *
 * @param {*} string value to escape; non-strings are converted with String(),
 *        null and undefined are treated as the empty string
 * @returns {String} the escaped text
 */
function escape(string) {
    var text = (string == null) ? '' : String(string);
    // '&' must be replaced first so the entities produced below are not re-escaped.
    return text.replace(/&/g, '&amp;')
        .replace(/>/g, '&gt;')
        .replace(/</g, '&lt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}


/**
 * An XML element: a name, a map of attributes and an ordered list of child nodes.
 *
 * @constructor
 * @param {String} name tag name, written out verbatim by toXml()
 * @param {Object} [attributes] attribute name -> value map, defaults to {}
 * @param {Array} [children] child nodes, each providing toXml(sb); defaults to []
 */
function Element(name, attributes, children) {
    this.name = name;
    // The attributes are a name -> value map, so the default is an object.
    this.attributes = attributes || {};
    this.children = children || [];

    /**
     * Serializes this element and its children.
     *
     * @param {Object} [sb] shared string buffer of the form {buf: ''}; children
     *        append to the same buffer. Omit it when calling from outside.
     * @returns {String} the buffer contents after this element has been appended
     */
    this.toXml = function(sb) {
        if (typeof sb == 'undefined') {
            sb = {buf: ''};
        }
        sb.buf += '<' + this.name;
        // `var att` keeps the loop variable from leaking as a global.
        for (var att in this.attributes) {
            if (Object.prototype.hasOwnProperty.call(this.attributes, att)) {
                sb.buf += ' ' + att + '="' + escape(this.attributes[att]) + '"';
            }
        }
        if (this.children.length != 0) {
            sb.buf += '>';
            for (var i = 0; i < this.children.length; i++) {
                this.children[i].toXml(sb);
            }
            sb.buf += '</' + this.name + '>';
        }
        else {
            // An element without children is always written in the short form.
            sb.buf += '/>';
        }
        return sb.buf;
    };

    /**
     * @returns the first child node, or null when there are no children
     */
    this.firstChild = function() {
        if (this.children.length > 0) {
            return this.children[0];
        }
        return null;
    };

    /**
     * @returns {String} the text of the first child when that child carries a
     *          string `text` property, otherwise null
     */
    this.text = function() {
        var first = this.firstChild();
        // An Element child also has a `text` property, but it is this method,
        // so only a string counts as text content.
        if (first !== null && typeof first.text === 'string') {
            return first.text;
        }
        return null;
    };
}
/**
 * A run of character data inside an Element.
 *
 * @constructor
 * @param {String} data the unescaped text
 */
function Text(data) {
    this.text = data;

    /**
     * Appends the escaped text to the buffer.
     *
     * @param {Object} [sb] shared string buffer of the form {buf: ''}; created
     *        when omitted so the node can also be serialized on its own
     * @returns {String} the buffer contents after the text has been appended
     */
    this.toXml = function(sb) {
        if (typeof sb == 'undefined') {
            sb = {buf: ''};
        }
        sb.buf += escape(this.text);
        return sb.buf;
    };
}
/**
 * An XML comment found inside an Element.
 *
 * @constructor
 * @param {String} comment the comment body, without the surrounding markers
 */
function Comment(comment) {
    this.comment = comment;

    /**
     * Appends the comment, wrapped in its markers, to the buffer. The body is
     * written verbatim (it is not escaped).
     *
     * @param {Object} [sb] shared string buffer of the form {buf: ''}; created
     *        when omitted so the node can also be serialized on its own
     * @returns {String} the buffer contents after the comment has been appended
     */
    this.toXml = function(sb) {
        if (typeof sb == 'undefined') {
            sb = {buf: ''};
        }
        sb.buf += '<!--' + this.comment + '-->';
        return sb.buf;
    };
}


// Public API of the module: the three node constructors, the DomJS parser
// that builds trees from them, and the escape helper used when serializing.
exports.Element = Element;
exports.Text = Text;
exports.Comment = Comment;
exports.DomJS = DomJS;
exports.escape = escape;

14 changes: 14 additions & 0 deletions package.json
@@ -0,0 +1,14 @@
{
"name": "dom-js",
"version": "0.1",
"dependencies": {
"sax": ">=0.1.5"
},
"directories": {
"lib": "./lib"
},
"main": "./lib/dom-js",
"engines": {
"node": "*"
}
}

0 comments on commit c9c303d

Please sign in to comment.