Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit c9c303d
Showing
7 changed files
with
332 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
dom-js is a node module that creates a DOM from a String using sax-js. | ||
|
||
DomJS has a parse() method that takes a string and a callback which is used when the DOM is ready, or if there is an error. | ||
|
||
The DOM returned is made of Element, Comment and Text objects (N.B. no ProcessingInstruction or other node types).
|
||
An Element has a name, a map of attributes, and an array of children, so you can find everything. | ||
|
||
Element also has two convenience methods, text() and firstChild(). Their usage is hopefully obvious, and they save some boilerplate null checking.
|
||
The Element object has a method toXml() which returns a String with whitespace intact.
|
||
That's it (for now): no bells, no whistles.
|
||
|
||
var util = require('util'); // needed for util.inspect below
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

// Ampersands in XML must be written as &amp; or the document is not well-formed.
var string = '<xml><!-- the comment --><elem someAtt="fat &amp; red">Hello &amp; World</elem></xml>';
domjs.parse(string, function(err, dom) {
    console.log(util.inspect(dom, false, 23));
    console.log("serializes to : " + dom.toXml());
});
|
||
|
||
|
||
|
||
Gotchas (that I can fix if it bothers anyone) | ||
|
||
An empty tag <a></a> will always be serialized in the short form <a/>. | ||
|
||
<?xml version="1.0" encoding="UTF-8"?> is ignored and dropped if there is one | ||
var xml = '<?xml version="1.0" encoding="UTF-8"?>' + dom.toXml() to fix that one :) | ||
|
||
An instance of DomJS should only be used once, but if you must reuse, call reset() before re-calling parse(). | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
// Minimal example: parse one XML string, dump the DOM, then serialize it back.
var util = require('util');
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

// Ampersands in XML must be written as &amp; or the document is not well-formed.
var string = '<xml><!-- the comment --><elem someAtt="fat &amp; red">Hello &amp; World</elem></xml>';
domjs.parse(string, function(err, dom) {
    if (err) {
        // On failure the second argument is the parser error, not a DOM,
        // so it has no toXml() method.
        console.log("parse failed : " + dom);
        return;
    }
    console.log(util.inspect(dom, false, 23));
    console.log("serializes to : " + dom.toXml());
});
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Walks through a series of small XML documents with one reused DomJS
// instance, printing the parsed DOM and its serialization for each.
var util = require('util');
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

/**
 * Prints a heading and the raw XML, parses it, prints the resulting DOM and
 * its serialized form, then resets the shared parser so it can be reused.
 *
 * @param {string} title heading printed before the XML
 * @param {string} xml the document to parse
 */
var show = function(title, xml) {
    console.log(title);
    console.log(xml);
    domjs.parse(xml, function(err, dom) {
        if (err) {
            // On failure the second argument is the parser error, not a DOM.
            console.log("parse failed : " + dom);
            return;
        }
        console.log(util.inspect(dom, false, 23));
        console.log("serializes to : " + dom.toXml());
    });
    domjs.reset(); //before reuse
};

// The apostrophe and ampersands are written as entities: a raw ' would end
// the JavaScript string literal and a raw & is not well-formed XML.
show("\nExample XML",
    '<xml><!-- the comment --><elem someAtt="fat&apos;red">Hello &amp; World</elem><elem otherAtt="val1"/><elem lastAtt="val1"/></xml>');

show("\nMinimal XML", '<xml/>');

show("\nLeading comments are dropped", '<!-- leading comments are dropped--><xml/>');

show("\nEmbeded comments are kept ", '<xml><!-- embeded comments are kept --></xml>');

show("\nWhitespace is respected", '<xml>\n<node/>\n</xml>');

show("\nEmpty tags are always compressed to the short form", '<xml></xml>');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
// Example with namespace-prefixed element names.
var util = require('util');
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

/*
 * N.B. namespaces are not really supported, but prefixed names do not break
 * parsing.
 */
console.log("\nExample XML With a namespace");
// Ampersands in XML must be written as &amp; or the document is not well-formed.
var string = '<ns:xml><!-- the comment --><ns:elem someAtt="fat &amp; red">Hello &amp; World</ns:elem></ns:xml>';
console.log(string);
domjs.parse(string, function(err, dom) {
    if (err) {
        // On failure the second argument is the parser error, not a DOM.
        console.log("parse failed : " + dom);
        return;
    }
    console.log(util.inspect(dom, false, 23));
    console.log("serializes to : " + dom.toXml());
});

domjs.reset(); //before reuse
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Example with a leading XML declaration in the input document.
var util = require('util');
var DomJS = require("dom-js").DomJS;

var domjs = new DomJS();

console.log("\nExample XML");
// Ampersands in XML must be written as &amp; or the document is not well-formed.
var string = '<?xml version="1.0" encoding="UTF-8"?><xml><!-- the comment --><elem someAtt="fat &amp; red">Hello "World"</elem><elem otherAtt="val1"/><elem lastAtt="val1"/></xml>';
console.log(string);
domjs.parse(string, function(err, dom) {
    if (err) {
        // On failure the second argument is the parser error, not a DOM.
        console.log("parse failed : " + dom);
        return;
    }
    console.log(util.inspect(dom, false, 23));
    console.log("serializes to : " + dom.toXml());
});

domjs.reset(); //before reuse
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
var sax = require("sax"); | ||
var strict = true; | ||
|
||
/**
 * Really simple XML DOM implementation based on sax that works with Strings.
 *
 * If you have an XML string and want a DOM this utility is convenient.
 *
 *     var domjs = new DomJS();
 *     domjs.parse(xmlString, function(err, dom) {
 *
 *     });
 *
 * If you want to compile C there are versions based on libxml2,
 * and jsdom is full featured but complicated.
 *
 * This is "lightweight", meaning really simple: it does not support
 * namespaces or all of the features of XML 1.0, it just takes a string and
 * returns a JavaScript object graph.
 *
 * There are only three types of object supported: Element, Text and Comment.
 *
 * e.g.
 *
 * take <xml><elem att="val1"/><elem att="val1"/><elem att="val1"/></xml>
 *
 * return { name : "xml",
 *          attributes : {},
 *          children : [
 *            { name : "elem", attributes : {att:'val1'}, children : [] },
 *            { name : "elem", attributes : {att:'val1'}, children : [] },
 *            { name : "elem", attributes : {att:'val1'}, children : [] }
 *          ]
 *        }
 *
 * The object returned can be serialized back out with obj.toXml();
 *
 * @constructor DomJS
 */
var DomJS = function() {
    // The constructor and reset() establish the same initial state.
    this.reset();
};

/**
 * Parses an XML string and hands the result to a callback.
 *
 * The callback is invoked as cb(false, rootElement) when parsing ends without
 * an error, or as cb(true, error) for the first error the parser reports.
 * It is invoked at most once per call.
 *
 * Text and comments seen before the root element is opened are dropped.
 *
 * @param {string} string the XML text to parse
 * @param {function} cb callback receiving (failed, domOrError)
 */
DomJS.prototype.parse = function(string, cb) {
    var self = this;
    // Declared locally so separate DomJS instances never share a parser.
    var parser = sax.parser(strict);

    parser.onerror = function (err) {
        if (self.error) {
            // Only the first error is reported to the caller.
            return;
        }
        // The flag must live on the DomJS instance (self): inside this
        // handler `this` is not the DomJS instance, and onend reads
        // self.error to decide whether to report success.
        self.error = true;
        cb(true, err);
    };
    parser.ontext = function (text) {
        if (self.currElement == null) {
            // Content in the prolog (or after the root element) is ignored.
            return;
        }
        self.currElement.children.push(new Text(text));
    };
    parser.onopentag = function (node) {
        var elem = new Element(node.name, node.attributes);
        if (self.root == null) {
            self.root = elem;
        }
        if (self.currElement != null) {
            self.currElement.children.push(elem);
        }
        self.currElement = elem;
        self.stack.push(elem);
    };
    parser.onclosetag = function () {
        self.stack.pop();
        // The new current element is whatever is now on top of the stack,
        // or null once the root element has been closed.
        self.currElement = self.stack.length > 0 ? self.stack[self.stack.length - 1] : null;
    };
    parser.oncomment = function (comment) {
        if (self.currElement == null) {
            console.log("Comments in the prolog discarded " + comment);
            return;
        }
        self.currElement.children.push(new Comment(comment));
    };

    parser.onend = function () {
        if (!self.error) {
            cb(false, self.root);
        }
    };

    parser.write(string).close();
};

/**
 * Clears all parse state so the instance can be used for another parse().
 */
DomJS.prototype.reset = function() {
    this.root = null;
    this.stack = [];
    this.currElement = null;
    this.error = false;
};
|
||
/**
 * Escapes the five XML special characters so a value can be written safely
 * as element text or inside a double-quoted attribute.
 *
 * Declared with var so it stays local to this module instead of overwriting
 * the built-in global escape().
 *
 * @param {*} string the value to escape; non-strings are converted with String()
 * @returns {string} the value with & > < " ' replaced by entity references
 */
var escape = function(string) {
    // '&' must be replaced first, otherwise the ampersands introduced by the
    // other replacements would themselves be escaped again.
    return String(string).replace(/&/g, '&amp;')
        .replace(/>/g, '&gt;')
        .replace(/</g, '&lt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
};
|
||
|
||
/**
 * An XML element node.
 *
 * @constructor Element
 * @param {string} name the tag name
 * @param {Object} [attributes] map of attribute name to value, defaults to {}
 * @param {Array} [children] child nodes, each of which must provide
 *        toXml(sb); defaults to []
 */
var Element = function(name, attributes, children) {
    this.name = name;
    this.attributes = attributes || {};
    this.children = children || [];

    /**
     * Serializes this element and its descendants.
     *
     * @param {Object} [sb] shared buffer of the form {buf: string}; created
     *        when omitted, i.e. for the outermost call
     * @returns {string} the content of the buffer after this element has
     *          been appended
     */
    this.toXml = function(sb) {
        if (typeof sb === 'undefined') {
            // An object is used as the buffer so that nested calls append to
            // the same string.
            sb = {buf: ''};
        }
        sb.buf += '<' + this.name;
        for (var att in this.attributes) {
            // Skip anything inherited through the prototype chain.
            if (Object.prototype.hasOwnProperty.call(this.attributes, att)) {
                sb.buf += ' ' + att + '="' + escape(this.attributes[att]) + '"';
            }
        }
        if (this.children.length !== 0) {
            sb.buf += '>';
            for (var i = 0; i < this.children.length; i++) {
                this.children[i].toXml(sb);
            }
            sb.buf += '</' + this.name + '>';
        }
        else {
            // Childless elements are always written in the short form.
            sb.buf += '/>';
        }
        return sb.buf;
    };

    /**
     * @returns {Object|null} the first child node, or null when there is none
     */
    this.firstChild = function() {
        if (this.children.length > 0) {
            return this.children[0];
        }
        return null;
    };

    /**
     * @returns {string|null} the text of the first child when that child
     *          carries a non-empty string in its text property, otherwise null
     */
    this.text = function() {
        if (this.children.length > 0) {
            var first = this.children[0];
            // Element children expose text as a method, not as data, so only
            // a string value counts as text content.
            if (typeof first.text === 'string' && first.text) {
                return first.text;
            }
        }
        return null;
    };
};
/**
 * A text node.
 *
 * @constructor Text
 * @param {string} data the character data of the node
 */
var Text = function(data) {
    this.text = data;

    /**
     * Appends the escaped text to the shared buffer.
     *
     * @param {Object} sb buffer of the form {buf: string}
     */
    this.toXml = function(sb) {
        sb.buf += escape(this.text);
    };
};
/**
 * A comment node.
 *
 * @constructor Comment
 * @param {string} comment the comment text, without the surrounding markers
 */
var Comment = function(comment) {
    this.comment = comment;

    /**
     * Appends the comment, wrapped in comment markers, to the shared buffer.
     * The comment text is written as-is, without escaping.
     *
     * @param {Object} sb buffer of the form {buf: string}
     */
    this.toXml = function(sb) {
        sb.buf += '<!--' + this.comment + '-->';
    };
};
|
||
|
||
// Public API of the module.
exports.Element = Element;
exports.Text = Text;
exports.Comment = Comment;
exports.DomJS = DomJS;
exports.escape = escape;
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"name": "dom-js", | ||
"version": "0.1", | ||
"dependencies": { | ||
"sax": ">=0.1.5" | ||
}, | ||
"directories": { | ||
"lib": "./lib" | ||
}, | ||
"main": "./lib/dom-js", | ||
"engines": { | ||
"node": "*" | ||
} | ||
} |