Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fixed DomUtils.testElement() and added new, related projects to the README
  • Loading branch information...
commit 00fbea9eff870ff3b7453c31b9df83d56243e5b9 1 parent 8e538e0
Chris Winberry authored
7 README.md
View
@@ -177,3 +177,10 @@ becomes:
##DomUtils
###TBD (see utils_example.js for now)
+
+##Related Projects
+
+Looking for CSS selectors to search the DOM? Try Node-SoupSelect, a port of SoupSelect to NodeJS: http://github.com/harryf/node-soupselect
+
+There's also a port of hpricot to NodeJS that uses node-HtmlParser for HTML parsing: http://github.com/silentrob/Apricot
+
26 lib/node-htmlparser.js
View
@@ -18,7 +18,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
***********************************************/
-/* v1.5.0 */
+/* v1.6.3 */
(function () {
@@ -656,28 +656,36 @@ function DefaultHandler (callback, options) {
var DomUtils = {
testElement: function DomUtils$testElement (options, element) {
if (!element) {
- return(false);
+ return false;
}
for (var key in options) {
if (key == "tag_name") {
if (element.type != "tag" && element.type != "script" && element.type != "style") {
- return(false);
+ return false;
+ }
+ if (!options["tag_name"](element.name)) {
+ return false;
}
- return(options["tag_name"](element.name));
} else if (key == "tag_type") {
- return(options["tag_type"](element.type));
+ if (!options["tag_type"](element.type)) {
+ return false;
+ }
} else if (key == "tag_contains") {
if (element.type != "text" && element.type != "comment" && element.type != "directive") {
- return(false);
+ return false;
+ }
+ if (!options["tag_contains"](element.data)) {
+ return false;
}
- return(options["tag_contains"](element.data));
} else {
- return(element.attribs && options[key](element.attribs[key]));
+ if (!element.attribs || !options[key](element.attribs[key])) {
+ return false;
+ }
}
}
- return(true);
+ return true;
}
, getElements: function DomUtils$getElements (options, currentElement, recurse, limit) {
4 lib/node-htmlparser.min.js
View
@@ -18,5 +18,5 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
***********************************************/
-/* v1.6.2 */
-(function(){function e(a){this.validateHandler(a);this._handler=a;this.reset()}function n(a){n.super_.call(this,a,{ignoreWhitespace:true,verbose:false,enforceEmptyTags:false})}function g(a,c){this.reset();this._options=c?c:{};if(this._options.ignoreWhitespace==undefined)this._options.ignoreWhitespace=false;if(this._options.verbose==undefined)this._options.verbose=true;if(this._options.enforceEmptyTags==undefined)this._options.enforceEmptyTags=true;if(typeof a=="function")this._callback=a}if(!(typeof require== "function"&&typeof exports=="object"&&typeof module=="object"&&typeof __filename=="string"&&typeof __dirname=="string")){if(this.Tautologistics){if(this.Tautologistics.NodeHtmlParser)return}else this.Tautologistics={};this.Tautologistics.NodeHtmlParser={};exports=this.Tautologistics.NodeHtmlParser}var d={Text:"text",Directive:"directive",Comment:"comment",Script:"script",Style:"style",Tag:"tag"};e._reTrim=/(^\s+|\s+$)/g;e._reTrimComment=/(^\!--|--$)/g;e._reWhitespace=/\s/g;e._reTagName=/^\s*(\/?)\s*([^\s\/]+)/; e._reAttrib=/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;e._reTags=/[\<\>]/g;e.prototype.parseComplete=function(a){this.reset();this.parseChunk(a);this.done()};e.prototype.parseChunk=function(a){this._done&&this.handleError(new Error("Attempted to parse chunk after parsing already done"));this._buffer+=a;this.parseTags()};e.prototype.done=function(){if(!this._done){this._done=true;if(this._buffer.length){var a=this._buffer;this._buffer= 
"";a={raw:a,data:this._parseState==d.Text?a:a.replace(e._reTrim,""),type:this._parseState};if(this._parseState==d.Tag||this._parseState==d.Script||this._parseState==d.Style)a.name=this.parseTagName(a.data);this.parseAttribs(a);this._elements.push(a)}this.writeHandler();this._handler.done()}};e.prototype.reset=function(){this._buffer="";this._done=false;this._elements=[];this._next=this._current=this._elementsCurrent=0;this._parseState=d.Text;this._prevTagSep="";this._tagStack=[];this._handler.reset()}; e.prototype._handler=null;e.prototype._buffer=null;e.prototype._done=false;e.prototype._elements=null;e.prototype._elementsCurrent=0;e.prototype._current=0;e.prototype._next=0;e.prototype._parseState=d.Text;e.prototype._prevTagSep="";e.prototype._tagStack=null;e.prototype.parseTagAttribs=function(a){for(var c=a.length,b=0;b<c;){var h=a[b++];if(h.type==d.Tag||h.type==d.Script||h.type==d.style)this.parseAttribs(h)}return a};e.prototype.parseAttribs=function(a){if(!(a.type!=d.Script&&a.type!=d.Style&& a.type!=d.Tag)){var c=a.data.split(e._reWhitespace,1)[0];c=a.data.substring(c.length);if(!(c.length<1)){var b;for(e._reAttrib.lastIndex=0;b=e._reAttrib.exec(c);){if(a.attribs==undefined)a.attribs={};if(typeof b[1]=="string"&&b[1].length)a.attribs[b[1]]=b[2];else if(typeof b[3]=="string"&&b[3].length)a.attribs[b[3].toString()]=b[4].toString();else if(typeof b[5]=="string"&&b[5].length)a.attribs[b[5]]=b[6];else if(typeof b[7]=="string"&&b[7].length)a.attribs[b[7]]=b[7]}}}};e.prototype.parseTagName= function(a){if(a==null||a=="")return"";a=e._reTagName.exec(a);if(!a)return"";return(a[1]?"/":"")+a[2]};e.prototype.parseTags=function(){for(var a=this._buffer.length-1;e._reTags.test(this._buffer);){this._next=e._reTags.lastIndex-1;var c=this._buffer.charAt(this._next),b=this._buffer.substring(this._current,this._next);b={raw:b,data:this._parseState==d.Text?b:b.replace(e._reTrim,""),type:this._parseState};var 
h=this.parseTagName(b.data);if(this._tagStack.length)if(this._tagStack[this._tagStack.length- 1]==d.Script)if(h=="/script")this._tagStack.pop();else{if(b.raw.indexOf("!--")!=0){b.type=d.Text;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Text){var i=this._elements[this._elements.length-1];i.raw=i.data=i.raw+this._prevTagSep+b.raw;b.raw=b.data=""}}}else if(this._tagStack[this._tagStack.length-1]==d.Style)if(h=="/style")this._tagStack.pop();else{if(b.raw.indexOf("!--")!=0){b.type=d.Text;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Text)if(b.raw!= ""){i=this._elements[this._elements.length-1];i.raw=i.data=i.raw+this._prevTagSep+b.raw;b.raw=b.data=""}else i.raw=i.data=i.raw+this._prevTagSep;else if(b.raw!="")b.raw=b.data=b.raw}}else if(this._tagStack[this._tagStack.length-1]==d.Comment){var j=b.raw.length;if(b.raw.charAt(j-2)=="-"&&b.raw.charAt(j-1)=="-"&&c==">"){this._tagStack.pop();if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){i=this._elements[this._elements.length-1];i.raw=i.data=(i.raw+b.raw).replace(e._reTrimComment, "");b.raw=b.data="";b.type=d.Text}else b.type=d.Comment}else{b.type=d.Comment;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){i=this._elements[this._elements.length-1];i.raw=i.data=i.raw+b.raw+c;b.raw=b.data="";b.type=d.Text}else b.raw=b.data=b.raw+c}}if(b.type==d.Tag){b.name=h;if(b.raw.indexOf("!--")==0){b.type=d.Comment;delete b.name;j=b.raw.length;if(b.raw.charAt(j-1)=="-"&&b.raw.charAt(j-2)=="-"&&c==">")b.raw=b.data=b.raw.replace(e._reTrimComment,"");else{b.raw+= c;this._tagStack.push(d.Comment)}}else if(b.raw.indexOf("!")==0||b.raw.indexOf("?")==0)b.type=d.Directive;else if(b.name=="script"){b.type=d.Script;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Script)}else if(b.name=="/script")b.type=d.Script;else 
if(b.name=="style"){b.type=d.Style;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Style)}else if(b.name=="/style")b.type=d.Style;if(b.name&&b.name.charAt(0)=="/")b.data=b.name}if(b.raw!=""||b.type!=d.Text){this.parseAttribs(b); this._elements.push(b);b.type!=d.Text&&b.type!=d.Comment&&b.type!=d.Directive&&b.data.charAt(b.data.length-1)=="/"&&this._elements.push({raw:"/"+b.name,data:"/"+b.name,name:"/"+b.name,type:b.type})}this._parseState=c=="<"?d.Tag:d.Text;this._current=this._next+1;this._prevTagSep=c}this._buffer=this._current<=a?this._buffer.substring(this._current):"";this._current=0;this.writeHandler()};e.prototype.validateHandler=function(a){if(typeof a!="object")throw new Error("Handler is not an object");if(typeof a.reset!= "function")throw new Error("Handler method 'reset' is invalid");if(typeof a.done!="function")throw new Error("Handler method 'done' is invalid");if(typeof a.writeTag!="function")throw new Error("Handler method 'writeTag' is invalid");if(typeof a.writeText!="function")throw new Error("Handler method 'writeText' is invalid");if(typeof a.writeComment!="function")throw new Error("Handler method 'writeComment' is invalid");if(typeof a.writeDirective!="function")throw new Error("Handler method 'writeDirective' is invalid"); };e.prototype.writeHandler=function(a){if(!(this._tagStack.length&&!!!a))for(;this._elements.length;){a=this._elements.shift();switch(a.type){case d.Comment:this._handler.writeComment(a);break;case d.Directive:this._handler.writeDirective(a);break;case d.Text:this._handler.writeText(a);break;default:this._handler.writeTag(a);break}}};e.prototype.handleError=function(a){if(typeof this._handler.error=="function")this._handler.error(a);else throw a;};(function(a,c){var b=function(){};b.prototype=c.prototype; a.super_=c;a.prototype=new b;a.prototype.constructor=a})(n,g);n.prototype.done=function(){var a={},c,b=f.getElementsByTagName(function(k){return 
k=="rss"||k=="feed"},this.dom,false);if(b.length)c=b[0];if(c){if(c.name=="rss"){a.type="rss";c=c.children[0];a.id="";try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(h){}try{a.link=f.getElementsByTagName("link",c.children,false)[0].children[0].data}catch(i){}try{a.description=f.getElementsByTagName("description",c.children, false)[0].children[0].data}catch(j){}try{a.updated=new Date(f.getElementsByTagName("lastBuildDate",c.children,false)[0].children[0].data)}catch(m){}try{a.author=f.getElementsByTagName("managingEditor",c.children,false)[0].children[0].data}catch(o){}a.items=[];f.getElementsByTagName("item",c.children).forEach(function(k){var l={};try{l.id=f.getElementsByTagName("guid",k.children,false)[0].children[0].data}catch(q){}try{l.title=f.getElementsByTagName("title",k.children,false)[0].children[0].data}catch(r){}try{l.link= f.getElementsByTagName("link",k.children,false)[0].children[0].data}catch(s){}try{l.description=f.getElementsByTagName("description",k.children,false)[0].children[0].data}catch(t){}try{l.pubDate=new Date(f.getElementsByTagName("pubDate",k.children,false)[0].children[0].data)}catch(u){}a.items.push(l)})}else{a.type="atom";try{a.id=f.getElementsByTagName("id",c.children,false)[0].children[0].data}catch(p){}try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(v){}try{a.link= f.getElementsByTagName("link",c.children,false)[0].attribs.href}catch(w){}try{a.description=f.getElementsByTagName("subtitle",c.children,false)[0].children[0].data}catch(x){}try{a.updated=new Date(f.getElementsByTagName("updated",c.children,false)[0].children[0].data)}catch(y){}try{a.author=f.getElementsByTagName("email",c.children,true)[0].children[0].data}catch(z){}a.items=[];f.getElementsByTagName("entry",c.children).forEach(function(k){var l={};try{l.id=f.getElementsByTagName("id",k.children, 
false)[0].children[0].data}catch(q){}try{l.title=f.getElementsByTagName("title",k.children,false)[0].children[0].data}catch(r){}try{l.link=f.getElementsByTagName("link",k.children,false)[0].attribs.href}catch(s){}try{l.description=f.getElementsByTagName("summary",k.children,false)[0].children[0].data}catch(t){}try{l.pubDate=new Date(f.getElementsByTagName("updated",k.children,false)[0].children[0].data)}catch(u){}a.items.push(l)})}this.dom=a}n.super_.prototype.done.call(this)};g._emptyTags={area:1, base:1,basefont:1,br:1,col:1,frame:1,hr:1,img:1,input:1,isindex:1,link:1,meta:1,param:1,embed:1};g.reWhitespace=/^\s*$/;g.prototype.dom=null;g.prototype.reset=function(){this.dom=[];this._done=false;this._tagStack=[];this._tagStack.last=function(){return this.length?this[this.length-1]:null}};g.prototype.done=function(){this._done=true;this.handleCallback(null)};g.prototype.writeTag=function(a){this.handleElement(a)};g.prototype.writeText=function(a){if(this._options.ignoreWhitespace)if(g.reWhitespace.test(a.data))return; this.handleElement(a)};g.prototype.writeComment=function(a){this.handleElement(a)};g.prototype.writeDirective=function(a){this.handleElement(a)};g.prototype.error=function(a){this.handleCallback(a)};g.prototype._options=null;g.prototype._callback=null;g.prototype._done=false;g.prototype._tagStack=null;g.prototype.handleCallback=function(a){if(typeof this._callback!="function")if(a)throw a;else return;this._callback(a,this.dom)};g.prototype.handleElement=function(a){this._done&&this.handleCallback(new Error("Writing to the handler after done() called is not allowed without a reset()")); if(!this._options.verbose){delete a.raw;if(a.type=="tag"||a.type=="script"||a.type=="style")delete a.data}if(this._tagStack.last())if(a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive)if(a.name.charAt(0)=="/"){a=a.name.substring(1);if(!this._options.enforceEmptyTags||!g._emptyTags[a]){for(var 
c=this._tagStack.length-1;c>-1&&this._tagStack[c--].name!=a;);if(c>-1||this._tagStack[0].name==a)for(;c<this._tagStack.length-1;)this._tagStack.pop()}}else{if(!this._tagStack.last().children)this._tagStack.last().children= [];this._tagStack.last().children.push(a);if(!this._options.enforceEmptyTags||!g._emptyTags[a.name])this._tagStack.push(a)}else{if(!this._tagStack.last().children)this._tagStack.last().children=[];this._tagStack.last().children.push(a)}else if(a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive){if(a.name.charAt(0)!="/"){this.dom.push(a);if(!this._options.enforceEmptyTags||!g._emptyTags[a.name])this._tagStack.push(a)}}else this.dom.push(a)};var f={testElement:function(a,c){if(!c)return false;for(var b in a)if(b== "tag_name"){if(c.type!="tag"&&c.type!="script"&&c.type!="style")return false;return a.tag_name(c.name)}else if(b=="tag_type")return a.tag_type(c.type);else if(b=="tag_contains"){if(c.type!="text"&&c.type!="comment"&&c.type!="directive")return false;return a.tag_contains(c.data)}else return c.attribs&&a[b](c.attribs[b]);return true},getElements:function(a,c,b,h){function i(o){return function(p){return p==o}}b=b===undefined||b===null||!!b;h=isNaN(parseInt(h))?-1:parseInt(h);if(!c)return[];var j=[]; for(var m in a)if(typeof a[m]!="function")a[m]=i(a[m]);f.testElement(a,c)&&j.push(c);if(h>=0&&j.length>=h)return j;if(b&&c.children)c=c.children;else if(c instanceof Array)c=c;else return j;for(m=0;m<c.length;m++){j=j.concat(f.getElements(a,c[m],b,h));if(h>=0&&j.length>=h)break}return j},getElementById:function(a,c,b){a=f.getElements({id:a},c,b,1);return a.length?a[0]:null},getElementsByTagName:function(a,c,b,h){return f.getElements({tag_name:a},c,b,h)},getElementsByTagType:function(a,c,b,h){return f.getElements({tag_type:a}, c,b,h)}};exports.Parser=e;exports.DefaultHandler=g;exports.RssHandler=n;exports.ElementType=d;exports.DomUtils=f})();
+/* v1.6.3 */
+(function(){function e(a){this.validateHandler(a);this._handler=a;this.reset()}function n(a){n.super_.call(this,a,{ignoreWhitespace:true,verbose:false,enforceEmptyTags:false})}function g(a,c){this.reset();this._options=c?c:{};if(this._options.ignoreWhitespace==undefined)this._options.ignoreWhitespace=false;if(this._options.verbose==undefined)this._options.verbose=true;if(this._options.enforceEmptyTags==undefined)this._options.enforceEmptyTags=true;if(typeof a=="function")this._callback=a}if(!(typeof require== "function"&&typeof exports=="object"&&typeof module=="object"&&typeof __filename=="string"&&typeof __dirname=="string")){if(this.Tautologistics){if(this.Tautologistics.NodeHtmlParser)return}else this.Tautologistics={};this.Tautologistics.NodeHtmlParser={};exports=this.Tautologistics.NodeHtmlParser}var d={Text:"text",Directive:"directive",Comment:"comment",Script:"script",Style:"style",Tag:"tag"};e._reTrim=/(^\s+|\s+$)/g;e._reTrimComment=/(^\!--|--$)/g;e._reWhitespace=/\s/g;e._reTagName=/^\s*(\/?)\s*([^\s\/]+)/; e._reAttrib=/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;e._reTags=/[\<\>]/g;e.prototype.parseComplete=function(a){this.reset();this.parseChunk(a);this.done()};e.prototype.parseChunk=function(a){this._done&&this.handleError(Error("Attempted to parse chunk after parsing already done"));this._buffer+=a;this.parseTags()};e.prototype.done=function(){if(!this._done){this._done=true;if(this._buffer.length){var a=this._buffer;this._buffer= 
"";a={raw:a,data:this._parseState==d.Text?a:a.replace(e._reTrim,""),type:this._parseState};if(this._parseState==d.Tag||this._parseState==d.Script||this._parseState==d.Style)a.name=this.parseTagName(a.data);this.parseAttribs(a);this._elements.push(a)}this.writeHandler();this._handler.done()}};e.prototype.reset=function(){this._buffer="";this._done=false;this._elements=[];this._next=this._current=this._elementsCurrent=0;this._parseState=d.Text;this._prevTagSep="";this._tagStack=[];this._handler.reset()}; e.prototype._handler=null;e.prototype._buffer=null;e.prototype._done=false;e.prototype._elements=null;e.prototype._elementsCurrent=0;e.prototype._current=0;e.prototype._next=0;e.prototype._parseState=d.Text;e.prototype._prevTagSep="";e.prototype._tagStack=null;e.prototype.parseTagAttribs=function(a){for(var c=a.length,b=0;b<c;){var h=a[b++];if(h.type==d.Tag||h.type==d.Script||h.type==d.style)this.parseAttribs(h)}return a};e.prototype.parseAttribs=function(a){if(!(a.type!=d.Script&&a.type!=d.Style&& a.type!=d.Tag)){var c=a.data.split(e._reWhitespace,1)[0];c=a.data.substring(c.length);if(!(c.length<1)){var b;for(e._reAttrib.lastIndex=0;b=e._reAttrib.exec(c);){if(a.attribs==undefined)a.attribs={};if(typeof b[1]=="string"&&b[1].length)a.attribs[b[1]]=b[2];else if(typeof b[3]=="string"&&b[3].length)a.attribs[b[3].toString()]=b[4].toString();else if(typeof b[5]=="string"&&b[5].length)a.attribs[b[5]]=b[6];else if(typeof b[7]=="string"&&b[7].length)a.attribs[b[7]]=b[7]}}}};e.prototype.parseTagName= function(a){if(a==null||a=="")return"";a=e._reTagName.exec(a);if(!a)return"";return(a[1]?"/":"")+a[2]};e.prototype.parseTags=function(){for(var a=this._buffer.length-1;e._reTags.test(this._buffer);){this._next=e._reTags.lastIndex-1;var c=this._buffer.charAt(this._next),b=this._buffer.substring(this._current,this._next);b={raw:b,data:this._parseState==d.Text?b:b.replace(e._reTrim,""),type:this._parseState};var 
h=this.parseTagName(b.data);if(this._tagStack.length)if(this._tagStack[this._tagStack.length- 1]==d.Script)if(h=="/script")this._tagStack.pop();else{if(b.raw.indexOf("!--")!=0){b.type=d.Text;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Text){var i=this._elements[this._elements.length-1];i.raw=i.data=i.raw+this._prevTagSep+b.raw;b.raw=b.data=""}}}else if(this._tagStack[this._tagStack.length-1]==d.Style)if(h=="/style")this._tagStack.pop();else{if(b.raw.indexOf("!--")!=0){b.type=d.Text;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Text)if(b.raw!= ""){i=this._elements[this._elements.length-1];i.raw=i.data=i.raw+this._prevTagSep+b.raw;b.raw=b.data=""}else i.raw=i.data=i.raw+this._prevTagSep;else if(b.raw!="")b.raw=b.data=b.raw}}else if(this._tagStack[this._tagStack.length-1]==d.Comment){var j=b.raw.length;if(b.raw.charAt(j-2)=="-"&&b.raw.charAt(j-1)=="-"&&c==">"){this._tagStack.pop();if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){i=this._elements[this._elements.length-1];i.raw=i.data=(i.raw+b.raw).replace(e._reTrimComment, "");b.raw=b.data="";b.type=d.Text}else b.type=d.Comment}else{b.type=d.Comment;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){i=this._elements[this._elements.length-1];i.raw=i.data=i.raw+b.raw+c;b.raw=b.data="";b.type=d.Text}else b.raw=b.data=b.raw+c}}if(b.type==d.Tag){b.name=h;if(b.raw.indexOf("!--")==0){b.type=d.Comment;delete b.name;j=b.raw.length;if(b.raw.charAt(j-1)=="-"&&b.raw.charAt(j-2)=="-"&&c==">")b.raw=b.data=b.raw.replace(e._reTrimComment,"");else{b.raw+= c;this._tagStack.push(d.Comment)}}else if(b.raw.indexOf("!")==0||b.raw.indexOf("?")==0)b.type=d.Directive;else if(b.name=="script"){b.type=d.Script;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Script)}else if(b.name=="/script")b.type=d.Script;else 
if(b.name=="style"){b.type=d.Style;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Style)}else if(b.name=="/style")b.type=d.Style;if(b.name&&b.name.charAt(0)=="/")b.data=b.name}if(b.raw!=""||b.type!=d.Text){this.parseAttribs(b); this._elements.push(b);b.type!=d.Text&&b.type!=d.Comment&&b.type!=d.Directive&&b.data.charAt(b.data.length-1)=="/"&&this._elements.push({raw:"/"+b.name,data:"/"+b.name,name:"/"+b.name,type:b.type})}this._parseState=c=="<"?d.Tag:d.Text;this._current=this._next+1;this._prevTagSep=c}this._buffer=this._current<=a?this._buffer.substring(this._current):"";this._current=0;this.writeHandler()};e.prototype.validateHandler=function(a){if(typeof a!="object")throw Error("Handler is not an object");if(typeof a.reset!= "function")throw Error("Handler method 'reset' is invalid");if(typeof a.done!="function")throw Error("Handler method 'done' is invalid");if(typeof a.writeTag!="function")throw Error("Handler method 'writeTag' is invalid");if(typeof a.writeText!="function")throw Error("Handler method 'writeText' is invalid");if(typeof a.writeComment!="function")throw Error("Handler method 'writeComment' is invalid");if(typeof a.writeDirective!="function")throw Error("Handler method 'writeDirective' is invalid");}; e.prototype.writeHandler=function(a){a=!!a;if(!(this._tagStack.length&&!a))for(;this._elements.length;){a=this._elements.shift();switch(a.type){case d.Comment:this._handler.writeComment(a);break;case d.Directive:this._handler.writeDirective(a);break;case d.Text:this._handler.writeText(a);break;default:this._handler.writeTag(a)}}};e.prototype.handleError=function(a){if(typeof this._handler.error=="function")this._handler.error(a);else throw a;};(function(a,c){var b=function(){};b.prototype=c.prototype; a.super_=c;a.prototype=new b;a.prototype.constructor=a})(n,g);n.prototype.done=function(){var a={},c,b=f.getElementsByTagName(function(k){return 
k=="rss"||k=="feed"},this.dom,false);if(b.length)c=b[0];if(c){if(c.name=="rss"){a.type="rss";c=c.children[0];a.id="";try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(h){}try{a.link=f.getElementsByTagName("link",c.children,false)[0].children[0].data}catch(i){}try{a.description=f.getElementsByTagName("description",c.children, false)[0].children[0].data}catch(j){}try{a.updated=new Date(f.getElementsByTagName("lastBuildDate",c.children,false)[0].children[0].data)}catch(m){}try{a.author=f.getElementsByTagName("managingEditor",c.children,false)[0].children[0].data}catch(o){}a.items=[];f.getElementsByTagName("item",c.children).forEach(function(k){var l={};try{l.id=f.getElementsByTagName("guid",k.children,false)[0].children[0].data}catch(q){}try{l.title=f.getElementsByTagName("title",k.children,false)[0].children[0].data}catch(r){}try{l.link= f.getElementsByTagName("link",k.children,false)[0].children[0].data}catch(s){}try{l.description=f.getElementsByTagName("description",k.children,false)[0].children[0].data}catch(t){}try{l.pubDate=new Date(f.getElementsByTagName("pubDate",k.children,false)[0].children[0].data)}catch(u){}a.items.push(l)})}else{a.type="atom";try{a.id=f.getElementsByTagName("id",c.children,false)[0].children[0].data}catch(p){}try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(v){}try{a.link= f.getElementsByTagName("link",c.children,false)[0].attribs.href}catch(w){}try{a.description=f.getElementsByTagName("subtitle",c.children,false)[0].children[0].data}catch(x){}try{a.updated=new Date(f.getElementsByTagName("updated",c.children,false)[0].children[0].data)}catch(y){}try{a.author=f.getElementsByTagName("email",c.children,true)[0].children[0].data}catch(z){}a.items=[];f.getElementsByTagName("entry",c.children).forEach(function(k){var l={};try{l.id=f.getElementsByTagName("id",k.children, 
false)[0].children[0].data}catch(q){}try{l.title=f.getElementsByTagName("title",k.children,false)[0].children[0].data}catch(r){}try{l.link=f.getElementsByTagName("link",k.children,false)[0].attribs.href}catch(s){}try{l.description=f.getElementsByTagName("summary",k.children,false)[0].children[0].data}catch(t){}try{l.pubDate=new Date(f.getElementsByTagName("updated",k.children,false)[0].children[0].data)}catch(u){}a.items.push(l)})}this.dom=a}n.super_.prototype.done.call(this)};g._emptyTags={area:1, base:1,basefont:1,br:1,col:1,frame:1,hr:1,img:1,input:1,isindex:1,link:1,meta:1,param:1,embed:1};g.reWhitespace=/^\s*$/;g.prototype.dom=null;g.prototype.reset=function(){this.dom=[];this._done=false;this._tagStack=[];this._tagStack.last=function(){return this.length?this[this.length-1]:null}};g.prototype.done=function(){this._done=true;this.handleCallback(null)};g.prototype.writeTag=function(a){this.handleElement(a)};g.prototype.writeText=function(a){if(this._options.ignoreWhitespace)if(g.reWhitespace.test(a.data))return; this.handleElement(a)};g.prototype.writeComment=function(a){this.handleElement(a)};g.prototype.writeDirective=function(a){this.handleElement(a)};g.prototype.error=function(a){this.handleCallback(a)};g.prototype._options=null;g.prototype._callback=null;g.prototype._done=false;g.prototype._tagStack=null;g.prototype.handleCallback=function(a){if(typeof this._callback!="function")if(a)throw a;else return;this._callback(a,this.dom)};g.prototype.handleElement=function(a){this._done&&this.handleCallback(Error("Writing to the handler after done() called is not allowed without a reset()")); if(!this._options.verbose){delete a.raw;if(a.type=="tag"||a.type=="script"||a.type=="style")delete a.data}if(this._tagStack.last())if(a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive)if(a.name.charAt(0)=="/"){a=a.name.substring(1);if(!this._options.enforceEmptyTags||!g._emptyTags[a]){for(var 
c=this._tagStack.length-1;c>-1&&this._tagStack[c--].name!=a;);if(c>-1||this._tagStack[0].name==a)for(;c<this._tagStack.length-1;)this._tagStack.pop()}}else{if(!this._tagStack.last().children)this._tagStack.last().children= [];this._tagStack.last().children.push(a);if(!this._options.enforceEmptyTags||!g._emptyTags[a.name])this._tagStack.push(a)}else{if(!this._tagStack.last().children)this._tagStack.last().children=[];this._tagStack.last().children.push(a)}else if(a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive){if(a.name.charAt(0)!="/"){this.dom.push(a);if(!this._options.enforceEmptyTags||!g._emptyTags[a.name])this._tagStack.push(a)}}else this.dom.push(a)};var f={testElement:function(a,c){if(!c)return false;for(var b in a)if(b== "tag_name"){if(c.type!="tag"&&c.type!="script"&&c.type!="style")return false;if(!a.tag_name(c.name))return false}else if(b=="tag_type"){if(!a.tag_type(c.type))return false}else if(b=="tag_contains"){if(c.type!="text"&&c.type!="comment"&&c.type!="directive")return false;if(!a.tag_contains(c.data))return false}else if(!c.attribs||!a[b](c.attribs[b]))return false;return true},getElements:function(a,c,b,h){function i(o){return function(p){return p==o}}b=b===undefined||b===null||!!b;h=isNaN(parseInt(h))? -1:parseInt(h);if(!c)return[];var j=[],m;for(m in a)if(typeof a[m]!="function")a[m]=i(a[m]);f.testElement(a,c)&&j.push(c);if(h>=0&&j.length>=h)return j;if(b&&c.children)c=c.children;else if(c instanceof Array)c=c;else return j;for(m=0;m<c.length;m++){j=j.concat(f.getElements(a,c[m],b,h));if(h>=0&&j.length>=h)break}return j},getElementById:function(a,c,b){a=f.getElements({id:a},c,b,1);return a.length?a[0]:null},getElementsByTagName:function(a,c,b,h){return f.getElements({tag_name:a},c,b,h)},getElementsByTagType:function(a, c,b,h){return f.getElements({tag_type:a},c,b,h)}};exports.Parser=e;exports.DefaultHandler=g;exports.RssHandler=n;exports.ElementType=d;exports.DomUtils=f})();
36 tests/18-enforce_empty_tags.js
View
@@ -0,0 +1,36 @@
+(function () {
+
+function RunningInNode () {
+ return(
+ (typeof require) == "function"
+ &&
+ (typeof exports) == "object"
+ &&
+ (typeof module) == "object"
+ &&
+ (typeof __filename) == "string"
+ &&
+ (typeof __dirname) == "string"
+ );
+}
+
+if (!RunningInNode()) {
+ if (!this.Tautologistics)
+ this.Tautologistics = {};
+ if (!this.Tautologistics.NodeHtmlParser)
+ this.Tautologistics.NodeHtmlParser = {};
+ if (!this.Tautologistics.NodeHtmlParser.Tests)
+ this.Tautologistics.NodeHtmlParser.Tests = [];
+ exports = {};
+ this.Tautologistics.NodeHtmlParser.Tests.push(exports);
+}
+
+exports.name = "Enforce empty tags";
+exports.html = "<link>text</link>";
+exports.expected =
+ [
+ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
+ , { raw: 'text', data: 'text', type: 'text' }
+ ];
+
+})();
38 tests/19-ignore_empty_tags.js
View
@@ -0,0 +1,38 @@
+(function () {
+
+function RunningInNode () {
+ return(
+ (typeof require) == "function"
+ &&
+ (typeof exports) == "object"
+ &&
+ (typeof module) == "object"
+ &&
+ (typeof __filename) == "string"
+ &&
+ (typeof __dirname) == "string"
+ );
+}
+
+if (!RunningInNode()) {
+ if (!this.Tautologistics)
+ this.Tautologistics = {};
+ if (!this.Tautologistics.NodeHtmlParser)
+ this.Tautologistics.NodeHtmlParser = {};
+ if (!this.Tautologistics.NodeHtmlParser.Tests)
+ this.Tautologistics.NodeHtmlParser.Tests = [];
+ exports = {};
+ this.Tautologistics.NodeHtmlParser.Tests.push(exports);
+}
+
+exports.name = "Ignore empty tags";
+exports.html = "<link>text</link>";
+exports.options = { enforceEmptyTags: false };
+exports.expected =
+ [
+ { raw: 'link', data: 'link', type: 'tag', name: 'link', children: [
+ { raw: 'text', data: 'text', type: 'text' }
+ ] }
+ ];
+
+})();
117 tests/20-rss.js
View
@@ -0,0 +1,117 @@
+(function () {
+// Feed test fixture "RSS (2.0)": an RSS 2.0 document with four items plus the feed object expected from it.
+function RunningInNode () { // true only when all five Node module-scope names below have their usual typeof
+ return(
+ (typeof require) == "function"
+ &&
+ (typeof exports) == "object"
+ &&
+ (typeof module) == "object"
+ &&
+ (typeof __filename) == "string"
+ &&
+ (typeof __dirname) == "string"
+ );
+}
+// Outside Node: create Tautologistics.NodeHtmlParser.Tests on `this` if missing and register this fixture in that array.
+if (!RunningInNode()) {
+ if (!this.Tautologistics)
+ this.Tautologistics = {};
+ if (!this.Tautologistics.NodeHtmlParser)
+ this.Tautologistics.NodeHtmlParser = {};
+ if (!this.Tautologistics.NodeHtmlParser.Tests)
+ this.Tautologistics.NodeHtmlParser.Tests = [];
+ exports = {}; // `exports` is not declared in this file; presumably becomes an implicit global here -- confirm
+ this.Tautologistics.NodeHtmlParser.Tests.push(exports);
+}
+// Fixture fields follow; html is one string literal built with backslash line continuations.
+exports.name = "RSS (2.0)";
+//http://cyber.law.harvard.edu/rss/examples/rss2sample.xml
+exports.html = '<?xml version="1.0"?>\
+<rss version="2.0">\
+ <channel>\
+ <title>Liftoff News</title>\
+ <link>http://liftoff.msfc.nasa.gov/</link>\
+ <description>Liftoff to Space Exploration.</description>\
+ <language>en-us</language>\
+ <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>\
+\
+ <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>\
+ <docs>http://blogs.law.harvard.edu/tech/rss</docs>\
+ <generator>Weblog Editor 2.0</generator>\
+ <managingEditor>editor@example.com</managingEditor>\
+ <webMaster>webmaster@example.com</webMaster>\
+ <item>\
+\
+ <title>Star City</title>\
+ <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>\
+ <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia\'s &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>\
+ <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>\
+ <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>\
+\
+ </item>\
+ <item>\
+ <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>\
+ <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>\
+ <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>\
+\
+ </item>\
+ <item>\
+ <title>The Engine That Does More</title>\
+ <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>\
+ <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>\
+ <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>\
+ <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>\
+\
+ </item>\
+ <item>\
+ <title>Astronauts\' Dirty Laundry</title>\
+ <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>\
+ <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>\
+ <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>\
+ <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>\
+\
+ </item>\
+ </channel>\
+</rss>';
+exports.options = { };
+exports.type = "rss"; // presumably tells the test runner to use the feed handler instead of the DOM handler -- confirm
+exports.expected = {
+ type: "rss"
+ , id: "" // the channel in the input above carries no <guid>
+ , title: "Liftoff News"
+ , link: "http://liftoff.msfc.nasa.gov/"
+ , description: "Liftoff to Space Exploration."
+ , updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT") // same timestamp as <lastBuildDate>, not the channel <pubDate>
+ , author: "editor@example.com" // same value as <managingEditor>
+ , items: [
+ {
+ id: "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
+ , title: "Star City"
+ , link: "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
+ , description: "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\"&gt;Star City&lt;/a&gt;."
+ , pubDate: new Date("Tue, 03 Jun 2003 09:39:21 GMT")
+ }
+ , { // second <item> in the input has no <title> or <link>, so neither key is expected here
+ id: "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572"
+ , description: "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st."
+ , pubDate: new Date("Fri, 30 May 2003 11:06:42 GMT")
+ }
+ , {
+ id: "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571"
+ , title: "The Engine That Does More"
+ , link: "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"
+ , description: "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that."
+ , pubDate: new Date("Tue, 27 May 2003 08:37:32 GMT")
+ }
+ , {
+ id: "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570"
+ , title: "Astronauts' Dirty Laundry"
+ , link: "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"
+ , description: "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options."
+ , pubDate: new Date("Tue, 20 May 2003 08:56:02 GMT")
+ }
+ ]
+ };
+
+})();
77 tests/21-atom.js
View
@@ -0,0 +1,77 @@
+(function () {
+// Feed test fixture "Atom (1.0)": an Atom document with one entry plus the feed object expected from it.
+function RunningInNode () { // true only when all five Node module-scope names below have their usual typeof
+ return(
+ (typeof require) == "function"
+ &&
+ (typeof exports) == "object"
+ &&
+ (typeof module) == "object"
+ &&
+ (typeof __filename) == "string"
+ &&
+ (typeof __dirname) == "string"
+ );
+}
+// Outside Node: create Tautologistics.NodeHtmlParser.Tests on `this` if missing and register this fixture in that array.
+if (!RunningInNode()) {
+ if (!this.Tautologistics)
+ this.Tautologistics = {};
+ if (!this.Tautologistics.NodeHtmlParser)
+ this.Tautologistics.NodeHtmlParser = {};
+ if (!this.Tautologistics.NodeHtmlParser.Tests)
+ this.Tautologistics.NodeHtmlParser.Tests = [];
+ exports = {}; // `exports` is not declared in this file; presumably becomes an implicit global here -- confirm
+ this.Tautologistics.NodeHtmlParser.Tests.push(exports);
+}
+// Fixture fields follow; html is one string literal built with backslash line continuations.
+exports.name = "Atom (1.0)";
+//http://en.wikipedia.org/wiki/Atom_%28standard%29
+exports.html = '<?xml version="1.0" encoding="utf-8"?>\
+\
+<feed xmlns="http://www.w3.org/2005/Atom">\
+\
+ <title>Example Feed</title>\
+ <subtitle>A subtitle.</subtitle>\
+ <link href="http://example.org/feed/" rel="self" />\
+ <link href="http://example.org/" />\
+ <id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>\
+ <updated>2003-12-13T18:30:02Z</updated>\
+ <author>\
+ <name>John Doe</name>\
+ <email>johndoe@example.com</email>\
+ </author>\
+\
+ <entry>\
+ <title>Atom-Powered Robots Run Amok</title>\
+ <link href="http://example.org/2003/12/13/atom03" />\
+ <link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>\
+ <link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>\
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>\
+ <updated>2003-12-13T18:30:02Z</updated>\
+ <summary>Some text.</summary>\
+ </entry>\
+\
+</feed>';
+exports.options = { };
+exports.type = "rss"; // NOTE(review): "rss" although expected.type is "atom"; presumably selects the feed handler, not the format -- confirm
+exports.expected = {
+ type: "atom"
+ , id: "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6"
+ , title: "Example Feed"
+ , link: "http://example.org/feed/" // href of the first <link> in the feed (the rel="self" one)
+ , description: "A subtitle." // same text as <subtitle>
+ , updated: new Date("2003-12-13T18:30:02Z")
+ , author: "johndoe@example.com" // same value as <author><email>, not <name>
+ , items: [
+ {
+ id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
+ , title: "Atom-Powered Robots Run Amok"
+ , link: "http://example.org/2003/12/13/atom03" // href of the entry's first <link>
+ , description: "Some text." // same text as <summary>
+ , pubDate: new Date("2003-12-13T18:30:02Z") // same timestamp as the entry's <updated>
+ }
+ ]
+ };
+
+})();
8 utils_example.js
View
@@ -1,9 +1,9 @@
//node --prof --prof_auto profile.js
//deps/v8/tools/mac-tick-processor v8.log
var sys = require("sys");
-var htmlparser = require("./node-htmlparser");
+var htmlparser = require("./lib/node-htmlparser");
-var html = "<a>text a</a><b id='x'>text b</b><c class='y'>text c</c><d id='z' class='w'><e>text e</e></d><g class='g h i'>hhh</g>";
+var html = "<a>text a</a><b id='x'>text b</b><c class='y'>text c</c><d id='z' class='w'><e>text e</e></d><g class='g h i'>hhh</g><yy>hellow</yy><yy id='secondyy'>world</yy>";
var handler = new htmlparser.DefaultHandler(function(err, dom) {
if (err) {
@@ -25,6 +25,10 @@ var handler = new htmlparser.DefaultHandler(function(err, dom) {
nested = htmlparser.DomUtils.getElementsByTagName("e", nested);
nested = htmlparser.DomUtils.getElementsByTagType("text", nested);
sys.debug("nested: " + sys.inspect(nested, false, null));
+ var double = htmlparser.DomUtils.getElementsByTagName("yy", dom);
+ sys.debug("double: " + sys.inspect(double, false, null));
+ var single = htmlparser.DomUtils.getElements( { tag_name: "yy", id: "secondyy" }, dom);
+ sys.debug("single: " + sys.inspect(single, false, null));
}
}, { verbose: false });
var parser = new htmlparser.Parser(handler);
Please sign in to comment.
Something went wrong with that request. Please try again.