Skip to content
Browse files

Changed how internal case insensitive tag evaluation occurs

  • Loading branch information...
1 parent 403fe86 commit 7ded5ba4e66666fe2c9cac96dde0aa3825b50adb @tautologistics committed
Showing with 186 additions and 124 deletions.
  1. +175 −114 README.md
  2. +9 −8 lib/htmlparser.js
  3. +2 −2 lib/htmlparser.min.js
View
289 README.md
@@ -14,165 +14,226 @@ A forgiving HTML/XML/RSS parser written in JS for both the browser and NodeJS (y
View runtests.html in any browser
##Usage In Node
- var htmlparser = require("htmlparser");
- var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->";
- var handler = new htmlparser.DefaultHandler(function (error, dom) {
- if (error)
- [...do something for errors...]
- else
- [...parsing done, do something...]
- });
- var parser = new htmlparser.Parser(handler);
- parser.parseComplete(rawHtml);
- sys.puts(sys.inspect(handler.dom, false, null));
+
+```javascript
+var htmlparser = require("htmlparser");
+var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->";
+var handler = new htmlparser.DefaultHandler(function (error, dom) {
+ if (error)
+ [...do something for errors...]
+ else
+ [...parsing done, do something...]
+});
+var parser = new htmlparser.Parser(handler);
+parser.parseComplete(rawHtml);
+sys.puts(sys.inspect(handler.dom, false, null));
+```
##Usage In Browser
- var handler = new Tautologistics.NodeHtmlParser.DefaultHandler(function (error, dom) {
- if (error)
- [...do something for errors...]
- else
- [...parsing done, do something...]
- });
- var parser = new Tautologistics.NodeHtmlParser.Parser(handler);
- parser.parseComplete(document.body.innerHTML);
- alert(JSON.stringify(handler.dom, null, 2));
+
+```javascript
+var handler = new Tautologistics.NodeHtmlParser.DefaultHandler(function (error, dom) {
+ if (error)
+ [...do something for errors...]
+ else
+ [...parsing done, do something...]
+});
+var parser = new Tautologistics.NodeHtmlParser.Parser(handler);
+parser.parseComplete(document.body.innerHTML);
+alert(JSON.stringify(handler.dom, null, 2));
+```
##Example output
- [ { raw: 'Xyz ', data: 'Xyz ', type: 'text' }
- , { raw: 'script language= javascript'
- , data: 'script language= javascript'
- , type: 'script'
- , name: 'script'
- , attribs: { language: 'javascript' }
- , children:
- [ { raw: 'var foo = \'<bar>\';<'
- , data: 'var foo = \'<bar>\';<'
- , type: 'text'
- }
- ]
- }
- , { raw: '<!-- Waah! -- '
- , data: '<!-- Waah! -- '
- , type: 'comment'
- }
- ]
+
+```javascript
+[ { raw: 'Xyz ', data: 'Xyz ', type: 'text' }
+ , { raw: 'script language= javascript'
+ , data: 'script language= javascript'
+ , type: 'script'
+ , name: 'script'
+ , attribs: { language: 'javascript' }
+ , children:
+ [ { raw: 'var foo = \'<bar>\';<'
+ , data: 'var foo = \'<bar>\';<'
+ , type: 'text'
+ }
+ ]
+ }
+, { raw: '<!-- Waah! -- '
+ , data: '<!-- Waah! -- '
+ , type: 'comment'
+ }
+]
+```
##Streaming To Parser
- while (...) {
- ...
- parser.parseChunk(chunk);
- }
- parser.done();
+
+```javascript
+while (...) {
+ ...
+ parser.parseChunk(chunk);
+}
+parser.done();
+```
##Parsing RSS/Atom Feeds
- new htmlparser.RssHandler(function (error, dom) {
- ...
- });
+```javascript
+new htmlparser.RssHandler(function (error, dom) {
+ ...
+});
+```
##DefaultHandler Options
###Usage
- var handler = new htmlparser.DefaultHandler(
- function (error) { ... }
- , { verbose: false, ignoreWhitespace: true }
- );
-
+
+```javascript
+var handler = new htmlparser.DefaultHandler(
+ function (error) { ... }
+ , { verbose: false, ignoreWhitespace: true }
+ );
+```
+
###Option: ignoreWhitespace
Indicates whether the DOM should exclude text nodes that consist solely of whitespace. The default value is "false".
####Example: true
+
The following HTML:
- <font>
- <br>this is the text
- <font>
+
+```html
+<font>
+ <br>this is the text
+<font>
+```
+
becomes:
- [ { raw: 'font'
- , data: 'font'
- , type: 'tag'
- , name: 'font'
- , children:
- [ { raw: 'br', data: 'br', type: 'tag', name: 'br' }
- , { raw: 'this is the text\n'
- , data: 'this is the text\n'
- , type: 'text'
- }
- , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
- ]
- }
- ]
+
+```javascript
+[ { raw: 'font'
+ , data: 'font'
+ , type: 'tag'
+ , name: 'font'
+ , children:
+ [ { raw: 'br', data: 'br', type: 'tag', name: 'br' }
+ , { raw: 'this is the text\n'
+ , data: 'this is the text\n'
+ , type: 'text'
+ }
+ , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
+ ]
+ }
+]
+```
####Example: false
+
The following HTML:
- <font>
- <br>this is the text
- <font>
+
+```html
+<font>
+ <br>this is the text
+<font>
+```
+
becomes:
- [ { raw: 'font'
- , data: 'font'
- , type: 'tag'
- , name: 'font'
- , children:
- [ { raw: '\n\t', data: '\n\t', type: 'text' }
- , { raw: 'br', data: 'br', type: 'tag', name: 'br' }
- , { raw: 'this is the text\n'
- , data: 'this is the text\n'
- , type: 'text'
- }
- , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
- ]
- }
- ]
+
+```javascript
+[ { raw: 'font'
+ , data: 'font'
+ , type: 'tag'
+ , name: 'font'
+ , children:
+ [ { raw: '\n\t', data: '\n\t', type: 'text' }
+ , { raw: 'br', data: 'br', type: 'tag', name: 'br' }
+ , { raw: 'this is the text\n'
+ , data: 'this is the text\n'
+ , type: 'text'
+ }
+ , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
+ ]
+ }
+]
+```
###Option: verbose
Indicates whether to include extra information on each node in the DOM. This information consists of the "raw" attribute (original, unparsed text found between "<" and ">") and the "data" attribute on "tag", "script", and "comment" nodes. The default value is "true".
####Example: true
The following HTML:
- <a href="test.html">xxx</a>
+
+```html
+<a href="test.html">xxx</a>
+```
+
becomes:
- [ { raw: 'a href="test.html"'
- , data: 'a href="test.html"'
- , type: 'tag'
- , name: 'a'
- , attribs: { href: 'test.html' }
- , children: [ { raw: 'xxx', data: 'xxx', type: 'text' } ]
- }
- ]
+
+```javascript
+[ { raw: 'a href="test.html"'
+ , data: 'a href="test.html"'
+ , type: 'tag'
+ , name: 'a'
+ , attribs: { href: 'test.html' }
+ , children: [ { raw: 'xxx', data: 'xxx', type: 'text' } ]
+ }
+]
+```
####Example: false
The following HTML:
- <a href="test.html">xxx</a>
+
+```html
+<a href="test.html">xxx</a>
+```
+
becomes:
- [ { type: 'tag'
- , name: 'a'
- , attribs: { href: 'test.html' }
- , children: [ { data: 'xxx', type: 'text' } ]
- }
- ]
+
+```javascript
+[ { type: 'tag'
+ , name: 'a'
+ , attribs: { href: 'test.html' }
+ , children: [ { data: 'xxx', type: 'text' } ]
+ }
+]
+```
###Option: enforceEmptyTags
Indicates whether the DOM should prevent children on tags marked as empty in the HTML spec. Typically this should be set to "true" for HTML parsing and "false" for XML parsing. The default value is "true".
####Example: true
The following HTML:
- <link>text</link>
+
+```html
+<link>text</link>
+```
+
becomes:
- [ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
- , { raw: 'text', data: 'text', type: 'text' }
- ]
+
+```javascript
+[ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
+, { raw: 'text', data: 'text', type: 'text' }
+]
+```
####Example: false
The following HTML:
- <link>text</link>
+
+```html
+<link>text</link>
+```
+
becomes:
- [ { raw: 'link'
- , data: 'link'
- , type: 'tag'
- , name: 'link'
- , children: [ { raw: 'text', data: 'text', type: 'text' } ]
- }
- ]
+
+```javascript
+[ { raw: 'link'
+ , data: 'link'
+ , type: 'tag'
+ , name: 'link'
+ , children: [ { raw: 'text', data: 'text', type: 'text' } ]
+ }
+]
+```
##DomUtils
View
17 lib/htmlparser.js
@@ -18,7 +18,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
***********************************************/
-/* v1.7.4 */
+/* v1.7.5 */
(function () {
@@ -227,13 +227,13 @@ function Parser (handler, options) {
, type: this._parseState
};
- var elementName = this.parseTagName(element.data).toLowerCase();
+ var elementName = this.parseTagName(element.data);
//This section inspects the current tag stack and modifies the current
//element if we're actually parsing a special area (script/comment/style tag)
if (this._tagStack.length) { //We're parsing inside a script/comment/style tag
if (this._tagStack[this._tagStack.length - 1] == ElementType.Script) { //We're currently in a script tag
- if (elementName == "/script") //Actually, we're no longer in a script tag, so pop it off the stack
+ if (elementName.toLowerCase() == "/script") //Actually, we're no longer in a script tag, so pop it off the stack
this._tagStack.pop();
else { //Not a closing script tag
if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
@@ -249,7 +249,7 @@ function Parser (handler, options) {
}
}
else if (this._tagStack[this._tagStack.length - 1] == ElementType.Style) { //We're currently in a style tag
- if (elementName == "/style") //Actually, we're no longer in a style tag, so pop it off the stack
+ if (elementName.toLowerCase() == "/style") //Actually, we're no longer in a style tag, so pop it off the stack
this._tagStack.pop();
else {
if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
@@ -305,6 +305,7 @@ function Parser (handler, options) {
//Processing of non-special tags
if (element.type == ElementType.Tag) {
element.name = elementName;
+ var elementNameCI = elementName.toLowerCase();
if (element.raw.indexOf("!--") == 0) { //This tag is really comment
element.type = ElementType.Comment;
@@ -322,21 +323,21 @@ function Parser (handler, options) {
element.type = ElementType.Directive;
//TODO: what about CDATA?
}
- else if (element.name == "script") {
+ else if (elementNameCI == "script") {
element.type = ElementType.Script;
//Special tag, push onto the tag stack if not terminated
if (element.data.charAt(element.data.length - 1) != "/")
this._tagStack.push(ElementType.Script);
}
- else if (element.name == "/script")
+ else if (elementNameCI == "/script")
element.type = ElementType.Script;
- else if (element.name == "style") {
+ else if (elementNameCI == "style") {
element.type = ElementType.Style;
//Special tag, push onto the tag stack if not terminated
if (element.data.charAt(element.data.length - 1) != "/")
this._tagStack.push(ElementType.Style);
}
- else if (element.name == "/style")
+ else if (elementNameCI == "/style")
element.type = ElementType.Style;
if (element.name && element.name.charAt(0) == "/")
element.data = element.name;
View
4 lib/htmlparser.min.js
@@ -18,5 +18,5 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
***********************************************/
-/* v1.7.4 */
-(function(){function e(a,c){this._options=c?c:{};if(this._options.includeLocation==undefined)this._options.includeLocation=false;this.validateHandler(a);this._handler=a;this.reset()}function n(a){n.super_.call(this,a,{ignoreWhitespace:true,verbose:false,enforceEmptyTags:false})}function i(a,c){this.reset();this._options=c?c:{};if(this._options.ignoreWhitespace==undefined)this._options.ignoreWhitespace=false;if(this._options.verbose==undefined)this._options.verbose=true;if(this._options.enforceEmptyTags== undefined)this._options.enforceEmptyTags=true;if(typeof a=="function")this._callback=a}if(!(typeof require=="function"&&typeof exports=="object"&&typeof module=="object"&&typeof __filename=="string"&&typeof __dirname=="string")){if(this.Tautologistics){if(this.Tautologistics.NodeHtmlParser)return}else this.Tautologistics={};this.Tautologistics.NodeHtmlParser={};exports=this.Tautologistics.NodeHtmlParser}var d={Text:"text",Directive:"directive",Comment:"comment",Script:"script",Style:"style",Tag:"tag"}; e._reTrim=/(^\s+|\s+$)/g;e._reTrimComment=/(^\!--|--$)/g;e._reWhitespace=/\s/g;e._reTagName=/^\s*(\/?)\s*([^\s\/]+)/;e._reAttrib=/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;e._reTags=/[\<\>]/g;e.prototype.parseComplete=function(a){this.reset();this.parseChunk(a);this.done()};e.prototype.parseChunk=function(a){this._done&&this.handleError(Error("Attempted to parse chunk after parsing already done"));this._buffer+=a;this.parseTags()}; e.prototype.done=function(){if(!this._done){this._done=true;if(this._buffer.length){var 
a=this._buffer;this._buffer="";a={raw:a,data:this._parseState==d.Text?a:a.replace(e._reTrim,""),type:this._parseState};if(this._parseState==d.Tag||this._parseState==d.Script||this._parseState==d.Style)a.name=this.parseTagName(a.data);this.parseAttribs(a);this._elements.push(a)}this.writeHandler();this._handler.done()}};e.prototype.reset=function(){this._buffer="";this._done=false;this._elements=[];this._next=this._current= this._elementsCurrent=0;this._location={row:0,col:0,charOffset:0,inBuffer:0};this._parseState=d.Text;this._prevTagSep="";this._tagStack=[];this._handler.reset()};e.prototype._options=null;e.prototype._handler=null;e.prototype._buffer=null;e.prototype._done=false;e.prototype._elements=null;e.prototype._elementsCurrent=0;e.prototype._current=0;e.prototype._next=0;e.prototype._location=null;e.prototype._parseState=d.Text;e.prototype._prevTagSep="";e.prototype._tagStack=null;e.prototype.parseTagAttribs= function(a){for(var c=a.length,b=0;b<c;){var h=a[b++];if(h.type==d.Tag||h.type==d.Script||h.type==d.style)this.parseAttribs(h)}return a};e.prototype.parseAttribs=function(a){if(!(a.type!=d.Script&&a.type!=d.Style&&a.type!=d.Tag)){var c=a.data.split(e._reWhitespace,1)[0];c=a.data.substring(c.length);if(!(c.length<1)){var b;for(e._reAttrib.lastIndex=0;b=e._reAttrib.exec(c);){if(a.attribs==undefined)a.attribs={};if(typeof b[1]=="string"&&b[1].length)a.attribs[b[1]]=b[2];else if(typeof b[3]=="string"&& b[3].length)a.attribs[b[3].toString()]=b[4].toString();else if(typeof b[5]=="string"&&b[5].length)a.attribs[b[5]]=b[6];else if(typeof b[7]=="string"&&b[7].length)a.attribs[b[7]]=b[7]}}}};e.prototype.parseTagName=function(a){if(a==null||a=="")return"";a=e._reTagName.exec(a);if(!a)return"";return(a[1]?"/":"")+a[2]};e.prototype.parseTags=function(){for(var a=this._buffer.length-1;e._reTags.test(this._buffer);){this._next=e._reTags.lastIndex-1;var c=this._buffer.charAt(this._next),b=this._buffer.substring(this._current, 
this._next);b={raw:b,data:this._parseState==d.Text?b:b.replace(e._reTrim,""),type:this._parseState};var h=this.parseTagName(b.data);if(this._tagStack.length)if(this._tagStack[this._tagStack.length-1]==d.Script)if(h=="/script")this._tagStack.pop();else{if(b.raw.indexOf("!--")!=0){b.type=d.Text;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Text){var g=this._elements[this._elements.length-1];g.raw=g.data=g.raw+this._prevTagSep+b.raw;b.raw=b.data=""}}}else if(this._tagStack[this._tagStack.length- 1]==d.Style)if(h=="/style")this._tagStack.pop();else{if(b.raw.indexOf("!--")!=0){b.type=d.Text;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Text){g=this._elements[this._elements.length-1];if(b.raw!=""){g.raw=g.data=g.raw+this._prevTagSep+b.raw;b.raw=b.data=""}else g.raw=g.data=g.raw+this._prevTagSep}else if(b.raw!="")b.raw=b.data=b.raw}}else if(this._tagStack[this._tagStack.length-1]==d.Comment){g=b.raw.length;if(b.raw.charAt(g-2)=="-"&&b.raw.charAt(g-1)=="-"&&c==">"){this._tagStack.pop(); if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){g=this._elements[this._elements.length-1];g.raw=g.data=(g.raw+b.raw).replace(e._reTrimComment,"");b.raw=b.data="";b.type=d.Text}else b.type=d.Comment}else{b.type=d.Comment;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){g=this._elements[this._elements.length-1];g.raw=g.data=g.raw+b.raw+c;b.raw=b.data="";b.type=d.Text}else b.raw=b.data=b.raw+c}}if(b.type==d.Tag){b.name=h;if(b.raw.indexOf("!--")== 0){b.type=d.Comment;delete b.name;g=b.raw.length;if(b.raw.charAt(g-1)=="-"&&b.raw.charAt(g-2)=="-"&&c==">")b.raw=b.data=b.raw.replace(e._reTrimComment,"");else{b.raw+=c;this._tagStack.push(d.Comment)}}else if(b.raw.indexOf("!")==0||b.raw.indexOf("?")==0)b.type=d.Directive;else if(b.name=="script"){b.type=d.Script;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Script)}else if(b.name=="/script")b.type=d.Script;else 
if(b.name=="style"){b.type=d.Style;b.data.charAt(b.data.length-1)!="/"&& this._tagStack.push(d.Style)}else if(b.name=="/style")b.type=d.Style;if(b.name&&b.name.charAt(0)=="/")b.data=b.name}if(b.raw!=""||b.type!=d.Text){if(this._options.includeLocation&&!b.location)b.location=this.getLocation(b.type==d.Tag);this.parseAttribs(b);this._elements.push(b);b.type!=d.Text&&b.type!=d.Comment&&b.type!=d.Directive&&b.data.charAt(b.data.length-1)=="/"&&this._elements.push({raw:"/"+b.name,data:"/"+b.name,name:"/"+b.name,type:b.type})}this._parseState=c=="<"?d.Tag:d.Text;this._current= this._next+1;this._prevTagSep=c}if(this._options.includeLocation){this.getLocation();this._location.row+=this._location.inBuffer;this._location.inBuffer=0;this._location.charOffset=0}this._buffer=this._current<=a?this._buffer.substring(this._current):"";this._current=0;this.writeHandler()};e.prototype.getLocation=function(a){for(var c=this._location,b=this._current-(a?1:0),h=a&&c.charOffset==0&&this._current==0;c.charOffset<b;c.charOffset++){a=this._buffer.charAt(c.charOffset);if(a=="\n"){c.inBuffer++; c.col=0}else a!="\r"&&c.col++}return{line:c.row+c.inBuffer+1,col:c.col+(h?0:1)}};e.prototype.validateHandler=function(a){if(typeof a!="object")throw Error("Handler is not an object");if(typeof a.reset!="function")throw Error("Handler method 'reset' is invalid");if(typeof a.done!="function")throw Error("Handler method 'done' is invalid");if(typeof a.writeTag!="function")throw Error("Handler method 'writeTag' is invalid");if(typeof a.writeText!="function")throw Error("Handler method 'writeText' is invalid"); if(typeof a.writeComment!="function")throw Error("Handler method 'writeComment' is invalid");if(typeof a.writeDirective!="function")throw Error("Handler method 'writeDirective' is invalid");};e.prototype.writeHandler=function(a){a=!!a;if(!(this._tagStack.length&&!a))for(;this._elements.length;){a=this._elements.shift();switch(a.type){case d.Comment:this._handler.writeComment(a);break;case 
d.Directive:this._handler.writeDirective(a);break;case d.Text:this._handler.writeText(a);break;default:this._handler.writeTag(a)}}}; e.prototype.handleError=function(a){if(typeof this._handler.error=="function")this._handler.error(a);else throw a;};(function(a,c){var b=function(){};b.prototype=c.prototype;a.super_=c;a.prototype=new b;a.prototype.constructor=a})(n,i);n.prototype.done=function(){var a={},c,b=f.getElementsByTagName(function(j){return j=="rss"||j=="feed"},this.dom,false);if(b.length)c=b[0];if(c){if(c.name=="rss"){a.type="rss";c=c.children[0];a.id="";try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(h){}try{a.link= f.getElementsByTagName("link",c.children,false)[0].children[0].data}catch(g){}try{a.description=f.getElementsByTagName("description",c.children,false)[0].children[0].data}catch(l){}try{a.updated=new Date(f.getElementsByTagName("lastBuildDate",c.children,false)[0].children[0].data)}catch(m){}try{a.author=f.getElementsByTagName("managingEditor",c.children,false)[0].children[0].data}catch(o){}a.items=[];f.getElementsByTagName("item",c.children).forEach(function(j){var k={};try{k.id=f.getElementsByTagName("guid", j.children,false)[0].children[0].data}catch(q){}try{k.title=f.getElementsByTagName("title",j.children,false)[0].children[0].data}catch(r){}try{k.link=f.getElementsByTagName("link",j.children,false)[0].children[0].data}catch(s){}try{k.description=f.getElementsByTagName("description",j.children,false)[0].children[0].data}catch(t){}try{k.pubDate=new Date(f.getElementsByTagName("pubDate",j.children,false)[0].children[0].data)}catch(u){}a.items.push(k)})}else{a.type="atom";try{a.id=f.getElementsByTagName("id", 
c.children,false)[0].children[0].data}catch(p){}try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(v){}try{a.link=f.getElementsByTagName("link",c.children,false)[0].attribs.href}catch(w){}try{a.description=f.getElementsByTagName("subtitle",c.children,false)[0].children[0].data}catch(x){}try{a.updated=new Date(f.getElementsByTagName("updated",c.children,false)[0].children[0].data)}catch(y){}try{a.author=f.getElementsByTagName("email",c.children,true)[0].children[0].data}catch(z){}a.items= [];f.getElementsByTagName("entry",c.children).forEach(function(j){var k={};try{k.id=f.getElementsByTagName("id",j.children,false)[0].children[0].data}catch(q){}try{k.title=f.getElementsByTagName("title",j.children,false)[0].children[0].data}catch(r){}try{k.link=f.getElementsByTagName("link",j.children,false)[0].attribs.href}catch(s){}try{k.description=f.getElementsByTagName("summary",j.children,false)[0].children[0].data}catch(t){}try{k.pubDate=new Date(f.getElementsByTagName("updated",j.children, false)[0].children[0].data)}catch(u){}a.items.push(k)})}this.dom=a}n.super_.prototype.done.call(this)};i._emptyTags={area:1,base:1,basefont:1,br:1,col:1,frame:1,hr:1,img:1,input:1,isindex:1,link:1,meta:1,param:1,embed:1};i.reWhitespace=/^\s*$/;i.prototype.dom=null;i.prototype.reset=function(){this.dom=[];this._done=false;this._tagStack=[];this._tagStack.last=function(){return this.length?this[this.length-1]:null}};i.prototype.done=function(){this._done=true;this.handleCallback(null)};i.prototype.writeTag= 
function(a){this.handleElement(a)};i.prototype.writeText=function(a){if(this._options.ignoreWhitespace)if(i.reWhitespace.test(a.data))return;this.handleElement(a)};i.prototype.writeComment=function(a){this.handleElement(a)};i.prototype.writeDirective=function(a){this.handleElement(a)};i.prototype.error=function(a){this.handleCallback(a)};i.prototype._options=null;i.prototype._callback=null;i.prototype._done=false;i.prototype._tagStack=null;i.prototype.handleCallback=function(a){if(typeof this._callback!= "function")if(a)throw a;else return;this._callback(a,this.dom)};i.prototype.isEmptyTag=function(a){a=a.name.toLowerCase();if(a.charAt(0)=="/")a=a.substring(1);return this._options.enforceEmptyTags&&!!i._emptyTags[a]};i.prototype.handleElement=function(a){this._done&&this.handleCallback(Error("Writing to the handler after done() called is not allowed without a reset()"));if(!this._options.verbose){delete a.raw;if(a.type=="tag"||a.type=="script"||a.type=="style")delete a.data}if(this._tagStack.last())if(a.type!= d.Text&&a.type!=d.Comment&&a.type!=d.Directive)if(a.name.charAt(0)=="/"){var c=a.name.substring(1);if(!this.isEmptyTag(a)){for(a=this._tagStack.length-1;a>-1&&this._tagStack[a--].name!=c;);if(a>-1||this._tagStack[0].name==c)for(;a<this._tagStack.length-1;)this._tagStack.pop()}}else{if(!this._tagStack.last().children)this._tagStack.last().children=[];this._tagStack.last().children.push(a);this.isEmptyTag(a)||this._tagStack.push(a)}else{if(!this._tagStack.last().children)this._tagStack.last().children= [];this._tagStack.last().children.push(a)}else if(a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive){if(a.name.charAt(0)!="/"){this.dom.push(a);this.isEmptyTag(a)||this._tagStack.push(a)}}else this.dom.push(a)};var f={testElement:function(a,c){if(!c)return false;for(var b in a)if(b=="tag_name"){if(c.type!="tag"&&c.type!="script"&&c.type!="style")return false;if(!a.tag_name(c.name))return false}else if(b=="tag_type"){if(!a.tag_type(c.type))return false}else 
if(b=="tag_contains"){if(c.type!="text"&& c.type!="comment"&&c.type!="directive")return false;if(!a.tag_contains(c.data))return false}else if(!c.attribs||!a[b](c.attribs[b]))return false;return true},getElements:function(a,c,b,h){function g(o){return function(p){return p==o}}b=b===undefined||b===null||!!b;h=isNaN(parseInt(h))?-1:parseInt(h);if(!c)return[];var l=[],m;for(m in a)if(typeof a[m]!="function")a[m]=g(a[m]);f.testElement(a,c)&&l.push(c);if(h>=0&&l.length>=h)return l;if(b&&c.children)c=c.children;else if(c instanceof Array)c=c;else return l; for(m=0;m<c.length;m++){l=l.concat(f.getElements(a,c[m],b,h));if(h>=0&&l.length>=h)break}return l},getElementById:function(a,c,b){a=f.getElements({id:a},c,b,1);return a.length?a[0]:null},getElementsByTagName:function(a,c,b,h){return f.getElements({tag_name:a},c,b,h)},getElementsByTagType:function(a,c,b,h){return f.getElements({tag_type:a},c,b,h)}};exports.Parser=e;exports.DefaultHandler=i;exports.RssHandler=n;exports.ElementType=d;exports.DomUtils=f})();
+/* v1.7.5 */
+(function(){function e(a,c){this._options=c?c:{};void 0==this._options.includeLocation&&(this._options.includeLocation=!1);this.validateHandler(a);this._handler=a;this.reset()}function j(a){j.super_.call(this,a,{ignoreWhitespace:!0,verbose:!1,enforceEmptyTags:!1})}function g(a,c){this.reset();this._options=c?c:{};void 0==this._options.ignoreWhitespace&&(this._options.ignoreWhitespace=!1);void 0==this._options.verbose&&(this._options.verbose=!0);void 0==this._options.enforceEmptyTags&&(this._options.enforceEmptyTags= !0);"function"==typeof a&&(this._callback=a)}if(!("function"==typeof require&&"object"==typeof exports&&"object"==typeof module&&"string"==typeof __filename&&"string"==typeof __dirname)){if(this.Tautologistics){if(this.Tautologistics.NodeHtmlParser)return}else this.Tautologistics={};this.Tautologistics.NodeHtmlParser={};exports=this.Tautologistics.NodeHtmlParser}var d={Text:"text",Directive:"directive",Comment:"comment",Script:"script",Style:"style",Tag:"tag"};e._reTrim=/(^\s+|\s+$)/g;e._reTrimComment= /(^\!--|--$)/g;e._reWhitespace=/\s/g;e._reTagName=/^\s*(\/?)\s*([^\s\/]+)/;e._reAttrib=/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;e._reTags=/[\<\>]/g;e.prototype.parseComplete=function(a){this.reset();this.parseChunk(a);this.done()};e.prototype.parseChunk=function(a){this._done&&this.handleError(Error("Attempted to parse chunk after parsing already done"));this._buffer+=a;this.parseTags()};e.prototype.done=function(){if(!this._done){this._done= !0;if(this._buffer.length){var 
a=this._buffer;this._buffer="";a={raw:a,data:this._parseState==d.Text?a:a.replace(e._reTrim,""),type:this._parseState};if(this._parseState==d.Tag||this._parseState==d.Script||this._parseState==d.Style)a.name=this.parseTagName(a.data);this.parseAttribs(a);this._elements.push(a)}this.writeHandler();this._handler.done()}};e.prototype.reset=function(){this._buffer="";this._done=!1;this._elements=[];this._next=this._current=this._elementsCurrent=0;this._location={row:0,col:0, charOffset:0,inBuffer:0};this._parseState=d.Text;this._prevTagSep="";this._tagStack=[];this._handler.reset()};e.prototype._options=null;e.prototype._handler=null;e.prototype._buffer=null;e.prototype._done=!1;e.prototype._elements=null;e.prototype._elementsCurrent=0;e.prototype._current=0;e.prototype._next=0;e.prototype._location=null;e.prototype._parseState=d.Text;e.prototype._prevTagSep="";e.prototype._tagStack=null;e.prototype.parseTagAttribs=function(a){for(var c=a.length,b=0;b<c;){var e=a[b++]; (e.type==d.Tag||e.type==d.Script||e.type==d.style)&&this.parseAttribs(e)}return a};e.prototype.parseAttribs=function(a){if(!(a.type!=d.Script&&a.type!=d.Style&&a.type!=d.Tag)){var c=a.data.split(e._reWhitespace,1)[0],c=a.data.substring(c.length);if(!(1>c.length)){var b;for(e._reAttrib.lastIndex=0;b=e._reAttrib.exec(c);)void 0==a.attribs&&(a.attribs={}),"string"==typeof b[1]&&b[1].length?a.attribs[b[1]]=b[2]:"string"==typeof b[3]&&b[3].length?a.attribs[b[3].toString()]=b[4].toString():"string"==typeof b[5]&& b[5].length?a.attribs[b[5]]=b[6]:"string"==typeof b[7]&&b[7].length&&(a.attribs[b[7]]=b[7])}}};e.prototype.parseTagName=function(a){if(null==a||""==a)return"";a=e._reTagName.exec(a);return!a?"":(a[1]?"/":"")+a[2]};e.prototype.parseTags=function(){for(var a=this._buffer.length-1;e._reTags.test(this._buffer);){this._next=e._reTags.lastIndex-1;var 
c=this._buffer.charAt(this._next),b=this._buffer.substring(this._current,this._next),b={raw:b,data:this._parseState==d.Text?b:b.replace(e._reTrim,""),type:this._parseState}, f=this.parseTagName(b.data);if(this._tagStack.length)if(this._tagStack[this._tagStack.length-1]==d.Script)if("/script"==f.toLowerCase())this._tagStack.pop();else{if(0!=b.raw.indexOf("!--")&&(b.type=d.Text,this._elements.length&&this._elements[this._elements.length-1].type==d.Text)){var h=this._elements[this._elements.length-1];h.raw=h.data=h.raw+this._prevTagSep+b.raw;b.raw=b.data=""}}else this._tagStack[this._tagStack.length-1]==d.Style?"/style"==f.toLowerCase()?this._tagStack.pop():0!=b.raw.indexOf("!--")&& (b.type=d.Text,this._elements.length&&this._elements[this._elements.length-1].type==d.Text?(h=this._elements[this._elements.length-1],""!=b.raw?(h.raw=h.data=h.raw+this._prevTagSep+b.raw,b.raw=b.data=""):h.raw=h.data=h.raw+this._prevTagSep):""!=b.raw&&(b.raw=b.data=b.raw)):this._tagStack[this._tagStack.length-1]==d.Comment&&(h=b.raw.length,"-"==b.raw.charAt(h-2)&&"-"==b.raw.charAt(h-1)&&">"==c?(this._tagStack.pop(),this._elements.length&&this._elements[this._elements.length-1].type==d.Comment?(h= this._elements[this._elements.length-1],h.raw=h.data=(h.raw+b.raw).replace(e._reTrimComment,""),b.raw=b.data="",b.type=d.Text):b.type=d.Comment):(b.type=d.Comment,this._elements.length&&this._elements[this._elements.length-1].type==d.Comment?(h=this._elements[this._elements.length-1],h.raw=h.data=h.raw+b.raw+c,b.raw=b.data="",b.type=d.Text):b.raw=b.data=b.raw+c));if(b.type==d.Tag&&(b.name=f,f=f.toLowerCase(),0==b.raw.indexOf("!--")?(b.type=d.Comment,delete b.name,h=b.raw.length,"-"==b.raw.charAt(h- 
1)&&"-"==b.raw.charAt(h-2)&&">"==c?b.raw=b.data=b.raw.replace(e._reTrimComment,""):(b.raw+=c,this._tagStack.push(d.Comment))):0==b.raw.indexOf("!")||0==b.raw.indexOf("?")?b.type=d.Directive:"script"==f?(b.type=d.Script,"/"!=b.data.charAt(b.data.length-1)&&this._tagStack.push(d.Script)):"/script"==f?b.type=d.Script:"style"==f?(b.type=d.Style,"/"!=b.data.charAt(b.data.length-1)&&this._tagStack.push(d.Style)):"/style"==f&&(b.type=d.Style),b.name&&"/"==b.name.charAt(0)))b.data=b.name;if(""!=b.raw||b.type!= d.Text)this._options.includeLocation&&!b.location&&(b.location=this.getLocation(b.type==d.Tag)),this.parseAttribs(b),this._elements.push(b),b.type!=d.Text&&b.type!=d.Comment&&b.type!=d.Directive&&"/"==b.data.charAt(b.data.length-1)&&this._elements.push({raw:"/"+b.name,data:"/"+b.name,name:"/"+b.name,type:b.type});this._parseState="<"==c?d.Tag:d.Text;this._current=this._next+1;this._prevTagSep=c}this._options.includeLocation&&(this.getLocation(),this._location.row+=this._location.inBuffer,this._location.inBuffer= 0,this._location.charOffset=0);this._buffer=this._current<=a?this._buffer.substring(this._current):"";this._current=0;this.writeHandler()};e.prototype.getLocation=function(a){for(var c=this._location,b=this._current-(a?1:0),d=a&&0==c.charOffset&&0==this._current;c.charOffset<b;c.charOffset++)a=this._buffer.charAt(c.charOffset),"\n"==a?(c.inBuffer++,c.col=0):"\r"!=a&&c.col++;return{line:c.row+c.inBuffer+1,col:c.col+(d?0:1)}};e.prototype.validateHandler=function(a){if("object"!=typeof a)throw Error("Handler is not an object"); if("function"!=typeof a.reset)throw Error("Handler method 'reset' is invalid");if("function"!=typeof a.done)throw Error("Handler method 'done' is invalid");if("function"!=typeof a.writeTag)throw Error("Handler method 'writeTag' is invalid");if("function"!=typeof a.writeText)throw Error("Handler method 'writeText' is invalid");if("function"!=typeof a.writeComment)throw Error("Handler method 'writeComment' is invalid");if("function"!=typeof 
a.writeDirective)throw Error("Handler method 'writeDirective' is invalid"); };e.prototype.writeHandler=function(a){if(!this._tagStack.length||a)for(;this._elements.length;)switch(a=this._elements.shift(),a.type){case d.Comment:this._handler.writeComment(a);break;case d.Directive:this._handler.writeDirective(a);break;case d.Text:this._handler.writeText(a);break;default:this._handler.writeTag(a)}};e.prototype.handleError=function(a){if("function"==typeof this._handler.error)this._handler.error(a);else throw a;};(function(a,c){var b=function(){};b.prototype=c.prototype;a.super_= c;a.prototype=new b;a.prototype.constructor=a})(j,g);j.prototype.done=function(){var a={},c,b=f.getElementsByTagName(function(a){return"rss"==a||"feed"==a},this.dom,!1);b.length&&(c=b[0]);if(c){if("rss"==c.name){a.type="rss";c=c.children[0];a.id="";try{a.title=f.getElementsByTagName("title",c.children,!1)[0].children[0].data}catch(d){}try{a.link=f.getElementsByTagName("link",c.children,!1)[0].children[0].data}catch(e){}try{a.description=f.getElementsByTagName("description",c.children,!1)[0].children[0].data}catch(g){}try{a.updated= new Date(f.getElementsByTagName("lastBuildDate",c.children,!1)[0].children[0].data)}catch(i){}try{a.author=f.getElementsByTagName("managingEditor",c.children,!1)[0].children[0].data}catch(l){}a.items=[];f.getElementsByTagName("item",c.children).forEach(function(b){var c={};try{c.id=f.getElementsByTagName("guid",b.children,!1)[0].children[0].data}catch(d){}try{c.title=f.getElementsByTagName("title",b.children,!1)[0].children[0].data}catch(e){}try{c.link=f.getElementsByTagName("link",b.children,!1)[0].children[0].data}catch(h){}try{c.description= f.getElementsByTagName("description",b.children,!1)[0].children[0].data}catch(g){}try{c.pubDate=new 
Date(f.getElementsByTagName("pubDate",b.children,!1)[0].children[0].data)}catch(k){}a.items.push(c)})}else{a.type="atom";try{a.id=f.getElementsByTagName("id",c.children,!1)[0].children[0].data}catch(m){}try{a.title=f.getElementsByTagName("title",c.children,!1)[0].children[0].data}catch(n){}try{a.link=f.getElementsByTagName("link",c.children,!1)[0].attribs.href}catch(o){}try{a.description=f.getElementsByTagName("subtitle", c.children,!1)[0].children[0].data}catch(p){}try{a.updated=new Date(f.getElementsByTagName("updated",c.children,!1)[0].children[0].data)}catch(q){}try{a.author=f.getElementsByTagName("email",c.children,!0)[0].children[0].data}catch(r){}a.items=[];f.getElementsByTagName("entry",c.children).forEach(function(b){var c={};try{c.id=f.getElementsByTagName("id",b.children,!1)[0].children[0].data}catch(d){}try{c.title=f.getElementsByTagName("title",b.children,!1)[0].children[0].data}catch(e){}try{c.link=f.getElementsByTagName("link", b.children,!1)[0].attribs.href}catch(h){}try{c.description=f.getElementsByTagName("summary",b.children,!1)[0].children[0].data}catch(g){}try{c.pubDate=new Date(f.getElementsByTagName("updated",b.children,!1)[0].children[0].data)}catch(k){}a.items.push(c)})}this.dom=a}j.super_.prototype.done.call(this)};g._emptyTags={area:1,base:1,basefont:1,br:1,col:1,frame:1,hr:1,img:1,input:1,isindex:1,link:1,meta:1,param:1,embed:1};g.reWhitespace=/^\s*$/;g.prototype.dom=null;g.prototype.reset=function(){this.dom= [];this._done=!1;this._tagStack=[];this._tagStack.last=function(){return this.length?this[this.length-1]:null}};g.prototype.done=function(){this._done=!0;this.handleCallback(null)};g.prototype.writeTag=function(a){this.handleElement(a)};g.prototype.writeText=function(a){(!this._options.ignoreWhitespace||!g.reWhitespace.test(a.data))&&this.handleElement(a)};g.prototype.writeComment=function(a){this.handleElement(a)};g.prototype.writeDirective=function(a){this.handleElement(a)};g.prototype.error=function(a){this.handleCallback(a)}; 
g.prototype._options=null;g.prototype._callback=null;g.prototype._done=!1;g.prototype._tagStack=null;g.prototype.handleCallback=function(a){if("function"!=typeof this._callback){if(a)throw a;}else this._callback(a,this.dom)};g.prototype.isEmptyTag=function(a){a=a.name.toLowerCase();"/"==a.charAt(0)&&(a=a.substring(1));return this._options.enforceEmptyTags&&!!g._emptyTags[a]};g.prototype.handleElement=function(a){this._done&&this.handleCallback(Error("Writing to the handler after done() called is not allowed without a reset()")); this._options.verbose||(delete a.raw,("tag"==a.type||"script"==a.type||"style"==a.type)&&delete a.data);if(this._tagStack.last())if(a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive)if("/"==a.name.charAt(0)){var c=a.name.substring(1);if(!this.isEmptyTag(a)){for(a=this._tagStack.length-1;-1<a&&this._tagStack[a--].name!=c;);if(-1<a||this._tagStack[0].name==c)for(;a<this._tagStack.length-1;)this._tagStack.pop()}}else this._tagStack.last().children||(this._tagStack.last().children=[]),this._tagStack.last().children.push(a), this.isEmptyTag(a)||this._tagStack.push(a);else this._tagStack.last().children||(this._tagStack.last().children=[]),this._tagStack.last().children.push(a);else a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive?"/"!=a.name.charAt(0)&&(this.dom.push(a),this.isEmptyTag(a)||this._tagStack.push(a)):this.dom.push(a)};var f={testElement:function(a,c){if(!c)return!1;for(var b in a)if("tag_name"==b){if("tag"!=c.type&&"script"!=c.type&&"style"!=c.type||!a.tag_name(c.name))return!1}else if("tag_type"== b){if(!a.tag_type(c.type))return!1}else if("tag_contains"==b){if("text"!=c.type&&"comment"!=c.type&&"directive"!=c.type||!a.tag_contains(c.data))return!1}else if(!c.attribs||!a[b](c.attribs[b]))return!1;return!0},getElements:function(a,c,b,d){function e(a){return function(b){return b==a}}b=void 0===b||null===b||!!b;d=isNaN(parseInt(d))?-1:parseInt(d);if(!c)return[];var g=[],i;for(i in a)"function"!=typeof 
a[i]&&(a[i]=e(a[i]));f.testElement(a,c)&&g.push(c);if(0<=d&&g.length>=d)return g;if(b&&c.children)c= c.children;else if(!(c instanceof Array))return g;for(i=0;i<c.length&&!(g=g.concat(f.getElements(a,c[i],b,d)),0<=d&&g.length>=d);i++);return g},getElementById:function(a,c,b){a=f.getElements({id:a},c,b,1);return a.length?a[0]:null},getElementsByTagName:function(a,c,b,d){return f.getElements({tag_name:a},c,b,d)},getElementsByTagType:function(a,c,b,d){return f.getElements({tag_type:a},c,b,d)}};exports.Parser=e;exports.DefaultHandler=g;exports.RssHandler=j;exports.ElementType=d;exports.DomUtils=f})();

0 comments on commit 7ded5ba

Please sign in to comment.
Something went wrong with that request. Please try again.