Skip to content

Loading…

Fixed parsing of an HTML tag as the first thing inside a <script>. #56

Closed
wants to merge 1 commit into from

1 participant

@papandreou

Hi!

When the first token inside a <script> looks like markup, the leading < is dropped from element.raw and element.data. This causes problems when parsing templates that use the type='text/html' hack, for example:

<html>
<body>
    <script type='text/html'><div></div></script>
</body>
</html>

... which makes the Text element come out as div></div> instead of <div></div>.

The above is also included as a test case.

I ran into this issue with jsdom, which still uses htmlparser 1.x.

@papandreou

@tautologistics: Any chance of getting this merged?

@papandreou

Never mind, all the software I care about is using other parsers by now.

@papandreou papandreou closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Showing with 64 additions and 4 deletions.
  1. +8 −4 lib/htmlparser.js
  2. +56 −0 tests/23-tag-as-the-first-thing-inside-script.js
View
12 lib/htmlparser.js
@@ -238,13 +238,17 @@ function Parser (handler, options) {
else { //Not a closing script tag
if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
//All data from here to script close is now a text element
- element.type = ElementType.Text;
//If the previous element is text, append the current text to it
- if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Text) {
+ if (this._elements.length) {
var prevElement = this._elements[this._elements.length - 1];
- prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
- element.raw = element.data = ""; //This causes the current element to not be added to the element list
+ if (prevElement.type === ElementType.Text) {
+ prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
+ element.raw = element.data = ""; //This causes the current element to not be added to the element list
+ } else if (element.type === 'tag') {
+ element.raw = element.data = '<' + element.raw;
+ }
}
+ element.type = ElementType.Text;
}
}
}
View
56 tests/23-tag-as-the-first-thing-inside-script.js
@@ -0,0 +1,56 @@
+(function () {
+ // Returns true only when require is a function, exports and module are objects, and __filename and __dirname are strings.
+function RunningInNode () {
+ return(
+ (typeof require) == "function"
+ &&
+ (typeof exports) == "object"
+ &&
+ (typeof module) == "object"
+ &&
+ (typeof __filename) == "string"
+ &&
+ (typeof __dirname) == "string"
+ );
+}
+ // When the check above fails: make sure this.Tautologistics.NodeHtmlParser.Tests exists and register a fresh exports object in it.
+if (!RunningInNode()) {
+ if (!this.Tautologistics)
+ this.Tautologistics = {};
+ if (!this.Tautologistics.NodeHtmlParser)
+ this.Tautologistics.NodeHtmlParser = {};
+ if (!this.Tautologistics.NodeHtmlParser.Tests)
+ this.Tautologistics.NodeHtmlParser.Tests = [];
+ exports = {}; // NOTE(review): exports is not declared in this file; the assignment would throw under strict mode - confirm the runner is non-strict
+ this.Tautologistics.NodeHtmlParser.Tests.push(exports); // the same object is populated below
+}
+ // Test case definition: name, options, input HTML, and the expected result.
+exports.name = "Tag as the first thing inside <script>";
+exports.options = {
+ handler: {}
+ , parser: {}
+};
+exports.html = "<head><script type=\"text/html\"><div></div></script></head>";
+exports.expected =
+[ { raw: 'head'
+ , data: 'head'
+ , type: 'tag'
+ , name: 'head'
+ , children:
+ [ { raw: 'script type="text/html"'
+ , data: 'script type="text/html"'
+ , type: 'script'
+ , name: 'script'
+ , attribs: { type: 'text/html' }
+ , children:
+ [ { raw: '<div></div>' // the script body is expected as one text child, with its leading '<' intact
+ , data: '<div></div>'
+ , type: 'text'
+ }
+ ]
+ }
+ ]
+ }
+];
+
+})();
Something went wrong with that request. Please try again.