Tokenizer patch. While `tagNameBuffer` (lower-cased) is title, style or script,
a '<' only opens a tag when the input at the current index is the matching end
tag; '&' is not decoded as a character reference while it is script or style.
End tags get their own `endTagName` state, which clears `tagNameBuffer`.

diff --git a/src/evented-tokenizer.ts b/src/evented-tokenizer.ts
index 51d6512..80d0b4b 100644
--- a/src/evented-tokenizer.ts
+++ b/src/evented-tokenizer.ts
@@ -22,6 +22,7 @@ export default class EventedTokenizer {
   reset() {
     this.transitionTo(TokenizerState.beforeData);
     this.input = '';
+    this.tagNameBuffer = '';
 
     this.index = 0;
     this.line = 1;
@@ -113,21 +114,31 @@ export default class EventedTokenizer {
     this.delegate.appendToTagName(char);
   }
 
+  private isIgnoredEndTag(): boolean {
+    let tag = this.tagNameBuffer.toLowerCase();
+
+    return (tag === 'title' && this.input.substring(this.index, this.index + 8) !== '</title>') ||
+      (tag === 'style' && this.input.substring(this.index, this.index + 8) !== '</style>') ||
+      (tag === 'script' && this.input.substring(this.index, this.index + 9) !== '</script>');
+  }
+
   states: { [k in TokenizerState]?: (this: EventedTokenizer) => void } = {
     beforeData() {
       let char = this.peek();
 
-      if (char === '<') {
+      if (char === '<' && !this.isIgnoredEndTag()) {
         this.transitionTo(TokenizerState.tagOpen);
         this.markTagStart();
         this.consume();
       } else {
         if (char === '\n') {
           let tag = this.tagNameBuffer.toLowerCase();
+
           if (tag === 'pre' || tag === 'textarea') {
             this.consume();
           }
         }
+
         this.transitionTo(TokenizerState.data);
         this.delegate.beginData();
       }
@@ -135,13 +146,14 @@ export default class EventedTokenizer {
 
     data() {
       let char = this.peek();
+      let tag = this.tagNameBuffer.toLowerCase();
 
-      if (char === '<') {
+      if (char === '<' && !this.isIgnoredEndTag()) {
         this.delegate.finishData();
         this.transitionTo(TokenizerState.tagOpen);
         this.markTagStart();
         this.consume();
-      } else if (char === '&') {
+      } else if (char === '&' && tag !== 'script' && tag !== 'style') {
         this.consume();
         this.delegate.appendToData(this.consumeCharRef() || '&');
       } else {
@@ -168,7 +180,7 @@ export default class EventedTokenizer {
     markupDeclarationOpen() {
       let char = this.consume();
 
-      if (char === '-' && this.input.charAt(this.index) === '-') {
+      if (char === '-' && this.peek() === '-') {
         this.consume();
         this.transitionTo(TokenizerState.commentStart);
         this.delegate.beginComment();
@@ -251,6 +263,24 @@ export default class EventedTokenizer {
       }
     },
 
+    endTagName() {
+      let char = this.consume();
+
+      if (isSpace(char)) {
+        this.transitionTo(TokenizerState.beforeAttributeName);
+        this.tagNameBuffer = '';
+      } else if (char === '/') {
+        this.transitionTo(TokenizerState.selfClosingStartTag);
+        this.tagNameBuffer = '';
+      } else if (char === '>') {
+        this.delegate.finishTag();
+        this.transitionTo(TokenizerState.beforeData);
+        this.tagNameBuffer = '';
+      } else {
+        this.appendToTagName(char);
+      }
+    },
+
     beforeAttributeName() {
       let char = this.peek();
 
@@ -453,7 +483,7 @@ export default class EventedTokenizer {
       let char = this.consume();
 
       if (char === '@' || char === ':' || isAlpha(char)) {
-        this.transitionTo(TokenizerState.tagName);
+        this.transitionTo(TokenizerState.endTagName);
         this.tagNameBuffer = '';
         this.delegate.beginEndTag();
         this.appendToTagName(char);
diff --git a/src/generated/tokenizer-states.ts b/src/generated/tokenizer-states.ts
index 747c277..1c52cb1 100644
--- a/src/generated/tokenizer-states.ts
+++ b/src/generated/tokenizer-states.ts
@@ -12,6 +12,7 @@ export const enum TokenizerState {
   tagOpen = 'tagOpen',
   endTagOpen = 'endTagOpen',
   tagName = 'tagName',
+  endTagName = 'endTagName',
   rcdataLessThanSign = 'rcdataLessThanSign',
   rcdataEndTagOpen = 'rcdataEndTagOpen',
   rcdataEndTagName = 'rcdataEndTagName',
diff --git a/tests/tokenizer-tests.ts b/tests/tokenizer-tests.ts
index 17fc894..98538f1 100644
--- a/tests/tokenizer-tests.ts
+++ b/tests/tokenizer-tests.ts
@@ -205,6 +205,11 @@ QUnit.test('A newline immediately following a <pre> tag is stripped', function(a
   assert.deepEqual(tokens, [startTag('pre'), chars('hello'), endTag('pre')]);
 });
 
+QUnit.test('A newline immediately following a closing </pre> tag is not stripped', function(assert) {
+  let tokens = tokenize("\n<pre>\nhello</pre>\n");
+  assert.deepEqual(tokens, [chars('\n'), startTag('pre'), chars('hello'), endTag('pre'), chars('\n')]);
+});
+
 // https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
 QUnit.test('A newline immediately following a
 tag is stripped', function(assert) {
   let tokens = tokenize("
\nhello
"); @@ -217,6 +222,38 @@ QUnit.test('A newline immediately following a