Skip to content

Commit

Permalink
Fix character references ending in raw nodes
Browse files Browse the repository at this point in the history
Closes GH-15.
  • Loading branch information
wooorm committed Nov 9, 2021
1 parent 5175c03 commit 9e1805d
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
19 changes: 18 additions & 1 deletion lib/index.js
Expand Up @@ -28,17 +28,21 @@
* Way too simple, but works for us.
* @property {HiddenPreprocessor} preprocessor
* @property {(value: string) => void} write
* @property {() => number} _consume
* @property {Array.<HiddenToken>} tokenQueue
* @property {string} state
* @property {string} returnState
* @property {number} charRefCode
* @property {Array.<number>} tempBuff
* @property {Function} _flushCodePointsConsumedAsCharacterReference
* @property {string} lastStartTagName
* @property {number} consumedAfterSnapshot
* @property {boolean} active
* @property {HiddenToken|undefined} currentCharacterToken
* @property {HiddenToken|undefined} currentToken
* @property {unknown} currentAttr
* @property {Function} NAMED_CHARACTER_REFERENCE_STATE
* @property {Function} NUMERIC_CHARACTER_REFERENCE_END_STATE
*
* @typedef {Object.<string, unknown> & {location: P5Location}} HiddenToken
*
Expand Down Expand Up @@ -359,7 +363,20 @@ export const raw =
// See the code for `parse` and `parseFragment`:
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/parser/index.js#L371>.
tokenizer.write(node.value)
parser._runParsingLoop(undefined)
parser._runParsingLoop(null)

// Character references hang, so if we ended in the middle of one, we need
// to flush it too.
// We mark the last chunk as written on the preprocessor, as if the document
// ends here.
// Then a single call to the relevant state does the trick: parse5
// consumes the whole token.
if (
tokenizer.state === 'NAMED_CHARACTER_REFERENCE_STATE' ||
tokenizer.state === 'NUMERIC_CHARACTER_REFERENCE_END_STATE'
) {
preprocessor.lastChunkWritten = true
tokenizer[tokenizer.state](tokenizer._consume())
}

// Process final characters if they’re still there after hibernating.
// Similar to:
Expand Down
18 changes: 18 additions & 0 deletions test.js
Expand Up @@ -153,6 +153,24 @@ test('raw', (t) => {
'should pass raw nodes through even after textarea (#3)'
)

t.deepEqual(
raw(u('root', [u('raw', '&#123;and&#125;')])),
u('root', {data: {quirksMode: false}}, [u('text', '{and}')]),
'should pass character references through (decimal)'
)

t.deepEqual(
raw(u('root', [u('raw', '&lt;and&gt;')])),
u('root', {data: {quirksMode: false}}, [u('text', '<and>')]),
'should pass character references through (named)'
)

t.deepEqual(
raw(u('root', [u('raw', '&#x7b;and&#x7d;')])),
u('root', {data: {quirksMode: false}}, [u('text', '{and}')]),
'should pass character references through (hexadecimal)'
)

t.deepEqual(
raw(u('root', [u('raw', '<template>a<b></b>c</template>')])),
u('root', {data: {quirksMode: false}}, [
Expand Down

0 comments on commit 9e1805d

Please sign in to comment.