From 41270b8ded05719381eef091798665b3b6eee6cf Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Wed, 26 Oct 2022 22:53:14 -0400 Subject: [PATCH 1/5] fix: Support escapes within emphasis ...particularly right at the end. Fixes #2280 --- src/Tokenizer.js | 10 ++++++---- src/rules.js | 4 ++-- test/unit/Lexer-spec.js | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 76f26e67fb..b15e6936c6 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -629,22 +629,24 @@ export class Tokenizer { // Remove extra characters. *a*** -> *a* rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal); + const raw = src.slice(0, lLength + match.index + (match[0].length - rDelim.length) + rLength); + // Create `em` if smallest delimiter has odd char count. *a*** if (Math.min(lLength, rLength) % 2) { - const text = src.slice(1, lLength + match.index + rLength); + const text = raw.slice(1, -1); return { type: 'em', - raw: src.slice(0, lLength + match.index + rLength + 1), + raw, text, tokens: this.lexer.inlineTokens(text) }; } // Create 'strong' if smallest delimiter has even char count. **a*** - const text = src.slice(2, lLength + match.index + rLength - 1); + const text = raw.slice(2, -2); return { type: 'strong', - raw: src.slice(0, lLength + match.index + rLength + 1), + raw, text, tokens: this.lexer.inlineTokens(text) }; diff --git a/src/rules.js b/src/rules.js index 25d1415301..4db1e1afaf 100644 --- a/src/rules.js +++ b/src/rules.js @@ -169,8 +169,8 @@ export const inline = { lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/, // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right. 
// () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a - rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[^*]+(?=[^*])|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/, - rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _ + rDelimAst: /^(?:[^_*\\]|\\.)*?\_\_(?:[^_*\\]|\\.)*?\*(?:[^_*\\]|\\.)*?(?=\_\_)|(?:[^*\\]|\\.)+(?=[^*])|[punct_](\*+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|(?:[^punct*_\s\\]|\\.)(\*+)(?=[^punct*_\s])/, + rDelimUnd: /^(?:[^_*\\]|\\.)*?\*\*(?:[^_*\\]|\\.)*?\_(?:[^_*\\]|\\.)*?(?=\*\*)|(?:[^_\\]|\\.)+(?=[^_])|[punct*](\_+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _ }, code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, br: /^( {2,}|\\)\n(?!\s*$)/, diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index 23913b6e4a..8174d53139 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -776,6 +776,41 @@ paragraph }); }); + it('escaped punctuation inside emphasis', () => { + expectInlineTokens({ + md: '**strong text\\[**\\]', + tokens: [ + { + type: 'strong', + raw: '**strong text\\[**', + text: 'strong text\\[', + tokens: [ + { type: 'text', raw: 'strong text', text: 'strong text' }, + { type: 'escape', raw: '\\[', text: '[' } + ] + }, + { type: 'escape', raw: '\\]', text: ']' } + ] + }); + expectInlineTokens({ + md: '_em\\<pha\\>sis_', + tokens: [ + { + type: 'em', + raw: '_em\\<pha\\>sis_', + text: 'em\\<pha\\>sis', + tokens: [ + { type: 'text', raw: 'em', text: 'em' }, + { type: 
'escape', raw: '\\<', text: '<' }, + { type: 'text', raw: 'pha', text: 'pha' }, + { type: 'escape', raw: '\\>', text: '>' }, + { type: 'text', raw: 'sis', text: 'sis' } + ] + } + ] + }); + }); + it('html', () => { expectInlineTokens({ md: '
<div>html</div>
', From 2ee7bfd71cc3ddc61fd51cd70d39f780fc213acd Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Fri, 28 Oct 2022 17:31:36 -0400 Subject: [PATCH 2/5] chore: realign comments with regexp alternatives --- src/rules.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rules.js b/src/rules.js index 4db1e1afaf..ba9d7c5531 100644 --- a/src/rules.js +++ b/src/rules.js @@ -168,7 +168,7 @@ export const inline = { emStrong: { lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/, // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right. - // () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a + // () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a rDelimAst: /^(?:[^_*\\]|\\.)*?\_\_(?:[^_*\\]|\\.)*?\*(?:[^_*\\]|\\.)*?(?=\_\_)|(?:[^*\\]|\\.)+(?=[^*])|[punct_](\*+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|(?:[^punct*_\s\\]|\\.)(\*+)(?=[^punct*_\s])/, rDelimUnd: /^(?:[^_*\\]|\\.)*?\*\*(?:[^_*\\]|\\.)*?\_(?:[^_*\\]|\\.)*?(?=\*\*)|(?:[^_\\]|\\.)+(?=[^_])|[punct*](\_+)(?=[\s]|$)|(?:[^punct*_\s\\]|\\.)(\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _ }, From 8e1aa371bfdccefef9c1fe77369d55fd4647f461 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Fri, 28 Oct 2022 17:41:28 -0400 Subject: [PATCH 3/5] test: Add HTML test for escapes within emphasis --- test/specs/new/escape_within_emphasis.html | 5 +++++ test/specs/new/escape_within_emphasis.md | 5 +++++ 2 files changed, 10 insertions(+) create mode 100644 test/specs/new/escape_within_emphasis.html create mode 100644 test/specs/new/escape_within_emphasis.md diff --git a/test/specs/new/escape_within_emphasis.html 
b/test/specs/new/escape_within_emphasis.html new file mode 100644 index 0000000000..0138946b0c --- /dev/null +++ b/test/specs/new/escape_within_emphasis.html @@ -0,0 +1,5 @@ +

<p><strong>strong text[</strong>]</p>

+ +

<p><strong>strong text\[</strong>]</p>

+ +

<p><em>em[pha](sis)</em></p>

diff --git a/test/specs/new/escape_within_emphasis.md b/test/specs/new/escape_within_emphasis.md new file mode 100644 index 0000000000..310232d03c --- /dev/null +++ b/test/specs/new/escape_within_emphasis.md @@ -0,0 +1,5 @@ +**strong text\[**\] + +**strong text\\\[**\] + +_em\[pha\]\(sis\)_ From 2fbf7458961378422d56d2f4eb1eb83cafaa0f42 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Mon, 31 Oct 2022 11:14:12 -0400 Subject: [PATCH 4/5] fix: Correct recognition and masking of escaped emphasis punctuation --- src/Lexer.js | 3 ++- src/rules.js | 4 +++- test/specs/new/escape_within_emphasis.html | 2 ++ test/specs/new/escape_within_emphasis.md | 2 ++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Lexer.js b/src/Lexer.js index c4bbf41a83..acb0fc2996 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -350,7 +350,8 @@ export class Lexer { // Mask out escaped em & strong delimiters while ((match = this.tokenizer.rules.inline.escapedEmSt.exec(maskedSrc)) != null) { - maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex); + maskedSrc = maskedSrc.slice(0, match.index + match[0].length - 2) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedEmSt.lastIndex); + this.tokenizer.rules.inline.escapedEmSt.lastIndex--; } while (src) { diff --git a/src/rules.js b/src/rules.js index ba9d7c5531..faa9147ba3 100644 --- a/src/rules.js +++ b/src/rules.js @@ -186,7 +186,9 @@ inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._pu // sequences em should skip over [title](link), `code`, inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g; -inline.escapedEmSt = /\\\*|\\_/g; +// lookbehind is not available on Safari as of version 16 +// inline.escapedEmSt = /(?<=(?:^|[^\\)(?:\\[^])*)\\[*_]/g; +inline.escapedEmSt = /(?:^|[^\\])(?:\\[^])*\\[*_]/g; inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex(); diff --git 
a/test/specs/new/escape_within_emphasis.html b/test/specs/new/escape_within_emphasis.html index 0138946b0c..c5885c607b 100644 --- a/test/specs/new/escape_within_emphasis.html +++ b/test/specs/new/escape_within_emphasis.html @@ -3,3 +3,5 @@

<p><strong>strong text\[</strong>]</p>

<p><em>em[pha](sis)</em></p>

+ +

<p><em>\</em></p>

diff --git a/test/specs/new/escape_within_emphasis.md b/test/specs/new/escape_within_emphasis.md index 310232d03c..03a7295b85 100644 --- a/test/specs/new/escape_within_emphasis.md +++ b/test/specs/new/escape_within_emphasis.md @@ -3,3 +3,5 @@ **strong text\\\[**\] _em\[pha\]\(sis\)_ + +_\\_ From b365f2ac5d11d0b1f7bddc0080ff6acd785750ff Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 1 Nov 2022 01:22:14 -0400 Subject: [PATCH 5/5] fix: Correct backslash fake-lookbehind --- src/rules.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rules.js b/src/rules.js index faa9147ba3..11bfbf4ef7 100644 --- a/src/rules.js +++ b/src/rules.js @@ -188,7 +188,7 @@ inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._pu inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g; // lookbehind is not available on Safari as of version 16 // inline.escapedEmSt = /(?<=(?:^|[^\\)(?:\\[^])*)\\[*_]/g; -inline.escapedEmSt = /(?:^|[^\\])(?:\\[^])*\\[*_]/g; +inline.escapedEmSt = /(?:^|[^\\])(?:\\\\)*\\[*_]/g; inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();