From 92e98d66c73ca2f6bce02e552ba7969c071fd665 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Tue, 31 Oct 2023 23:20:32 -0600 Subject: [PATCH 1/2] fix: fix emstrong unicode --- src/Tokenizer.ts | 5 ++--- test/specs/new/emoji_inline.html | 2 ++ test/specs/new/emoji_inline.md | 4 ++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 34c8e7510b..e9088825be 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -629,7 +629,7 @@ export class _Tokenizer { endReg.lastIndex = 0; // Clip maskedSrc to same section of string as src (move to lexer?) - maskedSrc = maskedSrc.slice(-1 * src.length + match[0].length - 1); + maskedSrc = maskedSrc.slice(-1 * src.length + lLength); while ((match = endReg.exec(maskedSrc)) != null) { rDelim = match[1] || match[2] || match[3] || match[4] || match[5] || match[6]; @@ -654,8 +654,7 @@ export class _Tokenizer { // Remove extra characters. *a*** -> *a* rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal); - - const raw = [...src].slice(0, lLength + match.index + rLength + 1).join(''); + const raw = src.slice(0, lLength + match.index + rLength + [...match[0]][0].length); // Create `em` if smallest delimiter has odd char count. *a*** if (Math.min(lLength, rLength) % 2) { diff --git a/test/specs/new/emoji_inline.html b/test/specs/new/emoji_inline.html index 17d788c15e..7ea637d5d8 100644 --- a/test/specs/new/emoji_inline.html +++ b/test/specs/new/emoji_inline.html @@ -18,3 +18,5 @@

āš ļø test

Here, the emoji rendering works, but the text doesn't get rendered in italic.

šŸ’ test

+

tšŸ’t test

+

tšŸ’t test

diff --git a/test/specs/new/emoji_inline.md b/test/specs/new/emoji_inline.md index 5690933d30..7a9d4f2d67 100644 --- a/test/specs/new/emoji_inline.md +++ b/test/specs/new/emoji_inline.md @@ -37,3 +37,7 @@ Situations where it works: Here, the emoji rendering works, but the text doesn't get rendered in italic. *💁 test* + +*t💁t* test + +**t💁t** test From 13a343e4eeceff812767de82df4032cd2ec9e3c4 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Wed, 1 Nov 2023 22:14:58 -0600 Subject: [PATCH 2/2] comment last char --- src/Tokenizer.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index e9088825be..94c2ed3b36 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -654,7 +654,9 @@ export class _Tokenizer { // Remove extra characters. *a*** -> *a* rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal); - const raw = src.slice(0, lLength + match.index + rLength + [...match[0]][0].length); + // char length can be >1 for unicode characters; + const lastCharLength = [...match[0]][0].length; + const raw = src.slice(0, lLength + match.index + lastCharLength + rLength); // Create `em` if smallest delimiter has odd char count. *a*** if (Math.min(lLength, rLength) % 2) {