fix(richtext-lexical): combine 2 normalizeMarkdown implementations and fix code block regex (#10470)

GermanJablo · web-flow · commit 0252681313ca · 2025-01-13T14:51:26.000Z
This should fix #10387. I don't know why we had two different copies of `normalizeMarkdown`. Also, the most up-to-date one still had a bug where lines were treated as if they were inside code blocks when they weren't. How I tested that it works: 1. I copied the `normalizeMarkdown` implementation from this PR into the website repo and made sure it is called before the conversion to editorState. 2. In the admin panel, sync docs. 3. In the admin panel, refresh MDX to Lexical (new button, below "sync docs"). 4. Look for the examples from bug #10387 and verify that they have been resolved. An extra pair of eyes would be nice to make sure I'm not getting confused with the imports.
diff --git a/packages/richtext-lexical/src/features/blocks/client/markdownTransformer.ts b/packages/richtext-lexical/src/features/blocks/client/markdownTransformer.ts
@@ -6,10 +6,12 @@ import { createHeadlessEditor } from '@lexical/headless'
 import type { Transformer } from '../../../packages/@lexical/markdown/index.js'
 import type { MultilineElementTransformer } from '../../../packages/@lexical/markdown/MarkdownTransformers.js'
 
-import { $convertToMarkdownString } from '../../../packages/@lexical/markdown/index.js'
+import {
+  $convertFromMarkdownString,
+  $convertToMarkdownString,
+} from '../../../packages/@lexical/markdown/index.js'
 import { extractPropsFromJSXPropsString } from '../../../utilities/jsx/extractPropsFromJSXPropsString.js'
 import { propsToJSXString } from '../../../utilities/jsx/jsx.js'
-import { $convertFromMarkdownString } from '../../../utilities/jsx/lexicalMarkdownCopy.js'
 import { $createBlockNode, $isBlockNode, BlockNode } from './nodes/BlocksNode.js'
 
 function createTagRegexes(tagName: string) {
diff --git a/packages/richtext-lexical/src/features/blocks/server/markdownTransformer.ts b/packages/richtext-lexical/src/features/blocks/server/markdownTransformer.ts
@@ -8,14 +8,14 @@ import type { NodeWithHooks } from '../../typesServer.js'
 
 import { getEnabledNodesFromServerNodes } from '../../../lexical/nodes/index.js'
 import {
+  $convertFromMarkdownString,
   $convertToMarkdownString,
   type MultilineElementTransformer,
   type TextMatchTransformer,
   type Transformer,
 } from '../../../packages/@lexical/markdown/index.js'
 import { extractPropsFromJSXPropsString } from '../../../utilities/jsx/extractPropsFromJSXPropsString.js'
 import { propsToJSXString } from '../../../utilities/jsx/jsx.js'
-import { $convertFromMarkdownString } from '../../../utilities/jsx/lexicalMarkdownCopy.js'
 import { linesFromStartToContentAndPropsString } from './linesFromMatchToContentAndPropsString.js'
 import { $createServerBlockNode, $isServerBlockNode, ServerBlockNode } from './nodes/BlocksNode.js'
 import {
diff --git a/packages/richtext-lexical/src/features/experimental_table/markdownTransformer.ts b/packages/richtext-lexical/src/features/experimental_table/markdownTransformer.ts
@@ -15,11 +15,11 @@ import {
 import { $isParagraphNode, $isTextNode } from 'lexical'
 
 import {
+  $convertFromMarkdownString,
   $convertToMarkdownString,
   type ElementTransformer,
   type Transformer,
 } from '../../packages/@lexical/markdown/index.js'
-import { $convertFromMarkdownString } from '../../utilities/jsx/lexicalMarkdownCopy.js'
 
 // Very primitive table setup
 const TABLE_ROW_REG_EXP = /^\|(.+)\|\s?$/
diff --git a/packages/richtext-lexical/src/index.ts b/packages/richtext-lexical/src/index.ts
@@ -1010,20 +1010,20 @@ export { sanitizeUrl, validateUrl } from './lexical/utils/url.js'
 
 export type * from './nodeTypes.js'
 
-export { defaultRichTextValue } from './populateGraphQL/defaultValue.js'
+export { $convertFromMarkdownString } from './packages/@lexical/markdown/index.js'
 
+export { defaultRichTextValue } from './populateGraphQL/defaultValue.js'
 export { populate } from './populateGraphQL/populate.js'
 export type { LexicalEditorProps, LexicalRichTextAdapter } from './types.js'
+
 export { createServerFeature } from './utilities/createServerFeature.js'
 
 export type { FieldsDrawerProps } from './utilities/fieldsDrawer/Drawer.js'
-
 export { extractPropsFromJSXPropsString } from './utilities/jsx/extractPropsFromJSXPropsString.js'
 export {
   extractFrontmatter,
   frontmatterToObject,
   objectToFrontmatter,
   propsToJSXString,
 } from './utilities/jsx/jsx.js'
-export { $convertFromMarkdownString } from './utilities/jsx/lexicalMarkdownCopy.js'
 export { upgradeLexicalData } from './utilities/upgradeLexicalData/index.js'
diff --git a/packages/richtext-lexical/src/packages/@lexical/markdown/MarkdownTransformers.ts b/packages/richtext-lexical/src/packages/@lexical/markdown/MarkdownTransformers.ts
@@ -185,16 +185,19 @@ export type TextMatchTransformer = Readonly<{
   type: 'text-match'
 }>
 
+const EMPTY_OR_WHITESPACE_ONLY = /^[\t ]*$/
 const ORDERED_LIST_REGEX = /^(\s*)(\d+)\.\s/
 const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/
 const CHECK_LIST_REGEX = /^(\s*)(?:-\s)?\s?(\[(\s|x)?\])\s/i
 const HEADING_REGEX = /^(#{1,6})\s/
 const QUOTE_REGEX = /^>\s/
-const CODE_START_REGEX = /^[ \t]*```(\w+)?/
-const CODE_END_REGEX = /[ \t]*```$/
+const CODE_START_REGEX = /^[ \t]*(\\`\\`\\`|```)(\w+)?/
+const CODE_END_REGEX = /[ \t]*(\\`\\`\\`|```)$/
 const CODE_SINGLE_LINE_REGEX = /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/
 const TABLE_ROW_REG_EXP = /^\|(.+)\|\s?$/
 const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/
+const TAG_START_REGEX = /^[ \t]*<[a-z_][\w-]*(?:\s[^<>]*)?\/?>/i
+const TAG_END_REGEX = /^[ \t]*<\/[a-z_][\w-]*\s*>/i
 
 const createBlockNode = (
   createNode: (match: Array<string>) => ElementNode,
@@ -433,10 +436,11 @@ export const ITALIC_UNDERSCORE: TextFormatTransformer = {
   tag: '_',
 }
 
-export function normalizeMarkdown(input: string, shouldMergeAdjacentLines = false): string {
+export function normalizeMarkdown(input: string, shouldMergeAdjacentLines: boolean): string {
   const lines = input.split('\n')
   let inCodeBlock = false
   const sanitizedLines: string[] = []
+  let nestedDeepCodeBlock = 0
 
   for (let i = 0; i < lines.length; i++) {
     const line = lines[i]
@@ -448,9 +452,24 @@ export function normalizeMarkdown(input: string, shouldMergeAdjacentLines = fals
       continue
     }
 
-    // Detect the start or end of a code block
-    if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) {
-      inCodeBlock = !inCodeBlock
+    if (CODE_END_REGEX.test(line)) {
+      if (nestedDeepCodeBlock === 0) {
+        inCodeBlock = true
+      }
+      if (nestedDeepCodeBlock === 1) {
+        inCodeBlock = false
+      }
+      if (nestedDeepCodeBlock > 0) {
+        nestedDeepCodeBlock--
+      }
+      sanitizedLines.push(line)
+      continue
+    }
+
+    // Toggle inCodeBlock state when encountering start or end of a code block
+    if (CODE_START_REGEX.test(line)) {
+      inCodeBlock = true
+      nestedDeepCodeBlock++
       sanitizedLines.push(line)
       continue
     }
@@ -464,8 +483,8 @@ export function normalizeMarkdown(input: string, shouldMergeAdjacentLines = fals
     // In markdown the concept of "empty paragraphs" does not exist.
     // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
     if (
-      line === '' ||
-      lastLine === '' ||
+      EMPTY_OR_WHITESPACE_ONLY.test(line) ||
+      EMPTY_OR_WHITESPACE_ONLY.test(lastLine) ||
       !lastLine ||
       HEADING_REGEX.test(lastLine) ||
       HEADING_REGEX.test(line) ||
@@ -475,11 +494,16 @@ export function normalizeMarkdown(input: string, shouldMergeAdjacentLines = fals
       CHECK_LIST_REGEX.test(line) ||
       TABLE_ROW_REG_EXP.test(line) ||
       TABLE_ROW_DIVIDER_REG_EXP.test(line) ||
-      !shouldMergeAdjacentLines
+      !shouldMergeAdjacentLines ||
+      TAG_START_REGEX.test(line) ||
+      TAG_END_REGEX.test(line) ||
+      TAG_START_REGEX.test(lastLine) ||
+      TAG_END_REGEX.test(lastLine) ||
+      CODE_END_REGEX.test(lastLine)
     ) {
       sanitizedLines.push(line)
     } else {
-      sanitizedLines[sanitizedLines.length - 1] = lastLine + line
+      sanitizedLines[sanitizedLines.length - 1] = lastLine + ' ' + line.trim()
     }
   }
 
diff --git a/packages/richtext-lexical/src/packages/@lexical/markdown/index.ts b/packages/richtext-lexical/src/packages/@lexical/markdown/index.ts
@@ -82,7 +82,7 @@ function $convertFromMarkdownString(
   transformers: Array<Transformer> = TRANSFORMERS,
   node?: ElementNode,
   shouldPreserveNewLines = false,
-  shouldMergeAdjacentLines = false,
+  shouldMergeAdjacentLines = true,
 ): void {
   const sanitizedMarkdown = shouldPreserveNewLines
     ? markdown
diff --git a/packages/richtext-lexical/src/utilities/jsx/lexicalMarkdownCopy.ts b/packages/richtext-lexical/src/utilities/jsx/lexicalMarkdownCopy.ts
diff --git a/test/lexical-mdx/int.spec.ts b/test/lexical-mdx/int.spec.ts
@@ -174,8 +174,6 @@ describe('Lexical MDX', () => {
           ? (sanitizedInputAfterConvertFromEditorJSON ?? sanitizedInput).replace(/\s/g, '')
           : (sanitizedInputAfterConvertFromEditorJSON ?? sanitizedInput)
 
-        console.log('resultNoSpace', resultNoSpace)
-        console.log('inputNoSpace', inputNoSpace)
         expect(resultNoSpace).toBe(inputNoSpace)
       })
     }

Original file line number	Diff line number	Diff line change
`@@ -174,8 +174,6 @@ describe('Lexical MDX', () => {`
`174`	`174`	`? (sanitizedInputAfterConvertFromEditorJSON ?? sanitizedInput).replace(/\s/g, '')`
`175`	`175`	`: (sanitizedInputAfterConvertFromEditorJSON ?? sanitizedInput)`
`176`	`176`
`177`		`- console.log('resultNoSpace', resultNoSpace)`
`178`		`- console.log('inputNoSpace', inputNoSpace)`
`179`	`177`	`expect(resultNoSpace).toBe(inputNoSpace)`
`180`	`178`	`})`
`181`	`179`	`}`