diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.js index cb7339e376..888ffc46ba 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.js @@ -104,12 +104,18 @@ function decode(params) { return null; } - // ECMA-376 renames w:t → w:delText inside . Other inline content — - // w:noBreakHyphen, w:tab, w:br, etc. — stays as-is; the deletion is - // conveyed by the wrapper alone. Guard the rename so non-text - // atoms inside don't crash. - const textNode = translatedTextNode.elements.find((n) => n.name === 'w:t'); - if (textNode) textNode.name = 'w:delText'; + // ECMA-376 (17.3.3.7) requires w:delText for ALL text runs inside . A + // single run can now hold multiple siblings, because the newline export + // safety net splits text around (e.g. AlphaBeta), + // so rename every direct w:t, not just the first; a leftover inside + // would not be treated as deleted. Other inline content + // (w:noBreakHyphen, w:tab, w:br, etc.) stays as-is; the wrapper alone + // conveys the deletion. + (translatedTextNode.elements || []) + .filter((n) => n.name === 'w:t') + .forEach((n) => { + n.name = 'w:delText'; + }); return { name: 'w:del', diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.test.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.test.js index 4aceb41c3d..643fc19441 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.test.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.test.js @@ -177,6 +177,37 @@ describe('w:del translator', () => { expect(result.elements[0].elements[0].name).toBe('w:delText'); }); + it('renames every in a multi-segment run to (newline split)', () => { + const mockTrackedMark = { + type: 'trackDelete', + attrs: { + id: '789', + sourceId: '', + author: 'Test', + authorEmail: 'test@example.com', + date: '2025-10-09T12:00:00Z', + }, + }; + + // The newline export safety net produces one run with interleaved w:t/w:br; + // every w:t inside must become w:delText, not just the first. + exportSchemaToJson.mockReturnValue({ + name: 'w:r', + elements: [ + { name: 'w:t', elements: [{ text: 'Alpha', type: 'text' }] }, + { name: 'w:br' }, + { name: 'w:t', elements: [{ text: 'Beta', type: 'text' }] }, + ], + }); + + const node = { type: 'text', text: 'Alpha\nBeta', marks: [mockTrackedMark] }; + const result = config.decode({ node }); + + const run = result.elements[0]; + expect(run.elements.map((n) => n.name)).toEqual(['w:delText', 'w:br', 'w:delText']); + expect(run.elements.some((n) => n.name === 'w:t')).toBe(false); + }); + it('writes sourceId to w:id for round-trip fidelity', () => { const mockTrackedMark = { type: 'trackDelete', diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.js index e9eee7aca8..0586bc1b6c 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.js @@ -44,11 +44,38 @@ export function getTextNodeForExport(text, marks, params) { partPath: resolveExportPartPath(params), }); - textNodes.push({ - name: 'w:t', - elements: [{ text, type: 'text' }], - attributes: nodeAttrs, - }); + const textValue = typeof text === 'string' ? text : ''; + // Normalize CRLF/CR to LF so Windows line endings export Word-native breaks + // too, rather than leaving a stray carriage return inside . + const normalizedText = textValue.includes('\r') ? textValue.replace(/\r\n?/g, '\n') : textValue; + if (normalizedText.includes('\n')) { + // Export safety net: a raw newline inside is whitespace that Word + // collapses on open (it is not the OOXML representation of a line break), + // while SuperDoc still renders it as a break: the SD-3278 + // divergence. Emit a Word-native between + // segments instead. Everything stays inside this single run so the + // surrounding / wrappers keep wrapping exactly one run. + const segments = normalizedText.split('\n'); + segments.forEach((segment, index) => { + if (segment.length > 0) { + const segmentNeedsSpace = /^\s|\s$/.test(segment); + textNodes.push({ + name: 'w:t', + elements: [{ text: segment, type: 'text' }], + attributes: segmentNeedsSpace ? { 'xml:space': 'preserve' } : null, + }); + } + if (index < segments.length - 1) { + textNodes.push({ name: 'w:br' }); + } + }); + } else { + textNodes.push({ + name: 'w:t', + elements: [{ text: normalizedText, type: 'text' }], + attributes: nodeAttrs, + }); + } // For custom mark export, we need to add a bookmark start and end tag // And store attributes in the bookmark name diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.test.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.test.js index 75f0a66550..979c3b9b37 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.test.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.test.js @@ -134,4 +134,80 @@ describe('getTextNodeForExport', () => { const runPropertiesChange = runProperties.elements.find((element) => element.name === 'w:rPrChange'); expect(runPropertiesChange.attributes['w:id']).toBe('7'); }); + + // SD-3278 export safety net: a raw newline left inside a PM text + // node (e.g. from an imported .docx that stored breaks as literal '\n') must + // export as a Word-native , not a collapsed newline inside . + describe('raw newline export safety net', () => { + const contentElements = (result) => result.elements.filter((el) => el.name === 'w:t' || el.name === 'w:br'); + + it('exports a single newline as // within one run', () => { + const result = getTextNodeForExport('Alpha\nBeta', [], buildParams()); + expect(result.name).toBe('w:r'); + const content = contentElements(result); + expect(content.map((el) => el.name)).toEqual(['w:t', 'w:br', 'w:t']); + expect(content[0].elements[0].text).toBe('Alpha'); + expect(content[2].elements[0].text).toBe('Beta'); + }); + + it('never leaves a raw newline inside a ', () => { + const result = getTextNodeForExport('Alpha\nBeta', [], buildParams()); + const texts = result.elements.filter((el) => el.name === 'w:t'); + expect(texts.some((el) => el.elements[0].text.includes('\n'))).toBe(false); + }); + + it('emits a soft break (no w:type="page") for the ', () => { + const result = getTextNodeForExport('Alpha\nBeta', [], buildParams()); + const br = result.elements.find((el) => el.name === 'w:br'); + expect(br).toBeDefined(); + expect(br.attributes?.['w:type']).toBeUndefined(); + }); + + it('leaves newline-free text as a single (unchanged)', () => { + const result = getTextNodeForExport('hello world', [], buildParams()); + const content = contentElements(result); + expect(content).toHaveLength(1); + expect(content[0].name).toBe('w:t'); + expect(content[0].elements[0].text).toBe('hello world'); + }); + + it('emits a for each newline including leading, trailing, and consecutive newlines', () => { + const result = getTextNodeForExport('\nA\n\nB\n', [], buildParams()); + const content = contentElements(result); + expect(content.map((el) => el.name)).toEqual(['w:br', 'w:t', 'w:br', 'w:br', 'w:t', 'w:br']); + const texts = content.filter((el) => el.name === 'w:t').map((el) => el.elements[0].text); + expect(texts).toEqual(['A', 'B']); + }); + + it('sets xml:space="preserve" only on segments with edge whitespace', () => { + const result = getTextNodeForExport('Alpha \n Beta', [], buildParams()); + const texts = result.elements.filter((el) => el.name === 'w:t'); + expect(texts[0].elements[0].text).toBe('Alpha '); + expect(texts[0].attributes).toEqual({ 'xml:space': 'preserve' }); + expect(texts[1].elements[0].text).toBe(' Beta'); + expect(texts[1].attributes).toEqual({ 'xml:space': 'preserve' }); + }); + + it('does not set xml:space on segments without edge whitespace', () => { + const result = getTextNodeForExport('Alpha\nBeta', [], buildParams()); + const texts = result.elements.filter((el) => el.name === 'w:t'); + expect(texts[0].attributes).toBeNull(); + expect(texts[1].attributes).toBeNull(); + }); + + it('normalizes CRLF to a on export', () => { + const content = contentElements(getTextNodeForExport('Alpha\r\nBeta', [], buildParams())); + expect(content.map((el) => el.name)).toEqual(['w:t', 'w:br', 'w:t']); + expect(content[0].elements[0].text).toBe('Alpha'); + expect(content[2].elements[0].text).toBe('Beta'); + }); + + it('normalizes a bare CR to a without leaving a stray carriage return in ', () => { + const result = getTextNodeForExport('Alpha\rBeta', [], buildParams()); + const content = contentElements(result); + expect(content.map((el) => el.name)).toEqual(['w:t', 'w:br', 'w:t']); + const texts = result.elements.filter((el) => el.name === 'w:t'); + expect(texts.some((el) => el.elements[0].text.includes('\r'))).toBe(false); + }); + }); }); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.test.ts index eb1374ccc8..1ab66b302b 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.test.ts @@ -9,6 +9,7 @@ import { type NodeOptions = { text?: string; marks?: Array<{ type: { name: string } }>; + leafText?: (node: ProseMirrorNode) => string; isInline?: boolean; isBlock?: boolean; isLeaf?: boolean; @@ -28,7 +29,7 @@ function createNode(typeName: string, children: ProseMirrorNode[] = [], options: const nodeSize = isText ? text.length : options.nodeSize != null ? options.nodeSize : isLeaf ? 1 : contentSize + 2; return { - type: { name: typeName }, + type: { name: typeName, spec: options.leafText ? { leafText: options.leafText } : {} }, text: isText ? text : undefined, nodeSize, isText, @@ -138,6 +139,25 @@ describe('resolveTextRangeInBlock', () => { expect(result).toEqual({ from: 6, to: 7 }); }); + + it('maps visible offsets across tracked deleted leaf nodes without counting them', () => { + const textA = createNode('text', [], { text: 'A' }); + const deletedBreak = createNode('lineBreak', [], { + isInline: true, + isLeaf: true, + marks: [{ type: { name: 'trackDelete' } }], + leafText: () => '\n', + }); + const textB = createNode('text', [], { text: 'B' }); + const paragraph = createNode('paragraph', [textA, deletedBreak, textB], { + isBlock: true, + inlineContent: true, + }); + + const result = resolveTextRangeInBlock(paragraph, 0, { start: 1, end: 2 }, { textModel: 'visible' }); + + expect(result).toEqual({ from: 3, to: 4 }); + }); }); describe('computeTextContentLength', () => { @@ -204,6 +224,26 @@ describe('computeTextContentLength', () => { expect(computeTextContentLength(paragraph, { textModel: 'visible' })).toBe(2); expect(textContentInBlock(paragraph, { textModel: 'visible' })).toBe('AB'); }); + + it('excludes tracked deleted leaf nodes in the visible text model', () => { + const textA = createNode('text', [], { text: 'A' }); + const deletedBreak = createNode('lineBreak', [], { + isInline: true, + isLeaf: true, + marks: [{ type: { name: 'trackDelete' } }], + leafText: () => '\n', + }); + const textB = createNode('text', [], { text: 'B' }); + const paragraph = createNode('paragraph', [textA, deletedBreak, textB], { + isBlock: true, + inlineContent: true, + }); + + expect(computeTextContentLength(paragraph)).toBe(3); + expect(computeTextContentLength(paragraph, { textModel: 'visible' })).toBe(2); + expect(textContentInBlock(paragraph)).toBe('A\nB'); + expect(textContentInBlock(paragraph, { textModel: 'visible' })).toBe('AB'); + }); }); describe('pmPositionToTextOffset', () => { @@ -268,4 +308,23 @@ describe('pmPositionToTextOffset', () => { expect(pmPositionToTextOffset(paragraph, 0, 6, { textModel: 'visible' })).toBe(1); expect(pmPositionToTextOffset(paragraph, 0, 7, { textModel: 'visible' })).toBe(2); }); + + it('keeps PM positions inside tracked deleted leaf nodes at the surrounding visible offset', () => { + const textA = createNode('text', [], { text: 'A' }); + const deletedBreak = createNode('lineBreak', [], { + isInline: true, + isLeaf: true, + marks: [{ type: { name: 'trackDelete' } }], + leafText: () => '\n', + }); + const textB = createNode('text', [], { text: 'B' }); + const paragraph = createNode('paragraph', [textA, deletedBreak, textB], { + isBlock: true, + inlineContent: true, + }); + + expect(pmPositionToTextOffset(paragraph, 0, 2, { textModel: 'visible' })).toBe(1); + expect(pmPositionToTextOffset(paragraph, 0, 3, { textModel: 'visible' })).toBe(1); + expect(pmPositionToTextOffset(paragraph, 0, 4, { textModel: 'visible' })).toBe(2); + }); }); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.ts index 5395f05ce1..17e95eb01d 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.ts @@ -29,6 +29,10 @@ function shouldSkipTextNode(node: ProseMirrorNode, options?: TextOffsetOptions): return isVisibleTextModel(options) && hasTrackDeleteMark(node); } +function shouldSkipLeafNode(node: ProseMirrorNode, options?: TextOffsetOptions): boolean { + return isVisibleTextModel(options) && hasTrackDeleteMark(node); +} + function resolveSegmentPosition( targetOffset: number, segmentStart: number, @@ -87,6 +91,10 @@ export function pmPositionToTextOffset( if (node.isLeaf) { const endPos = docPos + node.nodeSize; + if (shouldSkipLeafNode(node, options)) { + if (pmPos < endPos) done = true; + return; + } if (pmPos >= endPos) { offset += 1; } else { @@ -141,6 +149,7 @@ export function computeTextContentLength(blockNode: ProseMirrorNode, options?: T return; } if (node.isLeaf) { + if (shouldSkipLeafNode(node, options)) return; length += 1; return; } @@ -229,6 +238,7 @@ export function resolveTextRangeInBlock( } if (node.isLeaf) { + if (shouldSkipLeafNode(node, options)) return; advanceSegment(1, docPos, docPos + node.nodeSize); return; } @@ -262,7 +272,14 @@ export function textContentInBlock(blockNode: ProseMirrorNode, options?: TextOff } if (node.isLeaf) { - text += '\ufffc'; + if (shouldSkipLeafNode(node, options)) return; + // Honor a leaf's declared visible text (e.g. lineBreak -> '\n', + // noBreakHyphen -> U+2011) so this content model agrees with the visible + // document and with the offset model. All leafText values are one + // character, matching the 1-per-leaf length used by the offset helpers + // above; other leaves fall back to the U+FFFC placeholder. + const leafText = (node.type?.spec as { leafText?: (n: ProseMirrorNode) => string } | undefined)?.leafText; + text += typeof leafText === 'function' ? leafText(node) : '\ufffc'; return; } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.test.ts index 8f5c99b9a8..a63576cf03 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.test.ts @@ -2,7 +2,9 @@ import { describe, expect, it, vi } from 'vitest'; import { Fragment, Schema } from 'prosemirror-model'; import { buildTextWithTabs, parentAllowsNodeAt, textBetweenWithTabs } from './text-with-tabs.js'; -function makeRealSchema(options: { hasTab?: boolean; hasNoBreakHyphen?: boolean; hasGenericLeaf?: boolean } = {}) { +function makeRealSchema( + options: { hasTab?: boolean; hasLineBreak?: boolean; hasNoBreakHyphen?: boolean; hasGenericLeaf?: boolean } = {}, +) { const nodes: Record = { doc: { content: 'paragraph+' }, paragraph: { group: 'block', content: 'inline*' }, @@ -13,6 +15,11 @@ function makeRealSchema(options: { hasTab?: boolean; hasNoBreakHyphen?: boolean; // Tab is non-leaf, which is why `textBetweenWithTabs` (not PM's built-in textBetween) is needed. nodes.tab = { group: 'inline', inline: true, atom: true, content: 'inline*' }; } + if (options.hasLineBreak) { + // Mirrors the real extensions/line-break/line-break.js shape: inline atom + // that disallows marks (`marks: ''`) and renders to
/ exports to . + nodes.lineBreak = { group: 'inline', inline: true, atom: true, marks: '' }; + } if (options.hasNoBreakHyphen) { // Mirrors the real extensions/no-break-hyphen schema: inline leaf atom with leafText. nodes.noBreakHyphen = { group: 'inline', inline: true, atom: true, leafText: () => '‑' }; @@ -93,12 +100,114 @@ describe('buildTextWithTabs', () => { expect(result.child(0).text).toBe('x'); expect(result.child(0).marks.some((m: any) => m.type.name === 'bold')).toBe(true); expect(result.child(1).type.name).toBe('tab'); - // Tab carries the run's marks — the OOXML exporter reads node.marks on tab + // Tab carries the run's marks. The OOXML exporter reads node.marks on tab // nodes (tab-translator.js:53) to emit matching around . expect(result.child(1).marks.some((m: any) => m.type.name === 'bold')).toBe(true); expect(result.child(2).text).toBe('y'); expect(result.child(2).marks.some((m: any) => m.type.name === 'bold')).toBe(true); }); + + // SD-3278: a literal '\n' inside a text node exports as raw newline + // inside , which Word collapses. It must become a `lineBreak` node so the + // exporter emits a Word-native . + it('splits text around a single newline into text + lineBreak + text', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined); + expect(result).toBeInstanceOf(Fragment); + const fragment = result as Fragment; + expect(fragment.childCount).toBe(3); + expect(fragment.child(0).text).toBe('Alpha'); + expect(fragment.child(1).type.name).toBe('lineBreak'); + expect(fragment.child(2).text).toBe('Beta'); + }); + + it('emits a lineBreak node even when the schema has no tab node type', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined) as Fragment; + expect(result).toBeInstanceOf(Fragment); + expect(result.childCount).toBe(3); + expect(result.child(1).type.name).toBe('lineBreak'); + }); + + it('keeps the raw newline in a single text node when the schema has no lineBreak node type', () => { + const schema = makeRealSchema({ hasTab: true }); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined); + expect((result as any).isText).toBe(true); + expect((result as any).text).toBe('Alpha\nBeta'); + }); + + it('creates the lineBreak node bare in the creation path', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const boldMark = schema.marks.bold.create(); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', [boldMark]) as Fragment; + expect(result.child(0).marks.some((m: any) => m.type.name === 'bold')).toBe(true); + expect(result.child(1).type.name).toBe('lineBreak'); + expect(result.child(1).marks.length).toBe(0); + expect(result.child(2).marks.some((m: any) => m.type.name === 'bold')).toBe(true); + }); + + it('omits empty segments around leading, trailing, and consecutive newlines', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const lead = buildTextWithTabs(schema, '\nfoo', undefined) as Fragment; + expect(lead.childCount).toBe(2); + expect(lead.child(0).type.name).toBe('lineBreak'); + expect(lead.child(1).text).toBe('foo'); + + const doubled = buildTextWithTabs(schema, 'a\n\nb', undefined) as Fragment; + expect(doubled.childCount).toBe(4); + expect(doubled.child(0).text).toBe('a'); + expect(doubled.child(1).type.name).toBe('lineBreak'); + expect(doubled.child(2).type.name).toBe('lineBreak'); + expect(doubled.child(3).text).toBe('b'); + }); + + it('interleaves tab and lineBreak nodes when both control characters are present', () => { + const schema = makeRealSchema({ hasTab: true, hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'a\tb\nc', undefined) as Fragment; + expect(result.childCount).toBe(5); + expect(result.child(0).text).toBe('a'); + expect(result.child(1).type.name).toBe('tab'); + expect(result.child(2).text).toBe('b'); + expect(result.child(3).type.name).toBe('lineBreak'); + expect(result.child(4).text).toBe('c'); + }); + + it('keeps the raw tab literal but still splits the newline when tabs are disallowed', () => { + const schema = makeRealSchema({ hasTab: true, hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'a\tb\nc', undefined, { parentAllowsTab: false }) as Fragment; + expect(result.childCount).toBe(3); + expect(result.child(0).text).toBe('a\tb'); + expect(result.child(1).type.name).toBe('lineBreak'); + expect(result.child(2).text).toBe('c'); + }); + + // Generated/SDK text often uses CRLF; normalize CRLF and bare CR to + // line breaks so no stray carriage return survives in a text segment. + it('normalizes CRLF to a lineBreak node', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\r\nBeta', undefined) as Fragment; + expect(result.childCount).toBe(3); + expect(result.child(0).text).toBe('Alpha'); + expect(result.child(1).type.name).toBe('lineBreak'); + expect(result.child(2).text).toBe('Beta'); + }); + + it('normalizes a bare CR to a lineBreak node without leaving a stray carriage return', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\rBeta', undefined) as Fragment; + expect(result.childCount).toBe(3); + expect(result.child(0).text).toBe('Alpha'); + expect(result.child(1).type.name).toBe('lineBreak'); + expect(result.child(2).text).toBe('Beta'); + }); + + // A `text*`-only parent (e.g. total-page-number) rejects lineBreak. + it('keeps the raw newline in a single text node when parentAllowsLineBreak is false', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined, { parentAllowsLineBreak: false }); + expect((result as any).isText).toBe(true); + expect((result as any).text).toBe('Alpha\nBeta'); + }); }); describe('parentAllowsNodeAt', () => { diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.ts index 3a44a8582f..82e9bbf64f 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.ts @@ -10,38 +10,77 @@ import type { Transaction } from 'prosemirror-state'; import { TrackDeleteMarkName } from '../../extensions/track-changes/constants.js'; /** - * Build a text-or-fragment suitable for insertion, splitting on '\t' and - * inserting schema `tab` nodes at each split. + * Build a text-or-fragment suitable for insertion, splitting on '\t' and '\n' + * and inserting schema `tab` / `lineBreak` nodes at each split. * - * Returns a plain text node when the schema has no `tab` node type, when the - * parent disallows tab nodes (see {@link parentAllowsTabAt}), or when the text - * contains no tab characters. The raw '\t' is preserved inside the text node - * so exporters and readers still see the character. + * A tab split produces a `tab` node; a newline split produces a `lineBreak` + * node. Each is only performed when the schema exposes the matching node type + * (and, for tabs, when the parent admits them; see {@link parentAllowsNodeAt}). + * A control character that cannot be materialized stays literal inside the + * surrounding text node so exporters and readers still see it. * - * Callers are responsible for ensuring `text` is non-empty (ProseMirror's - * `schema.text` throws on empty input). + * Newlines must become `lineBreak` nodes (not raw '\n' inside a text node): + * a literal newline inside `` is whitespace that Word collapses on open + * (it is not the OOXML representation of a line break), so it would drop the + * visible break while SuperDoc still renders it. The `lineBreak` node + * round-trips to a Word-native ``. See SD-3278. + * + * Returns a plain text node when the text contains no splittable control + * character (the common case). Callers are responsible for ensuring `text` is + * non-empty (ProseMirror's `schema.text` throws on empty input). */ export function buildTextWithTabs( schema: Schema, text: string, marks: readonly ProseMirrorMark[] | undefined, - opts: { parentAllowsTab?: boolean } = {}, + opts: { parentAllowsTab?: boolean; parentAllowsLineBreak?: boolean } = {}, ): ProseMirrorNode | ProseMirrorFragment { - // Check the cheapest/most selective predicate first — most calls carry no '\t'. - if (!text.includes('\t')) return schema.text(text, marks); + // Normalize CRLF/CR to LF up front so Windows line endings (common in + // generated/SDK text) are treated as line breaks too. Only allocate when a + // carriage return is actually present. + const normalized = text.includes('\r') ? text.replace(/\r\n?/g, '\n') : text; + + // Check the cheapest/most selective predicate first; most calls carry neither. + const hasTab = normalized.includes('\t'); + const hasNewline = normalized.includes('\n'); + if (!hasTab && !hasNewline) return schema.text(normalized, marks); + + // `lineBreak` is a normal inline node, but some textblocks restrict their + // content to `text*` (e.g. total-page-number) and reject it. Gate on parent + // admission the same way tabs do: callers that target restrictive parents + // pass the probe result; others default to allowed. When disallowed or absent + // we fall back to literal text, and the export safety net still converts any + // raw newline to `` on export. + const tabNodeType = hasTab && opts.parentAllowsTab !== false ? schema.nodes?.tab : undefined; + const lineBreakNodeType = hasNewline && opts.parentAllowsLineBreak !== false ? schema.nodes?.lineBreak : undefined; + if (!tabNodeType && !lineBreakNodeType) return schema.text(normalized, marks); - const tabNodeType = schema.nodes?.tab; - if (!tabNodeType || opts.parentAllowsTab === false) return schema.text(text, marks); + // `NodeType.create` takes `readonly Mark[] | undefined` (not null) for marks. + const tabMarks: readonly ProseMirrorMark[] | undefined = marks ?? undefined; + + // Split only on the control characters we can replace with a node; any other + // control character stays literal inside the surrounding text segments. + const splitPattern = [tabNodeType ? '\\t' : null, lineBreakNodeType ? '\\n' : null].filter(Boolean).join('|'); + const parts = normalized.split(new RegExp(`(${splitPattern})`)); - const tabMarks = (marks ?? null) as ProseMirrorMark[] | null; - const parts = text.split('\t'); const nodes: ProseMirrorNode[] = []; - for (let i = 0; i < parts.length; i++) { - if (parts[i]) nodes.push(schema.text(parts[i], marks)); - // Carry the surrounding marks onto the tab node so the OOXML exporter - // wraps `` in a matching `` — keeps formatting unbroken - // across the tab (bold-run | tab | bold-run rather than bold | plain | bold). - if (i < parts.length - 1) nodes.push(tabNodeType.create(null, null, tabMarks)); + for (const part of parts) { + if (part === '') continue; // schema.text throws on empty input + if (part === '\t' && tabNodeType) { + // Carry the surrounding marks onto the tab node so the OOXML exporter + // wraps `` in a matching ``, keeping formatting unbroken + // across the tab (bold-run | tab | bold-run rather than bold | plain | bold). + nodes.push(tabNodeType.create(null, null, tabMarks)); + } else if (part === '\n' && lineBreakNodeType) { + // Create the break bare: a soft line break carries no run formatting. + // (Tracking an inserted break so it exports inside is a separate + // concern: br-translator does not yet route node.marks the way + // noBreakHyphen's translator does; see SD-3371. It is not a schema limit: + // a leaf atom can carry marks, governed by the parent run/paragraph.) + nodes.push(lineBreakNodeType.create()); + } else { + nodes.push(schema.text(part, marks)); + } } return Fragment.from(nodes); } @@ -128,8 +167,14 @@ export function textBetweenWithTabs( } if (node.isLeaf) { if (node.isInline) { + if ( + options.textModel === 'visible' && + node.marks?.some((mark: ProseMirrorMark) => mark.type.name === TrackDeleteMarkName) + ) { + return false; + } // Honor PM's `leafText` NodeSpec contract: an inline leaf can declare - // its visible text representation (e.g. noBreakHyphen → U+2011) so + // its visible text representation (e.g. noBreakHyphen -> U+2011) so // flattened reads match the rendered glyph instead of producing the // generic placeholder. Falls back to `leafFallback` when undefined. const leafTextFn = node.type?.spec?.leafText; diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/content-controls-wrappers.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/content-controls-wrappers.ts index 82ab9e11ff..4deabd771e 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/content-controls-wrappers.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/content-controls-wrappers.ts @@ -997,7 +997,10 @@ function insertTextAroundSdt( const { tr } = editor.state; const tabType = editor.schema.nodes?.tab; const parentAllowsTab = tabType && content.includes('\t') ? parentAllowsNodeAt(tr, pos, tabType) : false; - tr.insert(pos, buildTextWithTabs(editor.schema, content, undefined, { parentAllowsTab })); + const lineBreakType = editor.schema.nodes?.lineBreak; + const parentAllowsLineBreak = + lineBreakType && /[\r\n]/.test(content) ? parentAllowsNodeAt(tr, pos, lineBreakType) : false; + tr.insert(pos, buildTextWithTabs(editor.schema, content, undefined, { parentAllowsTab, parentAllowsLineBreak })); dispatchTransaction(editor, tr); return true; } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts index cf2c46d86b..0ec53f6615 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts @@ -886,6 +886,16 @@ export function executeTextRewrite( const replacementText = getReplacementText(step.args.replacement); const marks = resolveMarksForRange(editor, target, step); + // A rewrite range can start in a normal parent (a run) and span into a + // `text*`-only inline container (e.g. total-page-number) that rejects + // lineBreak. The edits below land at remapped positions whose parent may + // differ from `absFrom`, so probe admission at each actual edit position + // rather than once at the range start: a newline that lands in a restrictive + // parent falls back to literal text (the export safety net still emits + // ). Probe on the current `tr` so prior in-loop steps are reflected. + const lineBreakNodeType = editor.state.schema.nodes?.lineBreak; + const parentAllowsLineBreakAt = (pos: number): boolean => + lineBreakNodeType ? parentAllowsNodeAt(tr, pos, lineBreakNodeType) : false; const structuralRewrite = resolveStructuralRangeRewrite(tr.doc, absFrom, absTo, step); if (structuralRewrite) { @@ -935,7 +945,9 @@ export function executeTextRewrite( tr.delete(absFrom, absTo); return { changed: target.text.length > 0 }; } - const content = buildTextWithTabs(editor.state.schema, replacementText, asProseMirrorMarks(marks)); + const content = buildTextWithTabs(editor.state.schema, replacementText, asProseMirrorMarks(marks), { + parentAllowsLineBreak: parentAllowsLineBreakAt(absFrom), + }); tr.replaceWith(absFrom, absTo, content); return { changed: replacementText !== target.text }; } @@ -991,10 +1003,14 @@ export function executeTextRewrite( if (change.type === 'delete') { tr.delete(remap(change.docFrom), remap(change.docTo)); } else if (change.type === 'insert') { - const content = buildTextWithTabs(editor.state.schema, change.newText, asProseMirrorMarks(marks)); + const content = buildTextWithTabs(editor.state.schema, change.newText, asProseMirrorMarks(marks), { + parentAllowsLineBreak: parentAllowsLineBreakAt(remap(change.docPos)), + }); tr.insert(remap(change.docPos), content); } else { - const content = buildTextWithTabs(editor.state.schema, change.newText, asProseMirrorMarks(marks)); + const content = buildTextWithTabs(editor.state.schema, change.newText, asProseMirrorMarks(marks), { + parentAllowsLineBreak: parentAllowsLineBreakAt(remap(change.docFrom)), + }); tr.replaceWith(remap(change.docFrom), remap(change.docTo), content); } } @@ -1002,12 +1018,15 @@ export function executeTextRewrite( // Pure deletion after trimming: a non-empty replacement whose new text is // fully contained in the old text's common prefix + suffix collapses to an // empty delta (e.g. "best endeavours to:" → "endeavours to:" leaves - // trimmedNew === ""). Delete the removed range rather than building - // schema.text('') — ProseMirror rejects empty text nodes. + // trimmedNew === ""). This also covers removing a lineBreak, now that + // lineBreak counts as "\n" in the diff. Delete the removed range rather than + // building schema.text(''), which ProseMirror rejects for empty text. tr.delete(trimmedFrom, trimmedTo); } else { // 0 or 1 word change: replace just the trimmed range. - const content = buildTextWithTabs(editor.state.schema, trimmedNew, asProseMirrorMarks(marks)); + const content = buildTextWithTabs(editor.state.schema, trimmedNew, asProseMirrorMarks(marks), { + parentAllowsLineBreak: parentAllowsLineBreakAt(trimmedFrom), + }); tr.replaceWith(trimmedFrom, trimmedTo, content); } @@ -1138,7 +1157,12 @@ export function executeTextInsert( const tabNodeType = editor.state.schema.nodes?.tab; const parentAllowsTab = tabNodeType && text.includes('\t') ? parentAllowsNodeAt(tr, absPos, tabNodeType) : false; - tr.insert(absPos, buildTextWithTabs(editor.state.schema, text, marks, { parentAllowsTab })); + // Restrictive parents like total-page-number are `text*` and reject lineBreak; + // probe admission so newline-bearing inserts fall back to literal text there. + const lineBreakNodeType = editor.state.schema.nodes?.lineBreak; + const parentAllowsLineBreak = + lineBreakNodeType && /[\r\n]/.test(text) ? parentAllowsNodeAt(tr, absPos, lineBreakNodeType) : false; + tr.insert(absPos, buildTextWithTabs(editor.state.schema, text, marks, { parentAllowsTab, parentAllowsLineBreak })); return { changed: true }; } @@ -1323,7 +1347,13 @@ export function executeSpanTextRewrite( // For single replacement block, use flat replacement into the span if (replacementBlocks.length === 1) { const marks = resolveSpanMarks(editor, target, policy, step.id); - const content = buildTextWithTabs(editor.state.schema, replacementBlocks[0], asProseMirrorMarks(marks)); + // Probe parent admission so a newline replacement into a `text*`-only span + // parent falls back to literal text (the export safety net handles the rest). + const lineBreakNodeType = editor.state.schema.nodes?.lineBreak; + const parentAllowsLineBreak = lineBreakNodeType ? parentAllowsNodeAt(tr, absFrom, lineBreakNodeType) : false; + const content = buildTextWithTabs(editor.state.schema, replacementBlocks[0], asProseMirrorMarks(marks), { + parentAllowsLineBreak, + }); tr.replaceWith(absFrom, absTo, content); return { changed: true }; } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts new file mode 100644 index 0000000000..3d69098ae7 --- /dev/null +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts @@ -0,0 +1,548 @@ +import { afterEach, beforeAll, describe, expect, it } from 'vitest'; +import { initTestEditor, loadTestDataForEditorTests } from '@tests/helpers/helpers.js'; +import { executeSpanTextRewrite, executeTextInsert, executeTextRewrite } from './executor.ts'; + +/** + * SD-3278: multi-line text forwarded into text-mode mutations (e.g. + * an agentic SDK workflow) must export Word-native line breaks, not a raw '\n' + * inside one that Word collapses. + * + * Two paths are covered: + * - Creation path: doc.replace / text.rewrite with a single '\n' must build a + * `lineBreak` PM node (not a literal '\n' in a text node). + * - Export safety net: a text node that already holds a raw '\n' (e.g. from an + * imported .docx that stored breaks as literal newlines) must still export a + * Word-native . + */ + +function makeSchemaEditor(paragraphs: string[] = ['hello world']) { + return initTestEditor({ + loadFromSchema: true, + content: { + type: 'doc', + content: paragraphs.map((text) => ({ + type: 'paragraph', + attrs: {}, + content: [{ type: 'run', attrs: {}, content: [{ type: 'text', text }] }], + })), + }, + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; +} + +function getFirstMatchRef(editor: any, pattern: string): string { + const match = editor.doc.query.match({ select: { type: 'text', pattern }, require: 'first' }); + const ref = match?.items?.[0]?.handle?.ref; + if (!ref) throw new Error(`Could not resolve ref for pattern "${pattern}"`); + return ref; +} + +function hasNodeOfType(editor: any, name: string): boolean { + let found = false; + editor.state.doc.descendants((node: any) => { + if (found) return false; + if (node.type.name === name) { + found = true; + return false; + } + return true; + }); + return found; +} + +function paragraphCount(editor: any): number { + let count = 0; + editor.state.doc.forEach((node: any) => { + if (node.type.name === 'paragraph') count += 1; + }); + return count; +} + +function countNodeType(editor: any, name: string): number { + let count = 0; + editor.state.doc.descendants((node: any) => { + if (node.type.name === name) count += 1; + }); + return count; +} + +describe('text-mode mutations: single newline handling (SD-3278)', () => { + let editor: any | undefined; + + afterEach(() => { + editor?.destroy(); + editor = undefined; + }); + + it('direct doc.replace with a single newline builds a lineBreak node inside one paragraph', () => { + editor = makeSchemaEditor(['hello world']); + + const receipt = editor.doc.replace( + { ref: getFirstMatchRef(editor, 'hello world'), text: 'Alpha\nBeta' }, + { changeMode: 'direct' }, + ); + + expect(receipt.success).toBe(true); + // A single '\n' stays within the paragraph (only '\n\n+' splits paragraphs). + expect(paragraphCount(editor)).toBe(1); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(true); + // It must NOT become a page break. + expect(hasNodeOfType(editor, 'hardBreak')).toBe(false); + }); + + it('tracked doc.replace with a single newline builds a lineBreak node', () => { + editor = makeSchemaEditor(['hello world']); + + const receipt = editor.doc.replace( + { ref: getFirstMatchRef(editor, 'hello world'), text: 'Alpha\nBeta' }, + { changeMode: 'tracked' }, + ); + + expect(receipt.success).toBe(true); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(true); + expect(hasNodeOfType(editor, 'hardBreak')).toBe(false); + }); + + // Read-model consistency (SD-3278): a lineBreak created on the write side must + // read back as '\n' on the diff/search paths, or query.match cannot find + // break-bearing content and an identical rewrite duplicates the break. + it('finds break-bearing content by \\n and is idempotent under an identical rewrite (no duplicate break)', () => { + editor = makeSchemaEditor(['hello world']); + editor.doc.replace({ ref: getFirstMatchRef(editor, 'hello world'), text: 'Alpha\nBeta' }, { changeMode: 'direct' }); + expect(countNodeType(editor, 'lineBreak')).toBe(1); + + // query.match must see the break as '\n' to resolve a ref to the content. + const ref = getFirstMatchRef(editor, 'Alpha\nBeta'); + editor.doc.replace({ ref, text: 'Alpha\nBeta' }, { changeMode: 'direct' }); + // The identical rewrite is a no-op: still exactly one break, not two. + expect(countNodeType(editor, 'lineBreak')).toBe(1); + }); + + it('rewriting break-bearing content to single-line text removes the break', () => { + editor = makeSchemaEditor(['hello world']); + editor.doc.replace({ ref: getFirstMatchRef(editor, 'hello world'), text: 'Alpha\nBeta' }, { changeMode: 'direct' }); + expect(countNodeType(editor, 'lineBreak')).toBe(1); + + const ref = getFirstMatchRef(editor, 'Alpha\nBeta'); + editor.doc.replace({ ref, text: 'AlphaBeta' }, { changeMode: 'direct' }); + expect(countNodeType(editor, 'lineBreak')).toBe(0); + expect(editor.state.doc.firstChild?.textContent).toBe('AlphaBeta'); + }); + + // Proves the bot P1 (rewrite char-diff counts the break) DIRECTLY, without + // query.match: rewriting `AlphaBeta` to the same visible text must + // be a no-op (the diff reads the break as '\n' via leafText), so it neither + // duplicates nor strands the break. + it('an identical rewrite over an existing lineBreak node is a no-op (direct target)', () => { + // paragraph > run > [ text 'Alpha', lineBreak, text 'Beta' ] + editor = initTestEditor({ + loadFromSchema: true, + content: { + type: 'doc', + content: [ + { + type: 'paragraph', + attrs: {}, + content: [ + { + type: 'run', + attrs: {}, + content: [{ type: 'text', text: 'Alpha' }, { type: 'lineBreak' }, { type: 'text', text: 'Beta' }], + }, + ], + }, + ], + }, + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; + expect(countNodeType(editor, 'lineBreak')).toBe(1); + + // Span the inline content: first text/lineBreak start .. last node end. + let absFrom = -1; + let absTo = -1; + editor.state.doc.descendants((node: any, pos: number) => { + if (node.isText || node.type.name === 'lineBreak') { + if (absFrom === -1) absFrom = pos; + absTo = pos + node.nodeSize; + } + }); + + const tr = editor.state.tr; + const target = { + kind: 'range', + stepId: 'idem', + op: 'text.rewrite', + blockId: '__selection__', + from: 0, + to: 0, + absFrom, + absTo, + text: 'Alpha\nBeta', + capturedStyle: undefined, + } as any; + const step = { + id: 'idem-rewrite', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'Alpha\nBeta' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + executeTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any); + editor.dispatch(tr); + + // Still exactly one break: not duplicated (the bot P1 regression) and not removed. + expect(countNodeType(editor, 'lineBreak')).toBe(1); + expect(editor.state.doc.firstChild?.textContent).toBe('Alpha\nBeta'); + }); +}); + +// lineBreak must not be forced into a parent that rejects it. The +// total-page-number node is `content: 'text*'`, so a newline insert there must +// fall back to literal text rather than throwing (mirrors the tab guard). +describe('newline insert into a restrictive (text*) parent', () => { + let editor: any | undefined; + + afterEach(() => { + editor?.destroy(); + editor = undefined; + }); + + function makeEditorWithTotalPageCount() { + return initTestEditor({ + loadFromSchema: true, + content: { + type: 'doc', + content: [ + { + type: 'paragraph', + attrs: {}, + content: [ + { + type: 'run', + attrs: {}, + content: [{ type: 'total-page-number', attrs: {}, content: [{ type: 'text', text: '7' }] }], + }, + ], + }, + ], + }, + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; + } + + function findTotalPageNumberPos(ed: any): number { + let pos: number | undefined; + ed.state.doc.descendants((node: any, nodePos: number) => { + if (pos !== undefined) return false; + if (node.type.name === 'total-page-number') { + pos = nodePos; + return false; + } + return true; + }); + if (pos === undefined) throw new Error('total-page-number node not found'); + return pos; + } + + it('inserts a\\nb into total-page-number without throwing and without creating a lineBreak node', () => { + editor = makeEditorWithTotalPageCount(); + const nodePos = findTotalPageNumberPos(editor); + const innerPos = nodePos + 1; // inside the total-page-number, before its '7' + + const tr = editor.state.tr; + const target = { + kind: 'range', + stepId: 'step-1', + op: 'text.insert', + blockId: 'total-page-number-1', + from: 0, + to: 0, + absFrom: innerPos, + absTo: innerPos, + text: '', + marks: [], + } as any; + const step = { + id: 'insert-newline-into-total-page-number', + op: 'text.insert', + where: { by: 'ref', ref: 'ignored' }, + args: { position: 'before', content: { text: 'a\nb' } }, + } as any; + + expect(() => executeTextInsert(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + const totalPageNumber = editor.state.doc.nodeAt(nodePos); + expect(totalPageNumber?.type.name).toBe('total-page-number'); + expect(totalPageNumber?.textContent).toBe('a\nb7'); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + }); + + // The fix also threads the parent-admission probe into the rewrite path + // (executeTextRewrite / executeSpanTextRewrite), not just text.insert. Rewriting + // text INSIDE a text*-only parent with a newline must not throw or inject a + // lineBreak; it falls back to literal text because the field is `text*` only. + it('rewrites text inside total-page-number with a\\nb without throwing or creating a lineBreak node', () => { + editor = makeEditorWithTotalPageCount(); + const nodePos = findTotalPageNumberPos(editor); + const innerPos = nodePos + 1; // the '7' text node sits at [innerPos, innerPos + 1] + + const tr = editor.state.tr; + const target = { + kind: 'range', + stepId: 'rewrite-step', + op: 'text.rewrite', + // '__selection__' makes resolveMarksForRange skip style capture (no block). + blockId: '__selection__', + from: 0, + to: 1, + absFrom: innerPos, + absTo: innerPos + 1, + text: '7', + capturedStyle: undefined, + } as any; + const step = { + id: 'rewrite-newline-into-total-page-number', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'a\nb' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + expect(() => executeTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + const totalPageNumber = editor.state.doc.nodeAt(nodePos); + expect(totalPageNumber?.type.name).toBe('total-page-number'); + // No lineBreak node was forced into the text*-only parent. + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + }); + + // The probe must be taken at the actual edit position, not once at the range + // start: a rewrite can start in a normal run (which allows lineBreak) and the + // newline edit can land inside a total-page-number (text*-only). A single + // probe at the start would mint a lineBreak that the field parent rejects. + it('does not force a lineBreak into total-page-number when a spanning rewrite lands the newline inside the field', () => { + // paragraph > run > [ text 'AB', total-page-number > text '7' ] + editor = initTestEditor({ + loadFromSchema: true, + content: { + type: 'doc', + content: [ + { + type: 'paragraph', + attrs: {}, + content: [ + { + type: 'run', + attrs: {}, + content: [ + { type: 'text', text: 'AB' }, + { type: 'total-page-number', attrs: {}, content: [{ type: 'text', text: '7' }] }, + ], + }, + ], + }, + ], + }, + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; + + const nodePos = findTotalPageNumberPos(editor); // total-page-number opens here; 'B' is at nodePos-1 + const tr = editor.state.tr; + // Range 'B7' starts in the run ('B') and spans into the field's text ('7'). + // The replacement appends '\n', whose edit lands at the end of the field's + // text (a text*-only parent). + const target = { + kind: 'range', + stepId: 'span-rewrite', + op: 'text.rewrite', + blockId: '__selection__', + from: 0, + to: 0, + absFrom: nodePos - 1, // 'B' in the run + absTo: nodePos + 2, // just after '7' inside the field + text: 'B7', + capturedStyle: undefined, + } as any; + const step = { + id: 'span-rewrite-into-field', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'B7\n' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + expect(() => executeTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + expect(editor.state.doc.nodeAt(nodePos)?.type.name).toBe('total-page-number'); + // The newline landed in the text*-only field, so it falls back to literal + // text rather than forcing a (schema-invalid) lineBreak there. + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + }); + + // executeSpanTextRewrite has its own single-block replacement path with a + // separate parentAllowsLineBreak probe. The rewrite/insert paths above cover + // their probes, but the span path's was unexercised even though its source + // comment claims it is covered. A single inline '\n' stays in one replacement + // block (split is on \n{2,}), so it reaches this single-block path. + it('span rewrite with a single newline builds one lineBreak in a normal parent', () => { + editor = makeSchemaEditor(['hello world']); // paragraph > run > text 'hello world' + const tr = editor.state.tr; + + // A real two-segment span over the run text ('hello' + ' world'). The run + // admits a lineBreak, so the single '\n' must mint exactly one. + const target = { + kind: 'span', + stepId: 'span-newline-normal', + op: 'text.rewrite', + matchId: 'm:span-normal', + segments: [ + { blockId: 'p1', from: 0, to: 5, absFrom: 2, absTo: 7 }, + { blockId: 'p1', from: 5, to: 11, absFrom: 7, absTo: 13 }, + ], + text: 'hello world', + marks: [], + capturedStyleBySegment: [], + } as any; + const step = { + id: 'span-newline-normal', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'Alpha\nBeta' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + expect(() => executeSpanTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + expect(countNodeType(editor, 'lineBreak')).toBe(1); + expect(hasNodeOfType(editor, 'hardBreak')).toBe(false); + + // The break is a real node, never a raw '\n' baked into a text node. + let rawNewlineText = false; + editor.state.doc.descendants((node: any) => { + if (node.isText && typeof node.text === 'string' && node.text.includes('\n')) rawNewlineText = true; + }); + expect(rawNewlineText).toBe(false); + }); + + it('span rewrite with a newline into total-page-number falls back to literal text, no lineBreak', () => { + editor = makeEditorWithTotalPageCount(); // paragraph > run > total-page-number > text '7' + const nodePos = findTotalPageNumberPos(editor); + const tr = editor.state.tr; + + // The span sits entirely inside the field's text ('7' at [nodePos+1, nodePos+2]). + // total-page-number is text*-only, so the probe at the edit position rejects a + // lineBreak and the replacement falls back to literal text (the export safety + // net turns it into a later). + const target = { + kind: 'span', + stepId: 'span-newline-field', + op: 'text.rewrite', + matchId: 'm:span-field', + segments: [{ blockId: 'p1', from: 0, to: 1, absFrom: nodePos + 1, absTo: nodePos + 2 }], + text: '7', + marks: [], + capturedStyleBySegment: [], + } as any; + const step = { + id: 'span-newline-field', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'Alpha\nBeta' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + expect(() => executeSpanTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + expect(editor.state.doc.nodeAt(nodePos)?.type.name).toBe('total-page-number'); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + }); +}); + +describe('docx export: newline becomes (SD-3278)', () => { + let docData: Awaited>; + let editor: any | undefined; + + beforeAll(async () => { + docData = await loadTestDataForEditorTests('blank-doc.docx'); + }); + + const makeBlankDocEditor = () => + initTestEditor({ + content: docData.docx, + media: docData.media, + mediaFiles: docData.mediaFiles, + fonts: docData.fonts, + // Tracked mutations require an author/user on the editor instance. + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; + + afterEach(() => { + editor?.destroy(); + editor = undefined; + }); + + it('exports a Word-native for a text node that already holds a raw newline', async () => { + editor = makeBlankDocEditor(); + + // Seed a raw '\n' directly via the core command (tr.insertText keeps the + // literal newline) to reproduce an imported .docx whose held a raw + // newline (this exercises the export safety net, not the creation path). + editor.dispatch(editor.state.tr.insertText('Alpha\nBeta', 1)); + expect(editor.state.doc.textContent).toContain('Alpha\nBeta'); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + + const xml = await editor.exportDocx({ exportXmlOnly: true }); + expect(xml).toContain(' around (no leftover )', async () => { + editor = makeBlankDocEditor(); + + // Seed "Alpha\nBeta" as one text node, then mark the whole range as a tracked + // deletion. On export the run is split around ; every segment must + // become inside , never a stray . + editor.dispatch(editor.state.tr.insertText('Alpha\nBeta', 1)); + const delMark = editor.schema.marks.trackDelete.create({ + id: 'del-1', + author: 'Reviewer', + authorEmail: 'reviewer@example.com', + date: '2026-01-01T00:00:00.000Z', + }); + editor.dispatch(editor.state.tr.addMark(1, 1 + 'Alpha\nBeta'.length, delMark)); + + const xml = await editor.exportDocx({ exportXmlOnly: true }); + expect(xml).toContain(' survives inside the deletion (it would be ignored by Word). + expect(/]*>[\s\S]*?<\/w:del>/.test(xml)).toBe(false); + }); + + it('tracked doc.replace with a newline exports valid tracked OOXML (inserted text in , break preserved as )', async () => { + editor = makeBlankDocEditor(); + + // Seed text to target, then tracked-replace it with multi-line content. + editor.dispatch(editor.state.tr.insertText('hello world', 1)); + const match = editor.doc.query.match({ select: { type: 'text', pattern: 'hello world' }, require: 'first' }); + const ref = match?.items?.[0]?.handle?.ref; + expect(ref).toBeTruthy(); + + editor.doc.replace({ ref, text: 'Alpha\nBeta' }, { changeMode: 'tracked' }); + + const xml = await editor.exportDocx({ exportXmlOnly: true }); + // Inserted text is tracked and the newline survives as a Word-native break. + expect(xml).toContain('/ the way noBreakHyphen's translator does, so the break + // exports as its own run rather than inside . The output is valid + // OOXML; on reject the orphan break remains. This is an export-routing gap, + // not a schema limit (a leaf atom can carry marks). Tracked deletes of an + // existing raw-newline node keep the break inside via the single-run + // path above. + }); +}); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/node-materializer.ts b/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/node-materializer.ts index 6b038a2210..1d1af2b13b 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/node-materializer.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/node-materializer.ts @@ -917,7 +917,15 @@ function materializeTab(schema: Schema): ProseMirrorNode { } function materializeLineBreak(schema: Schema): ProseMirrorNode { - const nodeType = schema.nodes.hardBreak ?? schema.nodes.lineBreak; + // A `kind: 'lineBreak'` item is a soft break and must materialize to `lineBreak` + // (exports ``), not `hardBreak` (exports ``, a page + // break). Block-level page breaks use the distinct `kind: 'break'` path. + // KNOWN AMBIGUITY: structural projection (sd-projection) maps BOTH hardBreak + // and lineBreak to `kind: 'lineBreak'` with no discriminator, so a structural + // read/write echo of an *inline* page break is demoted to a soft break here. + // That projection ambiguity predates this change and needs a discriminator to + // round-trip inline page breaks (separate follow-up). + const nodeType = schema.nodes.lineBreak ?? schema.nodes.hardBreak; if (!nodeType) return schema.text('\n'); return nodeType.create(); } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/structural-write-engine.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/structural-write-engine.test.ts index 6559632baa..d3e3085c80 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/structural-write-engine.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/structural-write-engine.test.ts @@ -895,6 +895,74 @@ describe('materializeFragment — capability gates', () => { // Extension nodes bypass capability gates — should fall through to fallback materializer expect(() => materializeFragment(editor.state.schema, fragment, new Set(), 'insert')).not.toThrow(); }); + + // SD-3278: a `kind: 'lineBreak'` inline must become a soft `lineBreak` + // node (exports to ), NOT a `hardBreak` node (exports to a page break). + // This is the bug that made the structural.replace workaround fail. + it('materializes a structural lineBreak as a soft lineBreak node, not a hardBreak', () => { + const fragment: SDFragment = { + kind: 'paragraph', + paragraph: { + inlines: [ + { kind: 'run', run: { text: 'Alpha' } }, + { kind: 'lineBreak' }, + { kind: 'run', run: { text: 'Beta' } }, + ], + }, + } as any; + + const materialized = materializeFragment(editor.state.schema, fragment, new Set(), 'insert'); + const roots = Array.isArray(materialized) ? materialized : [materialized]; + const typeNames = new Set(); + roots.forEach((root: any) => root.descendants((node: any) => typeNames.add(node.type.name))); + + expect(typeNames.has('lineBreak')).toBe(true); + expect(typeNames.has('hardBreak')).toBe(false); + }); + + // SD-3278: a structural run whose text carries a raw newline (a natural shape + // for SDK/agent-built `structural.replace` content) must split into lineBreak + // nodes, not keep the literal '\n' in a text node. + it('splits a newline inside structural run text into a lineBreak node, not raw text', () => { + const fragment: SDFragment = { + kind: 'paragraph', + paragraph: { + inlines: [{ kind: 'run', run: { text: 'left\nright' } }], + }, + } as any; + + const materialized = materializeFragment(editor.state.schema, fragment, new Set(), 'insert'); + const roots = Array.isArray(materialized) ? materialized : [materialized]; + const typeCounts: Record = {}; + let text = ''; + roots.forEach((root: any) => + root.descendants((node: any) => { + typeCounts[node.type.name] = (typeCounts[node.type.name] ?? 0) + 1; + if (node.isText) text += node.text ?? ''; + }), + ); + + expect(typeCounts.lineBreak).toBe(1); + expect(typeCounts.hardBreak ?? 0).toBe(0); + // No raw newline survives inside a text node. + expect(text.includes('\n')).toBe(false); + expect(text).toContain('left'); + expect(text).toContain('right'); + // The break reads back as '\n' via leafText, so structural reads that + // flatten PM content preserve it on round-trip (SD-3278). + const readableText = roots + .map((root: any) => { + if (typeof root.textBetween !== 'function') return root.textContent ?? ''; + const leafText = (node: any) => { + const specLeafText = node?.type?.spec?.leafText; + return typeof specLeafText === 'function' ? specLeafText(node) : ''; + }; + const size = root.content?.size ?? root.size ?? 0; + return root.textBetween(0, size, '', leafText); + }) + .join(''); + expect(readableText).toBe('left\nright'); + }); }); // --------------------------------------------------------------------------- diff --git a/packages/super-editor/src/editors/v1/extensions/line-break/line-break.js b/packages/super-editor/src/editors/v1/extensions/line-break/line-break.js index 4fbc2a148c..e141dcc41f 100644 --- a/packages/super-editor/src/editors/v1/extensions/line-break/line-break.js +++ b/packages/super-editor/src/editors/v1/extensions/line-break/line-break.js @@ -32,6 +32,17 @@ export const LineBreak = Node.create({ content: '', atom: true, + // The visible text representation of this leaf. Without it, flattening APIs + // see the break as nothing or a U+FFFC placeholder, so the rewrite char-diff + // and the doc-api text model (query.match, structural projection) disagree + // with the visible document. For example, an idempotent text.rewrite over + // `Alpha\nBeta` would duplicate the break, and query.match could not find it. + // Read by PM's built-in `Node.textBetween` (so `node.textContent` too), + // SuperDoc's `textBetweenWithTabs` / `charOffsetToDocPos`, SearchIndex, and + // `text-offset-resolver`. Mirrors the `noBreakHyphen` leaf (U+2011). See + // SD-3278. + leafText: () => '\n', + addOptions() { return {}; }, diff --git a/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js b/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js index 593f599a5f..94a02b611c 100644 --- a/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js +++ b/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js @@ -386,14 +386,16 @@ export const Paragraph = OxmlNode.create({ // We avoid `findParentNode(isList)` here because `isList` depends on // `getResolvedParagraphProperties`, a WeakMap cache keyed by node // identity. After the numbering plugin's `appendTransaction` sets - // `listRendering`, the paragraph node object is replaced, leaving - // the new node uncached — causing `isList` to return false. + // `listRendering`, the paragraph node object is replaced, so the + // new node is uncached and `isList` can return false. const { $from } = selection; let paragraph = null; + let paragraphDepth = null; for (let d = $from.depth; d >= 0; d--) { const node = $from.node(d); if (node.type.name === 'paragraph') { paragraph = node; + paragraphDepth = d; break; } } @@ -404,7 +406,18 @@ export const Paragraph = OxmlNode.create({ if (!isListParagraph) return false; if (!isVisuallyEmptyParagraph(paragraph) && !hasOnlyBreakContent(paragraph)) return false; - const tr = state.tr.insertText(event.data); + let tr = state.tr; + if (hasOnlyBreakContent(paragraph) && paragraphDepth != null) { + const contentStart = $from.start(paragraphDepth); + const contentEnd = $from.end(paragraphDepth); + tr = tr.delete(contentStart, contentEnd).insertText(event.data, contentStart); + // insertText at an explicit position leaves the caret before the inserted + // text, so subsequent native keystrokes prepend instead of append (typing + // "abcdef" lands as "bcdefa"). Place the caret after the inserted text. + tr = tr.setSelection(TextSelection.create(tr.doc, contentStart + event.data.length)); + } else { + tr = tr.insertText(event.data); + } view.dispatch(tr); event.preventDefault(); return true; diff --git a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js index a830c3d298..162d6e5289 100644 --- a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js +++ b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js @@ -28,6 +28,12 @@ const DELETION_BARRIER = '\u0000'; const DEFAULT_SEARCH_MODEL = 'raw'; const hasTrackDeleteMark = (node) => node?.marks?.some((mark) => mark?.type?.name === 'trackDelete') ?? false; +const readLeafText = (node) => { + const leafText = node?.type?.spec?.leafText; + if (typeof leafText === 'function') return leafText(node); + if (typeof leafText === 'string') return leafText; + return ATOM_PLACEHOLDER; +}; /** * SearchIndex provides a lazily-built, cached index for searching across @@ -69,7 +75,7 @@ export class SearchIndex { this.#buildVisible(doc); } else { // Get the flattened text using ProseMirror's optimized textBetween - this.text = doc.textBetween(0, doc.content.size, BLOCK_SEPARATOR, ATOM_PLACEHOLDER); + this.text = doc.textBetween(0, doc.content.size, BLOCK_SEPARATOR, readLeafText); } this.segments = []; @@ -141,7 +147,12 @@ export class SearchIndex { } if (node.isLeaf) { - parts.push(ATOM_PLACEHOLDER); + if (hasTrackDeleteMark(node)) { + appendDeletionBarrier(); + return; + } + + parts.push(readLeafText(node)); emittedDeletionBarrier = false; return; } @@ -240,29 +251,48 @@ export class SearchIndex { } if (node.isLeaf) { + if (searchModel === 'visible' && hasTrackDeleteMark(node)) { + if (context?.deletionBarrierActive) { + return offset; + } + addSegment({ + offsetStart: offset, + offsetEnd: offset + 1, + docFrom: docPos, + docTo: docPos + node.nodeSize, + kind: 'atom', + }); + if (context) { + context.deletionBarrierActive = true; + } + return offset + 1; + } + if (context && searchModel === 'visible') { context.deletionBarrierActive = false; } + const leafText = readLeafText(node); + if (leafText.length === 0) return offset; // Leaf node (atom): check if it's a hard_break or other atom if (node.type.name === 'hard_break') { addSegment({ offsetStart: offset, - offsetEnd: offset + 1, + offsetEnd: offset + leafText.length, docFrom: docPos, docTo: docPos + node.nodeSize, kind: 'hardBreak', }); - return offset + 1; + return offset + leafText.length; } - // Other atoms get the replacement character + // Other atoms use their declared leaf text or the replacement character. addSegment({ offsetStart: offset, - offsetEnd: offset + 1, + offsetEnd: offset + leafText.length, docFrom: docPos, docTo: docPos + node.nodeSize, kind: 'atom', }); - return offset + 1; + return offset + leafText.length; } // For non-leaf nodes, recurse into content @@ -310,6 +340,14 @@ export class SearchIndex { */ offsetRangeToDocRanges(start, end) { const ranges = []; + // A single search hit is gapless in offset space, so consecutive segments + // (text and inline-leaf atoms like lineBreak) belong to one contiguous + // match. Coalesce them into one doc range — otherwise a hit spanning + // `text + lineBreak + text` yields discontiguous text ranges that the + // downstream D5 contiguity guard rejects (SD-3278). A block separator is a + // real split between blocks and ends the current range. The D5 guard still + // catches genuinely separate edits, which are not offset-contiguous. + let current = null; for (const segment of this.segments) { // Skip segments entirely before our range @@ -317,25 +355,49 @@ export class SearchIndex { // Stop if we're past our range if (segment.offsetStart >= end) break; - // Only include text segments in the result - if (segment.kind !== 'text') continue; + // Block separators split blocks; never coalesce across them. + if (segment.kind === 'blockSep') { + if (current) { + ranges.push({ from: current.from, to: current.to }); + current = null; + } + continue; + } - // Calculate the overlap const overlapStart = Math.max(start, segment.offsetStart); const overlapEnd = Math.min(end, segment.offsetEnd); + if (overlapStart >= overlapEnd) continue; + + let from; + let to; + if (segment.kind === 'text') { + from = segment.docFrom + (overlapStart - segment.offsetStart); + to = segment.docFrom + (overlapEnd - segment.offsetStart); + } else { + // Inline leaf atom (lineBreak, hardBreak, image, ...): occupies its + // whole node span and is part of the contiguous match, not a gap. + from = segment.docFrom; + to = segment.docTo; + } - if (overlapStart < overlapEnd) { - // Map the overlap back to document positions - const startInSegment = overlapStart - segment.offsetStart; - const endInSegment = overlapEnd - segment.offsetStart; - - ranges.push({ - from: segment.docFrom + startInSegment, - to: segment.docFrom + endInSegment, - }); + // Coalesce only when the next segment is BOTH offset-contiguous (same + // search hit) AND PM-contiguous (`from === current.to`, i.e. immediately + // adjacent in the document). This merges `text + lineBreak + text` within + // one run into a single range, but never bridges a document gap — a + // skipped/tracked-deleted leaf, a run boundary, or any content the match + // does not actually cover. A non-coalesced segment becomes its own range; + // since it stays offset-contiguous, the downstream block coalescing still + // sees no gap, while the D5 guard keeps rejecting genuinely separate edits. + if (current && segment.offsetStart === current.offsetEnd && from === current.to) { + current.to = to; + current.offsetEnd = overlapEnd; + } else { + if (current) ranges.push({ from: current.from, to: current.to }); + current = { from, to, offsetEnd: overlapEnd }; } } + if (current) ranges.push({ from: current.from, to: current.to }); return ranges; } diff --git a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js index bde782ccf8..6b92f0f791 100644 --- a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js +++ b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js @@ -1,5 +1,6 @@ // @ts-nocheck import { describe, it, expect } from 'vitest'; +import { Schema } from 'prosemirror-model'; import { SearchIndex } from './SearchIndex.js'; describe('SearchIndex.stripDiacritics', () => { @@ -142,6 +143,66 @@ describe('SearchIndex.searchIgnoringDiacritics', () => { }); }); +describe('SearchIndex lineBreak leaf text', () => { + const schema = new Schema({ + nodes: { + doc: { content: 'paragraph+' }, + paragraph: { group: 'block', content: 'inline*' }, + text: { group: 'inline' }, + lineBreak: { + group: 'inline', + inline: true, + atom: true, + leafText: () => '\n', + }, + }, + marks: {}, + }); + + function buildLineBreakDoc() { + return schema.nodes.doc.create(null, [ + schema.nodes.paragraph.create(null, [schema.text('Alpha'), schema.nodes.lineBreak.create(), schema.text('Beta')]), + ]); + } + + it('indexes lineBreak using its declared leafText', () => { + const index = new SearchIndex(); + + index.ensureValid(buildLineBreakDoc()); + + expect(index.text).toBe('Alpha\nBeta'); + expect(index.search('Alpha\nBeta')).toHaveLength(1); + }); + + it('coalesces a hit spanning text + lineBreak + text into one contiguous doc range', () => { + const index = new SearchIndex(); + index.ensureValid(buildLineBreakDoc()); + + // The full 'Alpha\nBeta' span (text + lineBreak + text, all PM-adjacent in + // one block) must map to a single contiguous range, not discontiguous text + // ranges that the downstream D5 contiguity guard would reject. + const ranges = index.offsetRangeToDocRanges(0, index.text.length); + expect(ranges).toHaveLength(1); + }); + + it('does NOT coalesce across a block separator (negative)', () => { + const index = new SearchIndex(); + index.ensureValid( + schema.nodes.doc.create(null, [ + schema.nodes.paragraph.create(null, [schema.text('Alpha')]), + schema.nodes.paragraph.create(null, [schema.text('Beta')]), + ]), + ); + + // 'Alpha\nBeta' here is two paragraphs joined by a block separator, not an + // inline break. The separator is a real split: the span must stay two ranges + // (one per block), never a single editable text range. + expect(index.text).toBe('Alpha\nBeta'); + const ranges = index.offsetRangeToDocRanges(0, index.text.length); + expect(ranges).toHaveLength(2); + }); +}); + describe('SearchIndex searchModel: visible', () => { function textNode(text, { deleted = false } = {}) { return { @@ -155,6 +216,19 @@ describe('SearchIndex searchModel: visible', () => { }; } + function leafNode(typeName, { deleted = false, leafText = undefined } = {}) { + return { + isText: false, + isLeaf: true, + isInline: true, + isBlock: false, + type: { name: typeName, spec: leafText ? { leafText } : {} }, + nodeSize: 1, + marks: deleted ? [{ type: { name: 'trackDelete' } }] : [], + forEach: () => {}, + }; + } + function containerNode(children, { isBlock = false, textBetween = '' } = {}) { return { isText: false, @@ -204,6 +278,23 @@ describe('SearchIndex searchModel: visible', () => { return { doc }; } + function buildDocWithTrackedDeletedLineBreak() { + const paragraph = containerNode( + [textNode('before'), leafNode('lineBreak', { deleted: true, leafText: () => '\n' }), textNode('after')], + { + isBlock: true, + textBetween: 'before\nafter', + }, + ); + + const doc = containerNode([paragraph], { + isBlock: false, + textBetween: 'before\nafter', + }); + + return { doc }; + } + it('excludes pending tracked deletions in visible model', () => { const { doc } = buildDocWithTrackedDeletion(); const index = new SearchIndex(); @@ -250,4 +341,14 @@ describe('SearchIndex searchModel: visible', () => { expect(ranges[0].to - ranges[0].from).toBe('after'.length); expect(ranges[0]).toEqual({ from: 13, to: 18 }); }); + + it('excludes pending tracked deletions on leaf nodes in visible model', () => { + const { doc } = buildDocWithTrackedDeletedLineBreak(); + const index = new SearchIndex(); + + index.ensureValid(doc, { searchModel: 'visible' }); + + expect(index.search('\n')).toHaveLength(0); + expect(index.search('beforeafter')).toHaveLength(0); + }); }); diff --git a/packages/super-editor/src/editors/v1/extensions/search/search.js b/packages/super-editor/src/editors/v1/extensions/search/search.js index 8679aa08e3..e3fcbb196b 100644 --- a/packages/super-editor/src/editors/v1/extensions/search/search.js +++ b/packages/super-editor/src/editors/v1/extensions/search/search.js @@ -49,11 +49,12 @@ const mapIndexMatchesToDocMatches = ({ searchIndex, indexMatches, doc, positionT if (ranges.length === 0) continue; const matchTexts = ranges.map((r) => doc.textBetween(r.from, r.to)); + const matchText = typeof indexMatch.text === 'string' ? indexMatch.text : matchTexts.join(''); const match = { from: ranges[0].from, to: ranges[ranges.length - 1].to, - text: matchTexts.join(''), + text: matchText, id: uuidv4(), ranges, trackerIds: [],