From a58352ccc3baab85b4d9588e7ad60dc6531e04ee Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Wed, 3 Jun 2026 21:36:50 -0300 Subject: [PATCH 1/4] fix(super-editor): preserve generated line breaks in DOCX export (SD-3278) Multi-line text in text-mode mutations stored newlines as a raw \n inside one , which Word collapses while SuperDoc renders a break. Convert newlines to lineBreak nodes at creation, split any residual raw newline into / on export, and make the read model agree that a lineBreak reads as \n so rewrite/search/query stay consistent. Serializes as a Word-native (ECMA-376 17.3.3.1). - buildTextWithTabs: normalize \n, \r\n, \r to lineBreak nodes, gated on parent admission (probed per edit position) for text*-only parents - materializeLineBreak: prefer lineBreak over hardBreak (soft, not page) - getTextNodeForExport: split residual raw newline into / - del-translator: rename every in a split run to - lineBreak.leafText = '\n' so textBetweenWithTabs / charOffsetToDocPos / text-offset-resolver read a break as \n; idempotent rewrite no longer duplicates it, a rewrite to single-line text removes it - SearchIndex honors leafText, and a single hit spanning text+lineBreak+ text coalesces to one contiguous range so query.match('Alpha\nBeta') works (block separators still split; D5 guard intact) - list paragraph beforeinput removes the placeholder break when text is typed; visible text models skip tracked-deleted leaf nodes --- .../v3/handlers/w/del/del-translator.js | 18 +- .../v3/handlers/w/del/del-translator.test.js | 31 ++ .../w/t/helpers/translate-text-node.js | 37 +- .../w/t/helpers/translate-text-node.test.js | 76 +++ .../helpers/text-offset-resolver.test.ts | 61 ++- .../helpers/text-offset-resolver.ts | 19 +- .../helpers/text-with-tabs.test.ts | 113 ++++- .../helpers/text-with-tabs.ts | 89 +++- .../plan-engine/content-controls-wrappers.ts | 5 +- .../plan-engine/executor.ts | 46 +- .../newline-handling.integration.test.ts | 470 ++++++++++++++++++ .../node-materializer.ts | 10 +- .../structural-write-engine.test.ts | 68 +++ .../v1/extensions/line-break/line-break.js | 11 + .../v1/extensions/paragraph/paragraph.js | 15 +- .../v1/extensions/search/SearchIndex.js | 92 +++- .../v1/extensions/search/SearchIndex.test.js | 73 +++ .../editors/v1/extensions/search/search.js | 3 +- 18 files changed, 1167 insertions(+), 70 deletions(-) create mode 100644 packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.js index cb7339e376..888ffc46ba 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.js @@ -104,12 +104,18 @@ function decode(params) { return null; } - // ECMA-376 renames w:t → w:delText inside . Other inline content — - // w:noBreakHyphen, w:tab, w:br, etc. — stays as-is; the deletion is - // conveyed by the wrapper alone. Guard the rename so non-text - // atoms inside don't crash. - const textNode = translatedTextNode.elements.find((n) => n.name === 'w:t'); - if (textNode) textNode.name = 'w:delText'; + // ECMA-376 (17.3.3.7) requires w:delText for ALL text runs inside . A + // single run can now hold multiple siblings, because the newline export + // safety net splits text around (e.g. AlphaBeta), + // so rename every direct w:t, not just the first; a leftover inside + // would not be treated as deleted. Other inline content + // (w:noBreakHyphen, w:tab, w:br, etc.) stays as-is; the wrapper alone + // conveys the deletion. + (translatedTextNode.elements || []) + .filter((n) => n.name === 'w:t') + .forEach((n) => { + n.name = 'w:delText'; + }); return { name: 'w:del', diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.test.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.test.js index 4aceb41c3d..643fc19441 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.test.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/del/del-translator.test.js @@ -177,6 +177,37 @@ describe('w:del translator', () => { expect(result.elements[0].elements[0].name).toBe('w:delText'); }); + it('renames every in a multi-segment run to (newline split)', () => { + const mockTrackedMark = { + type: 'trackDelete', + attrs: { + id: '789', + sourceId: '', + author: 'Test', + authorEmail: 'test@example.com', + date: '2025-10-09T12:00:00Z', + }, + }; + + // The newline export safety net produces one run with interleaved w:t/w:br; + // every w:t inside must become w:delText, not just the first. + exportSchemaToJson.mockReturnValue({ + name: 'w:r', + elements: [ + { name: 'w:t', elements: [{ text: 'Alpha', type: 'text' }] }, + { name: 'w:br' }, + { name: 'w:t', elements: [{ text: 'Beta', type: 'text' }] }, + ], + }); + + const node = { type: 'text', text: 'Alpha\nBeta', marks: [mockTrackedMark] }; + const result = config.decode({ node }); + + const run = result.elements[0]; + expect(run.elements.map((n) => n.name)).toEqual(['w:delText', 'w:br', 'w:delText']); + expect(run.elements.some((n) => n.name === 'w:t')).toBe(false); + }); + it('writes sourceId to w:id for round-trip fidelity', () => { const mockTrackedMark = { type: 'trackDelete', diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.js index e9eee7aca8..0586bc1b6c 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.js @@ -44,11 +44,38 @@ export function getTextNodeForExport(text, marks, params) { partPath: resolveExportPartPath(params), }); - textNodes.push({ - name: 'w:t', - elements: [{ text, type: 'text' }], - attributes: nodeAttrs, - }); + const textValue = typeof text === 'string' ? text : ''; + // Normalize CRLF/CR to LF so Windows line endings export Word-native breaks + // too, rather than leaving a stray carriage return inside . + const normalizedText = textValue.includes('\r') ? textValue.replace(/\r\n?/g, '\n') : textValue; + if (normalizedText.includes('\n')) { + // Export safety net: a raw newline inside is whitespace that Word + // collapses on open (it is not the OOXML representation of a line break), + // while SuperDoc still renders it as a break: the SD-3278 + // divergence. Emit a Word-native between + // segments instead. Everything stays inside this single run so the + // surrounding / wrappers keep wrapping exactly one run. + const segments = normalizedText.split('\n'); + segments.forEach((segment, index) => { + if (segment.length > 0) { + const segmentNeedsSpace = /^\s|\s$/.test(segment); + textNodes.push({ + name: 'w:t', + elements: [{ text: segment, type: 'text' }], + attributes: segmentNeedsSpace ? { 'xml:space': 'preserve' } : null, + }); + } + if (index < segments.length - 1) { + textNodes.push({ name: 'w:br' }); + } + }); + } else { + textNodes.push({ + name: 'w:t', + elements: [{ text: normalizedText, type: 'text' }], + attributes: nodeAttrs, + }); + } // For custom mark export, we need to add a bookmark start and end tag // And store attributes in the bookmark name diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.test.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.test.js index 75f0a66550..979c3b9b37 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.test.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/w/t/helpers/translate-text-node.test.js @@ -134,4 +134,80 @@ describe('getTextNodeForExport', () => { const runPropertiesChange = runProperties.elements.find((element) => element.name === 'w:rPrChange'); expect(runPropertiesChange.attributes['w:id']).toBe('7'); }); + + // SD-3278 export safety net: a raw newline left inside a PM text + // node (e.g. from an imported .docx that stored breaks as literal '\n') must + // export as a Word-native , not a collapsed newline inside . + describe('raw newline export safety net', () => { + const contentElements = (result) => result.elements.filter((el) => el.name === 'w:t' || el.name === 'w:br'); + + it('exports a single newline as // within one run', () => { + const result = getTextNodeForExport('Alpha\nBeta', [], buildParams()); + expect(result.name).toBe('w:r'); + const content = contentElements(result); + expect(content.map((el) => el.name)).toEqual(['w:t', 'w:br', 'w:t']); + expect(content[0].elements[0].text).toBe('Alpha'); + expect(content[2].elements[0].text).toBe('Beta'); + }); + + it('never leaves a raw newline inside a ', () => { + const result = getTextNodeForExport('Alpha\nBeta', [], buildParams()); + const texts = result.elements.filter((el) => el.name === 'w:t'); + expect(texts.some((el) => el.elements[0].text.includes('\n'))).toBe(false); + }); + + it('emits a soft break (no w:type="page") for the ', () => { + const result = getTextNodeForExport('Alpha\nBeta', [], buildParams()); + const br = result.elements.find((el) => el.name === 'w:br'); + expect(br).toBeDefined(); + expect(br.attributes?.['w:type']).toBeUndefined(); + }); + + it('leaves newline-free text as a single (unchanged)', () => { + const result = getTextNodeForExport('hello world', [], buildParams()); + const content = contentElements(result); + expect(content).toHaveLength(1); + expect(content[0].name).toBe('w:t'); + expect(content[0].elements[0].text).toBe('hello world'); + }); + + it('emits a for each newline including leading, trailing, and consecutive newlines', () => { + const result = getTextNodeForExport('\nA\n\nB\n', [], buildParams()); + const content = contentElements(result); + expect(content.map((el) => el.name)).toEqual(['w:br', 'w:t', 'w:br', 'w:br', 'w:t', 'w:br']); + const texts = content.filter((el) => el.name === 'w:t').map((el) => el.elements[0].text); + expect(texts).toEqual(['A', 'B']); + }); + + it('sets xml:space="preserve" only on segments with edge whitespace', () => { + const result = getTextNodeForExport('Alpha \n Beta', [], buildParams()); + const texts = result.elements.filter((el) => el.name === 'w:t'); + expect(texts[0].elements[0].text).toBe('Alpha '); + expect(texts[0].attributes).toEqual({ 'xml:space': 'preserve' }); + expect(texts[1].elements[0].text).toBe(' Beta'); + expect(texts[1].attributes).toEqual({ 'xml:space': 'preserve' }); + }); + + it('does not set xml:space on segments without edge whitespace', () => { + const result = getTextNodeForExport('Alpha\nBeta', [], buildParams()); + const texts = result.elements.filter((el) => el.name === 'w:t'); + expect(texts[0].attributes).toBeNull(); + expect(texts[1].attributes).toBeNull(); + }); + + it('normalizes CRLF to a on export', () => { + const content = contentElements(getTextNodeForExport('Alpha\r\nBeta', [], buildParams())); + expect(content.map((el) => el.name)).toEqual(['w:t', 'w:br', 'w:t']); + expect(content[0].elements[0].text).toBe('Alpha'); + expect(content[2].elements[0].text).toBe('Beta'); + }); + + it('normalizes a bare CR to a without leaving a stray carriage return in ', () => { + const result = getTextNodeForExport('Alpha\rBeta', [], buildParams()); + const content = contentElements(result); + expect(content.map((el) => el.name)).toEqual(['w:t', 'w:br', 'w:t']); + const texts = result.elements.filter((el) => el.name === 'w:t'); + expect(texts.some((el) => el.elements[0].text.includes('\r'))).toBe(false); + }); + }); }); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.test.ts index eb1374ccc8..1ab66b302b 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.test.ts @@ -9,6 +9,7 @@ import { type NodeOptions = { text?: string; marks?: Array<{ type: { name: string } }>; + leafText?: (node: ProseMirrorNode) => string; isInline?: boolean; isBlock?: boolean; isLeaf?: boolean; @@ -28,7 +29,7 @@ function createNode(typeName: string, children: ProseMirrorNode[] = [], options: const nodeSize = isText ? text.length : options.nodeSize != null ? options.nodeSize : isLeaf ? 1 : contentSize + 2; return { - type: { name: typeName }, + type: { name: typeName, spec: options.leafText ? { leafText: options.leafText } : {} }, text: isText ? text : undefined, nodeSize, isText, @@ -138,6 +139,25 @@ describe('resolveTextRangeInBlock', () => { expect(result).toEqual({ from: 6, to: 7 }); }); + + it('maps visible offsets across tracked deleted leaf nodes without counting them', () => { + const textA = createNode('text', [], { text: 'A' }); + const deletedBreak = createNode('lineBreak', [], { + isInline: true, + isLeaf: true, + marks: [{ type: { name: 'trackDelete' } }], + leafText: () => '\n', + }); + const textB = createNode('text', [], { text: 'B' }); + const paragraph = createNode('paragraph', [textA, deletedBreak, textB], { + isBlock: true, + inlineContent: true, + }); + + const result = resolveTextRangeInBlock(paragraph, 0, { start: 1, end: 2 }, { textModel: 'visible' }); + + expect(result).toEqual({ from: 3, to: 4 }); + }); }); describe('computeTextContentLength', () => { @@ -204,6 +224,26 @@ describe('computeTextContentLength', () => { expect(computeTextContentLength(paragraph, { textModel: 'visible' })).toBe(2); expect(textContentInBlock(paragraph, { textModel: 'visible' })).toBe('AB'); }); + + it('excludes tracked deleted leaf nodes in the visible text model', () => { + const textA = createNode('text', [], { text: 'A' }); + const deletedBreak = createNode('lineBreak', [], { + isInline: true, + isLeaf: true, + marks: [{ type: { name: 'trackDelete' } }], + leafText: () => '\n', + }); + const textB = createNode('text', [], { text: 'B' }); + const paragraph = createNode('paragraph', [textA, deletedBreak, textB], { + isBlock: true, + inlineContent: true, + }); + + expect(computeTextContentLength(paragraph)).toBe(3); + expect(computeTextContentLength(paragraph, { textModel: 'visible' })).toBe(2); + expect(textContentInBlock(paragraph)).toBe('A\nB'); + expect(textContentInBlock(paragraph, { textModel: 'visible' })).toBe('AB'); + }); }); describe('pmPositionToTextOffset', () => { @@ -268,4 +308,23 @@ describe('pmPositionToTextOffset', () => { expect(pmPositionToTextOffset(paragraph, 0, 6, { textModel: 'visible' })).toBe(1); expect(pmPositionToTextOffset(paragraph, 0, 7, { textModel: 'visible' })).toBe(2); }); + + it('keeps PM positions inside tracked deleted leaf nodes at the surrounding visible offset', () => { + const textA = createNode('text', [], { text: 'A' }); + const deletedBreak = createNode('lineBreak', [], { + isInline: true, + isLeaf: true, + marks: [{ type: { name: 'trackDelete' } }], + leafText: () => '\n', + }); + const textB = createNode('text', [], { text: 'B' }); + const paragraph = createNode('paragraph', [textA, deletedBreak, textB], { + isBlock: true, + inlineContent: true, + }); + + expect(pmPositionToTextOffset(paragraph, 0, 2, { textModel: 'visible' })).toBe(1); + expect(pmPositionToTextOffset(paragraph, 0, 3, { textModel: 'visible' })).toBe(1); + expect(pmPositionToTextOffset(paragraph, 0, 4, { textModel: 'visible' })).toBe(2); + }); }); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.ts index 5395f05ce1..17e95eb01d 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-offset-resolver.ts @@ -29,6 +29,10 @@ function shouldSkipTextNode(node: ProseMirrorNode, options?: TextOffsetOptions): return isVisibleTextModel(options) && hasTrackDeleteMark(node); } +function shouldSkipLeafNode(node: ProseMirrorNode, options?: TextOffsetOptions): boolean { + return isVisibleTextModel(options) && hasTrackDeleteMark(node); +} + function resolveSegmentPosition( targetOffset: number, segmentStart: number, @@ -87,6 +91,10 @@ export function pmPositionToTextOffset( if (node.isLeaf) { const endPos = docPos + node.nodeSize; + if (shouldSkipLeafNode(node, options)) { + if (pmPos < endPos) done = true; + return; + } if (pmPos >= endPos) { offset += 1; } else { @@ -141,6 +149,7 @@ export function computeTextContentLength(blockNode: ProseMirrorNode, options?: T return; } if (node.isLeaf) { + if (shouldSkipLeafNode(node, options)) return; length += 1; return; } @@ -229,6 +238,7 @@ export function resolveTextRangeInBlock( } if (node.isLeaf) { + if (shouldSkipLeafNode(node, options)) return; advanceSegment(1, docPos, docPos + node.nodeSize); return; } @@ -262,7 +272,14 @@ export function textContentInBlock(blockNode: ProseMirrorNode, options?: TextOff } if (node.isLeaf) { - text += '\ufffc'; + if (shouldSkipLeafNode(node, options)) return; + // Honor a leaf's declared visible text (e.g. lineBreak -> '\n', + // noBreakHyphen -> U+2011) so this content model agrees with the visible + // document and with the offset model. All leafText values are one + // character, matching the 1-per-leaf length used by the offset helpers + // above; other leaves fall back to the U+FFFC placeholder. + const leafText = (node.type?.spec as { leafText?: (n: ProseMirrorNode) => string } | undefined)?.leafText; + text += typeof leafText === 'function' ? leafText(node) : '\ufffc'; return; } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.test.ts index 8f5c99b9a8..a63576cf03 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.test.ts @@ -2,7 +2,9 @@ import { describe, expect, it, vi } from 'vitest'; import { Fragment, Schema } from 'prosemirror-model'; import { buildTextWithTabs, parentAllowsNodeAt, textBetweenWithTabs } from './text-with-tabs.js'; -function makeRealSchema(options: { hasTab?: boolean; hasNoBreakHyphen?: boolean; hasGenericLeaf?: boolean } = {}) { +function makeRealSchema( + options: { hasTab?: boolean; hasLineBreak?: boolean; hasNoBreakHyphen?: boolean; hasGenericLeaf?: boolean } = {}, +) { const nodes: Record = { doc: { content: 'paragraph+' }, paragraph: { group: 'block', content: 'inline*' }, @@ -13,6 +15,11 @@ function makeRealSchema(options: { hasTab?: boolean; hasNoBreakHyphen?: boolean; // Tab is non-leaf, which is why `textBetweenWithTabs` (not PM's built-in textBetween) is needed. nodes.tab = { group: 'inline', inline: true, atom: true, content: 'inline*' }; } + if (options.hasLineBreak) { + // Mirrors the real extensions/line-break/line-break.js shape: inline atom + // that disallows marks (`marks: ''`) and renders to
/ exports to . + nodes.lineBreak = { group: 'inline', inline: true, atom: true, marks: '' }; + } if (options.hasNoBreakHyphen) { // Mirrors the real extensions/no-break-hyphen schema: inline leaf atom with leafText. nodes.noBreakHyphen = { group: 'inline', inline: true, atom: true, leafText: () => '‑' }; @@ -93,12 +100,114 @@ describe('buildTextWithTabs', () => { expect(result.child(0).text).toBe('x'); expect(result.child(0).marks.some((m: any) => m.type.name === 'bold')).toBe(true); expect(result.child(1).type.name).toBe('tab'); - // Tab carries the run's marks — the OOXML exporter reads node.marks on tab + // Tab carries the run's marks. The OOXML exporter reads node.marks on tab // nodes (tab-translator.js:53) to emit matching around . expect(result.child(1).marks.some((m: any) => m.type.name === 'bold')).toBe(true); expect(result.child(2).text).toBe('y'); expect(result.child(2).marks.some((m: any) => m.type.name === 'bold')).toBe(true); }); + + // SD-3278: a literal '\n' inside a text node exports as raw newline + // inside , which Word collapses. It must become a `lineBreak` node so the + // exporter emits a Word-native . + it('splits text around a single newline into text + lineBreak + text', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined); + expect(result).toBeInstanceOf(Fragment); + const fragment = result as Fragment; + expect(fragment.childCount).toBe(3); + expect(fragment.child(0).text).toBe('Alpha'); + expect(fragment.child(1).type.name).toBe('lineBreak'); + expect(fragment.child(2).text).toBe('Beta'); + }); + + it('emits a lineBreak node even when the schema has no tab node type', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined) as Fragment; + expect(result).toBeInstanceOf(Fragment); + expect(result.childCount).toBe(3); + expect(result.child(1).type.name).toBe('lineBreak'); + }); + + it('keeps the raw newline in a single text node when the schema has no lineBreak node type', () => { + const schema = makeRealSchema({ hasTab: true }); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined); + expect((result as any).isText).toBe(true); + expect((result as any).text).toBe('Alpha\nBeta'); + }); + + it('creates the lineBreak node bare in the creation path', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const boldMark = schema.marks.bold.create(); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', [boldMark]) as Fragment; + expect(result.child(0).marks.some((m: any) => m.type.name === 'bold')).toBe(true); + expect(result.child(1).type.name).toBe('lineBreak'); + expect(result.child(1).marks.length).toBe(0); + expect(result.child(2).marks.some((m: any) => m.type.name === 'bold')).toBe(true); + }); + + it('omits empty segments around leading, trailing, and consecutive newlines', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const lead = buildTextWithTabs(schema, '\nfoo', undefined) as Fragment; + expect(lead.childCount).toBe(2); + expect(lead.child(0).type.name).toBe('lineBreak'); + expect(lead.child(1).text).toBe('foo'); + + const doubled = buildTextWithTabs(schema, 'a\n\nb', undefined) as Fragment; + expect(doubled.childCount).toBe(4); + expect(doubled.child(0).text).toBe('a'); + expect(doubled.child(1).type.name).toBe('lineBreak'); + expect(doubled.child(2).type.name).toBe('lineBreak'); + expect(doubled.child(3).text).toBe('b'); + }); + + it('interleaves tab and lineBreak nodes when both control characters are present', () => { + const schema = makeRealSchema({ hasTab: true, hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'a\tb\nc', undefined) as Fragment; + expect(result.childCount).toBe(5); + expect(result.child(0).text).toBe('a'); + expect(result.child(1).type.name).toBe('tab'); + expect(result.child(2).text).toBe('b'); + expect(result.child(3).type.name).toBe('lineBreak'); + expect(result.child(4).text).toBe('c'); + }); + + it('keeps the raw tab literal but still splits the newline when tabs are disallowed', () => { + const schema = makeRealSchema({ hasTab: true, hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'a\tb\nc', undefined, { parentAllowsTab: false }) as Fragment; + expect(result.childCount).toBe(3); + expect(result.child(0).text).toBe('a\tb'); + expect(result.child(1).type.name).toBe('lineBreak'); + expect(result.child(2).text).toBe('c'); + }); + + // Generated/SDK text often uses CRLF; normalize CRLF and bare CR to + // line breaks so no stray carriage return survives in a text segment. + it('normalizes CRLF to a lineBreak node', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\r\nBeta', undefined) as Fragment; + expect(result.childCount).toBe(3); + expect(result.child(0).text).toBe('Alpha'); + expect(result.child(1).type.name).toBe('lineBreak'); + expect(result.child(2).text).toBe('Beta'); + }); + + it('normalizes a bare CR to a lineBreak node without leaving a stray carriage return', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\rBeta', undefined) as Fragment; + expect(result.childCount).toBe(3); + expect(result.child(0).text).toBe('Alpha'); + expect(result.child(1).type.name).toBe('lineBreak'); + expect(result.child(2).text).toBe('Beta'); + }); + + // A `text*`-only parent (e.g. total-page-number) rejects lineBreak. + it('keeps the raw newline in a single text node when parentAllowsLineBreak is false', () => { + const schema = makeRealSchema({ hasLineBreak: true }); + const result = buildTextWithTabs(schema, 'Alpha\nBeta', undefined, { parentAllowsLineBreak: false }); + expect((result as any).isText).toBe(true); + expect((result as any).text).toBe('Alpha\nBeta'); + }); }); describe('parentAllowsNodeAt', () => { diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.ts index 3a44a8582f..82e9bbf64f 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/text-with-tabs.ts @@ -10,38 +10,77 @@ import type { Transaction } from 'prosemirror-state'; import { TrackDeleteMarkName } from '../../extensions/track-changes/constants.js'; /** - * Build a text-or-fragment suitable for insertion, splitting on '\t' and - * inserting schema `tab` nodes at each split. + * Build a text-or-fragment suitable for insertion, splitting on '\t' and '\n' + * and inserting schema `tab` / `lineBreak` nodes at each split. * - * Returns a plain text node when the schema has no `tab` node type, when the - * parent disallows tab nodes (see {@link parentAllowsTabAt}), or when the text - * contains no tab characters. The raw '\t' is preserved inside the text node - * so exporters and readers still see the character. + * A tab split produces a `tab` node; a newline split produces a `lineBreak` + * node. Each is only performed when the schema exposes the matching node type + * (and, for tabs, when the parent admits them; see {@link parentAllowsNodeAt}). + * A control character that cannot be materialized stays literal inside the + * surrounding text node so exporters and readers still see it. * - * Callers are responsible for ensuring `text` is non-empty (ProseMirror's - * `schema.text` throws on empty input). + * Newlines must become `lineBreak` nodes (not raw '\n' inside a text node): + * a literal newline inside `` is whitespace that Word collapses on open + * (it is not the OOXML representation of a line break), so it would drop the + * visible break while SuperDoc still renders it. The `lineBreak` node + * round-trips to a Word-native ``. See SD-3278. + * + * Returns a plain text node when the text contains no splittable control + * character (the common case). Callers are responsible for ensuring `text` is + * non-empty (ProseMirror's `schema.text` throws on empty input). */ export function buildTextWithTabs( schema: Schema, text: string, marks: readonly ProseMirrorMark[] | undefined, - opts: { parentAllowsTab?: boolean } = {}, + opts: { parentAllowsTab?: boolean; parentAllowsLineBreak?: boolean } = {}, ): ProseMirrorNode | ProseMirrorFragment { - // Check the cheapest/most selective predicate first — most calls carry no '\t'. - if (!text.includes('\t')) return schema.text(text, marks); + // Normalize CRLF/CR to LF up front so Windows line endings (common in + // generated/SDK text) are treated as line breaks too. Only allocate when a + // carriage return is actually present. + const normalized = text.includes('\r') ? text.replace(/\r\n?/g, '\n') : text; + + // Check the cheapest/most selective predicate first; most calls carry neither. + const hasTab = normalized.includes('\t'); + const hasNewline = normalized.includes('\n'); + if (!hasTab && !hasNewline) return schema.text(normalized, marks); + + // `lineBreak` is a normal inline node, but some textblocks restrict their + // content to `text*` (e.g. total-page-number) and reject it. Gate on parent + // admission the same way tabs do: callers that target restrictive parents + // pass the probe result; others default to allowed. When disallowed or absent + // we fall back to literal text, and the export safety net still converts any + // raw newline to `` on export. + const tabNodeType = hasTab && opts.parentAllowsTab !== false ? schema.nodes?.tab : undefined; + const lineBreakNodeType = hasNewline && opts.parentAllowsLineBreak !== false ? schema.nodes?.lineBreak : undefined; + if (!tabNodeType && !lineBreakNodeType) return schema.text(normalized, marks); - const tabNodeType = schema.nodes?.tab; - if (!tabNodeType || opts.parentAllowsTab === false) return schema.text(text, marks); + // `NodeType.create` takes `readonly Mark[] | undefined` (not null) for marks. + const tabMarks: readonly ProseMirrorMark[] | undefined = marks ?? undefined; + + // Split only on the control characters we can replace with a node; any other + // control character stays literal inside the surrounding text segments. + const splitPattern = [tabNodeType ? '\\t' : null, lineBreakNodeType ? '\\n' : null].filter(Boolean).join('|'); + const parts = normalized.split(new RegExp(`(${splitPattern})`)); - const tabMarks = (marks ?? null) as ProseMirrorMark[] | null; - const parts = text.split('\t'); const nodes: ProseMirrorNode[] = []; - for (let i = 0; i < parts.length; i++) { - if (parts[i]) nodes.push(schema.text(parts[i], marks)); - // Carry the surrounding marks onto the tab node so the OOXML exporter - // wraps `` in a matching `` — keeps formatting unbroken - // across the tab (bold-run | tab | bold-run rather than bold | plain | bold). - if (i < parts.length - 1) nodes.push(tabNodeType.create(null, null, tabMarks)); + for (const part of parts) { + if (part === '') continue; // schema.text throws on empty input + if (part === '\t' && tabNodeType) { + // Carry the surrounding marks onto the tab node so the OOXML exporter + // wraps `` in a matching ``, keeping formatting unbroken + // across the tab (bold-run | tab | bold-run rather than bold | plain | bold). + nodes.push(tabNodeType.create(null, null, tabMarks)); + } else if (part === '\n' && lineBreakNodeType) { + // Create the break bare: a soft line break carries no run formatting. + // (Tracking an inserted break so it exports inside is a separate + // concern: br-translator does not yet route node.marks the way + // noBreakHyphen's translator does; see SD-3371. It is not a schema limit: + // a leaf atom can carry marks, governed by the parent run/paragraph.) + nodes.push(lineBreakNodeType.create()); + } else { + nodes.push(schema.text(part, marks)); + } } return Fragment.from(nodes); } @@ -128,8 +167,14 @@ export function textBetweenWithTabs( } if (node.isLeaf) { if (node.isInline) { + if ( + options.textModel === 'visible' && + node.marks?.some((mark: ProseMirrorMark) => mark.type.name === TrackDeleteMarkName) + ) { + return false; + } // Honor PM's `leafText` NodeSpec contract: an inline leaf can declare - // its visible text representation (e.g. noBreakHyphen → U+2011) so + // its visible text representation (e.g. noBreakHyphen -> U+2011) so // flattened reads match the rendered glyph instead of producing the // generic placeholder. Falls back to `leafFallback` when undefined. const leafTextFn = node.type?.spec?.leafText; diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/content-controls-wrappers.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/content-controls-wrappers.ts index 82ab9e11ff..4deabd771e 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/content-controls-wrappers.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/content-controls-wrappers.ts @@ -997,7 +997,10 @@ function insertTextAroundSdt( const { tr } = editor.state; const tabType = editor.schema.nodes?.tab; const parentAllowsTab = tabType && content.includes('\t') ? parentAllowsNodeAt(tr, pos, tabType) : false; - tr.insert(pos, buildTextWithTabs(editor.schema, content, undefined, { parentAllowsTab })); + const lineBreakType = editor.schema.nodes?.lineBreak; + const parentAllowsLineBreak = + lineBreakType && /[\r\n]/.test(content) ? parentAllowsNodeAt(tr, pos, lineBreakType) : false; + tr.insert(pos, buildTextWithTabs(editor.schema, content, undefined, { parentAllowsTab, parentAllowsLineBreak })); dispatchTransaction(editor, tr); return true; } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts index cf2c46d86b..0ec53f6615 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/executor.ts @@ -886,6 +886,16 @@ export function executeTextRewrite( const replacementText = getReplacementText(step.args.replacement); const marks = resolveMarksForRange(editor, target, step); + // A rewrite range can start in a normal parent (a run) and span into a + // `text*`-only inline container (e.g. total-page-number) that rejects + // lineBreak. The edits below land at remapped positions whose parent may + // differ from `absFrom`, so probe admission at each actual edit position + // rather than once at the range start: a newline that lands in a restrictive + // parent falls back to literal text (the export safety net still emits + // ). Probe on the current `tr` so prior in-loop steps are reflected. + const lineBreakNodeType = editor.state.schema.nodes?.lineBreak; + const parentAllowsLineBreakAt = (pos: number): boolean => + lineBreakNodeType ? parentAllowsNodeAt(tr, pos, lineBreakNodeType) : false; const structuralRewrite = resolveStructuralRangeRewrite(tr.doc, absFrom, absTo, step); if (structuralRewrite) { @@ -935,7 +945,9 @@ export function executeTextRewrite( tr.delete(absFrom, absTo); return { changed: target.text.length > 0 }; } - const content = buildTextWithTabs(editor.state.schema, replacementText, asProseMirrorMarks(marks)); + const content = buildTextWithTabs(editor.state.schema, replacementText, asProseMirrorMarks(marks), { + parentAllowsLineBreak: parentAllowsLineBreakAt(absFrom), + }); tr.replaceWith(absFrom, absTo, content); return { changed: replacementText !== target.text }; } @@ -991,10 +1003,14 @@ export function executeTextRewrite( if (change.type === 'delete') { tr.delete(remap(change.docFrom), remap(change.docTo)); } else if (change.type === 'insert') { - const content = buildTextWithTabs(editor.state.schema, change.newText, asProseMirrorMarks(marks)); + const content = buildTextWithTabs(editor.state.schema, change.newText, asProseMirrorMarks(marks), { + parentAllowsLineBreak: parentAllowsLineBreakAt(remap(change.docPos)), + }); tr.insert(remap(change.docPos), content); } else { - const content = buildTextWithTabs(editor.state.schema, change.newText, asProseMirrorMarks(marks)); + const content = buildTextWithTabs(editor.state.schema, change.newText, asProseMirrorMarks(marks), { + parentAllowsLineBreak: parentAllowsLineBreakAt(remap(change.docFrom)), + }); tr.replaceWith(remap(change.docFrom), remap(change.docTo), content); } } @@ -1002,12 +1018,15 @@ export function executeTextRewrite( // Pure deletion after trimming: a non-empty replacement whose new text is // fully contained in the old text's common prefix + suffix collapses to an // empty delta (e.g. "best endeavours to:" → "endeavours to:" leaves - // trimmedNew === ""). Delete the removed range rather than building - // schema.text('') — ProseMirror rejects empty text nodes. + // trimmedNew === ""). This also covers removing a lineBreak, now that + // lineBreak counts as "\n" in the diff. Delete the removed range rather than + // building schema.text(''), which ProseMirror rejects for empty text. tr.delete(trimmedFrom, trimmedTo); } else { // 0 or 1 word change: replace just the trimmed range. - const content = buildTextWithTabs(editor.state.schema, trimmedNew, asProseMirrorMarks(marks)); + const content = buildTextWithTabs(editor.state.schema, trimmedNew, asProseMirrorMarks(marks), { + parentAllowsLineBreak: parentAllowsLineBreakAt(trimmedFrom), + }); tr.replaceWith(trimmedFrom, trimmedTo, content); } @@ -1138,7 +1157,12 @@ export function executeTextInsert( const tabNodeType = editor.state.schema.nodes?.tab; const parentAllowsTab = tabNodeType && text.includes('\t') ? parentAllowsNodeAt(tr, absPos, tabNodeType) : false; - tr.insert(absPos, buildTextWithTabs(editor.state.schema, text, marks, { parentAllowsTab })); + // Restrictive parents like total-page-number are `text*` and reject lineBreak; + // probe admission so newline-bearing inserts fall back to literal text there. + const lineBreakNodeType = editor.state.schema.nodes?.lineBreak; + const parentAllowsLineBreak = + lineBreakNodeType && /[\r\n]/.test(text) ? parentAllowsNodeAt(tr, absPos, lineBreakNodeType) : false; + tr.insert(absPos, buildTextWithTabs(editor.state.schema, text, marks, { parentAllowsTab, parentAllowsLineBreak })); return { changed: true }; } @@ -1323,7 +1347,13 @@ export function executeSpanTextRewrite( // For single replacement block, use flat replacement into the span if (replacementBlocks.length === 1) { const marks = resolveSpanMarks(editor, target, policy, step.id); - const content = buildTextWithTabs(editor.state.schema, replacementBlocks[0], asProseMirrorMarks(marks)); + // Probe parent admission so a newline replacement into a `text*`-only span + // parent falls back to literal text (the export safety net handles the rest). + const lineBreakNodeType = editor.state.schema.nodes?.lineBreak; + const parentAllowsLineBreak = lineBreakNodeType ? parentAllowsNodeAt(tr, absFrom, lineBreakNodeType) : false; + const content = buildTextWithTabs(editor.state.schema, replacementBlocks[0], asProseMirrorMarks(marks), { + parentAllowsLineBreak, + }); tr.replaceWith(absFrom, absTo, content); return { changed: true }; } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts new file mode 100644 index 0000000000..442c73c1be --- /dev/null +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts @@ -0,0 +1,470 @@ +import { afterEach, beforeAll, describe, expect, it } from 'vitest'; +import { initTestEditor, loadTestDataForEditorTests } from '@tests/helpers/helpers.js'; +import { executeTextInsert, executeTextRewrite } from './executor.ts'; + +/** + * SD-3278: multi-line text forwarded into text-mode mutations (e.g. + * an agentic SDK workflow) must export Word-native line breaks, not a raw '\n' + * inside one that Word collapses. + * + * Two paths are covered: + * - Creation path: doc.replace / text.rewrite with a single '\n' must build a + * `lineBreak` PM node (not a literal '\n' in a text node). + * - Export safety net: a text node that already holds a raw '\n' (e.g. from an + * imported .docx that stored breaks as literal newlines) must still export a + * Word-native . + */ + +function makeSchemaEditor(paragraphs: string[] = ['hello world']) { + return initTestEditor({ + loadFromSchema: true, + content: { + type: 'doc', + content: paragraphs.map((text) => ({ + type: 'paragraph', + attrs: {}, + content: [{ type: 'run', attrs: {}, content: [{ type: 'text', text }] }], + })), + }, + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; +} + +function getFirstMatchRef(editor: any, pattern: string): string { + const match = editor.doc.query.match({ select: { type: 'text', pattern }, require: 'first' }); + const ref = match?.items?.[0]?.handle?.ref; + if (!ref) throw new Error(`Could not resolve ref for pattern "${pattern}"`); + return ref; +} + +function hasNodeOfType(editor: any, name: string): boolean { + let found = false; + editor.state.doc.descendants((node: any) => { + if (found) return false; + if (node.type.name === name) { + found = true; + return false; + } + return true; + }); + return found; +} + +function paragraphCount(editor: any): number { + let count = 0; + editor.state.doc.forEach((node: any) => { + if (node.type.name === 'paragraph') count += 1; + }); + return count; +} + +function countNodeType(editor: any, name: string): number { + let count = 0; + editor.state.doc.descendants((node: any) => { + if (node.type.name === name) count += 1; + }); + return count; +} + +describe('text-mode mutations: single newline handling (SD-3278)', () => { + let editor: any | undefined; + + afterEach(() => { + editor?.destroy(); + editor = undefined; + }); + + it('direct doc.replace with a single newline builds a lineBreak node inside one paragraph', () => { + editor = makeSchemaEditor(['hello world']); + + const receipt = editor.doc.replace( + { ref: getFirstMatchRef(editor, 'hello world'), text: 'Alpha\nBeta' }, + { changeMode: 'direct' }, + ); + + expect(receipt.success).toBe(true); + // A single '\n' stays within the paragraph (only '\n\n+' splits paragraphs). + expect(paragraphCount(editor)).toBe(1); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(true); + // It must NOT become a page break. + expect(hasNodeOfType(editor, 'hardBreak')).toBe(false); + }); + + it('tracked doc.replace with a single newline builds a lineBreak node', () => { + editor = makeSchemaEditor(['hello world']); + + const receipt = editor.doc.replace( + { ref: getFirstMatchRef(editor, 'hello world'), text: 'Alpha\nBeta' }, + { changeMode: 'tracked' }, + ); + + expect(receipt.success).toBe(true); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(true); + expect(hasNodeOfType(editor, 'hardBreak')).toBe(false); + }); + + // Read-model consistency (SD-3278): a lineBreak created on the write side must + // read back as '\n' on the diff/search paths, or query.match cannot find + // break-bearing content and an identical rewrite duplicates the break. + it('finds break-bearing content by \\n and is idempotent under an identical rewrite (no duplicate break)', () => { + editor = makeSchemaEditor(['hello world']); + editor.doc.replace({ ref: getFirstMatchRef(editor, 'hello world'), text: 'Alpha\nBeta' }, { changeMode: 'direct' }); + expect(countNodeType(editor, 'lineBreak')).toBe(1); + + // query.match must see the break as '\n' to resolve a ref to the content. + const ref = getFirstMatchRef(editor, 'Alpha\nBeta'); + editor.doc.replace({ ref, text: 'Alpha\nBeta' }, { changeMode: 'direct' }); + // The identical rewrite is a no-op: still exactly one break, not two. + expect(countNodeType(editor, 'lineBreak')).toBe(1); + }); + + it('rewriting break-bearing content to single-line text removes the break', () => { + editor = makeSchemaEditor(['hello world']); + editor.doc.replace({ ref: getFirstMatchRef(editor, 'hello world'), text: 'Alpha\nBeta' }, { changeMode: 'direct' }); + expect(countNodeType(editor, 'lineBreak')).toBe(1); + + const ref = getFirstMatchRef(editor, 'Alpha\nBeta'); + editor.doc.replace({ ref, text: 'AlphaBeta' }, { changeMode: 'direct' }); + expect(countNodeType(editor, 'lineBreak')).toBe(0); + expect(editor.state.doc.firstChild?.textContent).toBe('AlphaBeta'); + }); + + // Proves the bot P1 (rewrite char-diff counts the break) DIRECTLY, without + // query.match: rewriting `AlphaBeta` to the same visible text must + // be a no-op (the diff reads the break as '\n' via leafText), so it neither + // duplicates nor strands the break. + it('an identical rewrite over an existing lineBreak node is a no-op (direct target)', () => { + // paragraph > run > [ text 'Alpha', lineBreak, text 'Beta' ] + editor = initTestEditor({ + loadFromSchema: true, + content: { + type: 'doc', + content: [ + { + type: 'paragraph', + attrs: {}, + content: [ + { + type: 'run', + attrs: {}, + content: [{ type: 'text', text: 'Alpha' }, { type: 'lineBreak' }, { type: 'text', text: 'Beta' }], + }, + ], + }, + ], + }, + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; + expect(countNodeType(editor, 'lineBreak')).toBe(1); + + // Span the inline content: first text/lineBreak start .. last node end. + let absFrom = -1; + let absTo = -1; + editor.state.doc.descendants((node: any, pos: number) => { + if (node.isText || node.type.name === 'lineBreak') { + if (absFrom === -1) absFrom = pos; + absTo = pos + node.nodeSize; + } + }); + + const tr = editor.state.tr; + const target = { + kind: 'range', + stepId: 'idem', + op: 'text.rewrite', + blockId: '__selection__', + from: 0, + to: 0, + absFrom, + absTo, + text: 'Alpha\nBeta', + capturedStyle: undefined, + } as any; + const step = { + id: 'idem-rewrite', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'Alpha\nBeta' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + executeTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any); + editor.dispatch(tr); + + // Still exactly one break: not duplicated (the bot P1 regression) and not removed. + expect(countNodeType(editor, 'lineBreak')).toBe(1); + expect(editor.state.doc.firstChild?.textContent).toBe('Alpha\nBeta'); + }); +}); + +// lineBreak must not be forced into a parent that rejects it. The +// total-page-number node is `content: 'text*'`, so a newline insert there must +// fall back to literal text rather than throwing (mirrors the tab guard). +describe('newline insert into a restrictive (text*) parent', () => { + let editor: any | undefined; + + afterEach(() => { + editor?.destroy(); + editor = undefined; + }); + + function makeEditorWithTotalPageCount() { + return initTestEditor({ + loadFromSchema: true, + content: { + type: 'doc', + content: [ + { + type: 'paragraph', + attrs: {}, + content: [ + { + type: 'run', + attrs: {}, + content: [{ type: 'total-page-number', attrs: {}, content: [{ type: 'text', text: '7' }] }], + }, + ], + }, + ], + }, + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; + } + + function findTotalPageNumberPos(ed: any): number { + let pos: number | undefined; + ed.state.doc.descendants((node: any, nodePos: number) => { + if (pos !== undefined) return false; + if (node.type.name === 'total-page-number') { + pos = nodePos; + return false; + } + return true; + }); + if (pos === undefined) throw new Error('total-page-number node not found'); + return pos; + } + + it('inserts a\\nb into total-page-number without throwing and without creating a lineBreak node', () => { + editor = makeEditorWithTotalPageCount(); + const nodePos = findTotalPageNumberPos(editor); + const innerPos = nodePos + 1; // inside the total-page-number, before its '7' + + const tr = editor.state.tr; + const target = { + kind: 'range', + stepId: 'step-1', + op: 'text.insert', + blockId: 'total-page-number-1', + from: 0, + to: 0, + absFrom: innerPos, + absTo: innerPos, + text: '', + marks: [], + } as any; + const step = { + id: 'insert-newline-into-total-page-number', + op: 'text.insert', + where: { by: 'ref', ref: 'ignored' }, + args: { position: 'before', content: { text: 'a\nb' } }, + } as any; + + expect(() => executeTextInsert(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + const totalPageNumber = editor.state.doc.nodeAt(nodePos); + expect(totalPageNumber?.type.name).toBe('total-page-number'); + expect(totalPageNumber?.textContent).toBe('a\nb7'); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + }); + + // The fix also threads the parent-admission probe into the rewrite path + // (executeTextRewrite / executeSpanTextRewrite), not just text.insert. Rewriting + // text INSIDE a text*-only parent with a newline must not throw or inject a + // lineBreak; it falls back to literal text because the field is `text*` only. + it('rewrites text inside total-page-number with a\\nb without throwing or creating a lineBreak node', () => { + editor = makeEditorWithTotalPageCount(); + const nodePos = findTotalPageNumberPos(editor); + const innerPos = nodePos + 1; // the '7' text node sits at [innerPos, innerPos + 1] + + const tr = editor.state.tr; + const target = { + kind: 'range', + stepId: 'rewrite-step', + op: 'text.rewrite', + // '__selection__' makes resolveMarksForRange skip style capture (no block). + blockId: '__selection__', + from: 0, + to: 1, + absFrom: innerPos, + absTo: innerPos + 1, + text: '7', + capturedStyle: undefined, + } as any; + const step = { + id: 'rewrite-newline-into-total-page-number', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'a\nb' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + expect(() => executeTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + const totalPageNumber = editor.state.doc.nodeAt(nodePos); + expect(totalPageNumber?.type.name).toBe('total-page-number'); + // No lineBreak node was forced into the text*-only parent. + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + }); + + // The probe must be taken at the actual edit position, not once at the range + // start: a rewrite can start in a normal run (which allows lineBreak) and the + // newline edit can land inside a total-page-number (text*-only). A single + // probe at the start would mint a lineBreak that the field parent rejects. + it('does not force a lineBreak into total-page-number when a spanning rewrite lands the newline inside the field', () => { + // paragraph > run > [ text 'AB', total-page-number > text '7' ] + editor = initTestEditor({ + loadFromSchema: true, + content: { + type: 'doc', + content: [ + { + type: 'paragraph', + attrs: {}, + content: [ + { + type: 'run', + attrs: {}, + content: [ + { type: 'text', text: 'AB' }, + { type: 'total-page-number', attrs: {}, content: [{ type: 'text', text: '7' }] }, + ], + }, + ], + }, + ], + }, + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; + + const nodePos = findTotalPageNumberPos(editor); // total-page-number opens here; 'B' is at nodePos-1 + const tr = editor.state.tr; + // Range 'B7' starts in the run ('B') and spans into the field's text ('7'). + // The replacement appends '\n', whose edit lands at the end of the field's + // text (a text*-only parent). + const target = { + kind: 'range', + stepId: 'span-rewrite', + op: 'text.rewrite', + blockId: '__selection__', + from: 0, + to: 0, + absFrom: nodePos - 1, // 'B' in the run + absTo: nodePos + 2, // just after '7' inside the field + text: 'B7', + capturedStyle: undefined, + } as any; + const step = { + id: 'span-rewrite-into-field', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'B7\n' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + expect(() => executeTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + expect(editor.state.doc.nodeAt(nodePos)?.type.name).toBe('total-page-number'); + // The newline landed in the text*-only field, so it falls back to literal + // text rather than forcing a (schema-invalid) lineBreak there. + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + }); +}); + +describe('docx export: newline becomes (SD-3278)', () => { + let docData: Awaited>; + let editor: any | undefined; + + beforeAll(async () => { + docData = await loadTestDataForEditorTests('blank-doc.docx'); + }); + + const makeBlankDocEditor = () => + initTestEditor({ + content: docData.docx, + media: docData.media, + mediaFiles: docData.mediaFiles, + fonts: docData.fonts, + // Tracked mutations require an author/user on the editor instance. + user: { name: 'Integration User', email: 'integration@example.com' }, + }).editor; + + afterEach(() => { + editor?.destroy(); + editor = undefined; + }); + + it('exports a Word-native for a text node that already holds a raw newline', async () => { + editor = makeBlankDocEditor(); + + // Seed a raw '\n' directly via the core command (tr.insertText keeps the + // literal newline) to reproduce an imported .docx whose held a raw + // newline (this exercises the export safety net, not the creation path). + editor.dispatch(editor.state.tr.insertText('Alpha\nBeta', 1)); + expect(editor.state.doc.textContent).toContain('Alpha\nBeta'); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + + const xml = await editor.exportDocx({ exportXmlOnly: true }); + expect(xml).toContain(' around (no leftover )', async () => { + editor = makeBlankDocEditor(); + + // Seed "Alpha\nBeta" as one text node, then mark the whole range as a tracked + // deletion. On export the run is split around ; every segment must + // become inside , never a stray . + editor.dispatch(editor.state.tr.insertText('Alpha\nBeta', 1)); + const delMark = editor.schema.marks.trackDelete.create({ + id: 'del-1', + author: 'Reviewer', + authorEmail: 'reviewer@example.com', + date: '2026-01-01T00:00:00.000Z', + }); + editor.dispatch(editor.state.tr.addMark(1, 1 + 'Alpha\nBeta'.length, delMark)); + + const xml = await editor.exportDocx({ exportXmlOnly: true }); + expect(xml).toContain(' survives inside the deletion (it would be ignored by Word). + expect(/]*>[\s\S]*?<\/w:del>/.test(xml)).toBe(false); + }); + + it('tracked doc.replace with a newline exports valid tracked OOXML (inserted text in , break preserved as )', async () => { + editor = makeBlankDocEditor(); + + // Seed text to target, then tracked-replace it with multi-line content. + editor.dispatch(editor.state.tr.insertText('hello world', 1)); + const match = editor.doc.query.match({ select: { type: 'text', pattern: 'hello world' }, require: 'first' }); + const ref = match?.items?.[0]?.handle?.ref; + expect(ref).toBeTruthy(); + + editor.doc.replace({ ref, text: 'Alpha\nBeta' }, { changeMode: 'tracked' }); + + const xml = await editor.exportDocx({ exportXmlOnly: true }); + // Inserted text is tracked and the newline survives as a Word-native break. + expect(xml).toContain('/ the way noBreakHyphen's translator does, so the break + // exports as its own run rather than inside . The output is valid + // OOXML; on reject the orphan break remains. This is an export-routing gap, + // not a schema limit (a leaf atom can carry marks). Tracked deletes of an + // existing raw-newline node keep the break inside via the single-run + // path above. + }); +}); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/node-materializer.ts b/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/node-materializer.ts index 6b038a2210..1d1af2b13b 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/node-materializer.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/node-materializer.ts @@ -917,7 +917,15 @@ function materializeTab(schema: Schema): ProseMirrorNode { } function materializeLineBreak(schema: Schema): ProseMirrorNode { - const nodeType = schema.nodes.hardBreak ?? schema.nodes.lineBreak; + // A `kind: 'lineBreak'` item is a soft break and must materialize to `lineBreak` + // (exports ``), not `hardBreak` (exports ``, a page + // break). Block-level page breaks use the distinct `kind: 'break'` path. + // KNOWN AMBIGUITY: structural projection (sd-projection) maps BOTH hardBreak + // and lineBreak to `kind: 'lineBreak'` with no discriminator, so a structural + // read/write echo of an *inline* page break is demoted to a soft break here. + // That projection ambiguity predates this change and needs a discriminator to + // round-trip inline page breaks (separate follow-up). + const nodeType = schema.nodes.lineBreak ?? schema.nodes.hardBreak; if (!nodeType) return schema.text('\n'); return nodeType.create(); } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/structural-write-engine.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/structural-write-engine.test.ts index 6559632baa..d3e3085c80 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/structural-write-engine.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/structural-write-engine/structural-write-engine.test.ts @@ -895,6 +895,74 @@ describe('materializeFragment — capability gates', () => { // Extension nodes bypass capability gates — should fall through to fallback materializer expect(() => materializeFragment(editor.state.schema, fragment, new Set(), 'insert')).not.toThrow(); }); + + // SD-3278: a `kind: 'lineBreak'` inline must become a soft `lineBreak` + // node (exports to ), NOT a `hardBreak` node (exports to a page break). + // This is the bug that made the structural.replace workaround fail. + it('materializes a structural lineBreak as a soft lineBreak node, not a hardBreak', () => { + const fragment: SDFragment = { + kind: 'paragraph', + paragraph: { + inlines: [ + { kind: 'run', run: { text: 'Alpha' } }, + { kind: 'lineBreak' }, + { kind: 'run', run: { text: 'Beta' } }, + ], + }, + } as any; + + const materialized = materializeFragment(editor.state.schema, fragment, new Set(), 'insert'); + const roots = Array.isArray(materialized) ? materialized : [materialized]; + const typeNames = new Set(); + roots.forEach((root: any) => root.descendants((node: any) => typeNames.add(node.type.name))); + + expect(typeNames.has('lineBreak')).toBe(true); + expect(typeNames.has('hardBreak')).toBe(false); + }); + + // SD-3278: a structural run whose text carries a raw newline (a natural shape + // for SDK/agent-built `structural.replace` content) must split into lineBreak + // nodes, not keep the literal '\n' in a text node. + it('splits a newline inside structural run text into a lineBreak node, not raw text', () => { + const fragment: SDFragment = { + kind: 'paragraph', + paragraph: { + inlines: [{ kind: 'run', run: { text: 'left\nright' } }], + }, + } as any; + + const materialized = materializeFragment(editor.state.schema, fragment, new Set(), 'insert'); + const roots = Array.isArray(materialized) ? materialized : [materialized]; + const typeCounts: Record = {}; + let text = ''; + roots.forEach((root: any) => + root.descendants((node: any) => { + typeCounts[node.type.name] = (typeCounts[node.type.name] ?? 0) + 1; + if (node.isText) text += node.text ?? ''; + }), + ); + + expect(typeCounts.lineBreak).toBe(1); + expect(typeCounts.hardBreak ?? 0).toBe(0); + // No raw newline survives inside a text node. + expect(text.includes('\n')).toBe(false); + expect(text).toContain('left'); + expect(text).toContain('right'); + // The break reads back as '\n' via leafText, so structural reads that + // flatten PM content preserve it on round-trip (SD-3278). + const readableText = roots + .map((root: any) => { + if (typeof root.textBetween !== 'function') return root.textContent ?? ''; + const leafText = (node: any) => { + const specLeafText = node?.type?.spec?.leafText; + return typeof specLeafText === 'function' ? specLeafText(node) : ''; + }; + const size = root.content?.size ?? root.size ?? 0; + return root.textBetween(0, size, '', leafText); + }) + .join(''); + expect(readableText).toBe('left\nright'); + }); }); // --------------------------------------------------------------------------- diff --git a/packages/super-editor/src/editors/v1/extensions/line-break/line-break.js b/packages/super-editor/src/editors/v1/extensions/line-break/line-break.js index 4fbc2a148c..e141dcc41f 100644 --- a/packages/super-editor/src/editors/v1/extensions/line-break/line-break.js +++ b/packages/super-editor/src/editors/v1/extensions/line-break/line-break.js @@ -32,6 +32,17 @@ export const LineBreak = Node.create({ content: '', atom: true, + // The visible text representation of this leaf. Without it, flattening APIs + // see the break as nothing or a U+FFFC placeholder, so the rewrite char-diff + // and the doc-api text model (query.match, structural projection) disagree + // with the visible document. For example, an idempotent text.rewrite over + // `Alpha\nBeta` would duplicate the break, and query.match could not find it. + // Read by PM's built-in `Node.textBetween` (so `node.textContent` too), + // SuperDoc's `textBetweenWithTabs` / `charOffsetToDocPos`, SearchIndex, and + // `text-offset-resolver`. Mirrors the `noBreakHyphen` leaf (U+2011). See + // SD-3278. + leafText: () => '\n', + addOptions() { return {}; }, diff --git a/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js b/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js index 593f599a5f..8613d95e53 100644 --- a/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js +++ b/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js @@ -386,14 +386,16 @@ export const Paragraph = OxmlNode.create({ // We avoid `findParentNode(isList)` here because `isList` depends on // `getResolvedParagraphProperties`, a WeakMap cache keyed by node // identity. After the numbering plugin's `appendTransaction` sets - // `listRendering`, the paragraph node object is replaced, leaving - // the new node uncached — causing `isList` to return false. + // `listRendering`, the paragraph node object is replaced, so the + // new node is uncached and `isList` can return false. const { $from } = selection; let paragraph = null; + let paragraphDepth = null; for (let d = $from.depth; d >= 0; d--) { const node = $from.node(d); if (node.type.name === 'paragraph') { paragraph = node; + paragraphDepth = d; break; } } @@ -404,7 +406,14 @@ export const Paragraph = OxmlNode.create({ if (!isListParagraph) return false; if (!isVisuallyEmptyParagraph(paragraph) && !hasOnlyBreakContent(paragraph)) return false; - const tr = state.tr.insertText(event.data); + let tr = state.tr; + if (hasOnlyBreakContent(paragraph) && paragraphDepth != null) { + const contentStart = $from.start(paragraphDepth); + const contentEnd = $from.end(paragraphDepth); + tr = tr.delete(contentStart, contentEnd).insertText(event.data, contentStart); + } else { + tr = tr.insertText(event.data); + } view.dispatch(tr); event.preventDefault(); return true; diff --git a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js index a830c3d298..238b6f269d 100644 --- a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js +++ b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js @@ -28,6 +28,12 @@ const DELETION_BARRIER = '\u0000'; const DEFAULT_SEARCH_MODEL = 'raw'; const hasTrackDeleteMark = (node) => node?.marks?.some((mark) => mark?.type?.name === 'trackDelete') ?? false; +const readLeafText = (node) => { + const leafText = node?.type?.spec?.leafText; + if (typeof leafText === 'function') return leafText(node); + if (typeof leafText === 'string') return leafText; + return ATOM_PLACEHOLDER; +}; /** * SearchIndex provides a lazily-built, cached index for searching across @@ -69,7 +75,7 @@ export class SearchIndex { this.#buildVisible(doc); } else { // Get the flattened text using ProseMirror's optimized textBetween - this.text = doc.textBetween(0, doc.content.size, BLOCK_SEPARATOR, ATOM_PLACEHOLDER); + this.text = doc.textBetween(0, doc.content.size, BLOCK_SEPARATOR, readLeafText); } this.segments = []; @@ -141,7 +147,12 @@ export class SearchIndex { } if (node.isLeaf) { - parts.push(ATOM_PLACEHOLDER); + if (hasTrackDeleteMark(node)) { + appendDeletionBarrier(); + return; + } + + parts.push(readLeafText(node)); emittedDeletionBarrier = false; return; } @@ -240,29 +251,48 @@ export class SearchIndex { } if (node.isLeaf) { + if (searchModel === 'visible' && hasTrackDeleteMark(node)) { + if (context?.deletionBarrierActive) { + return offset; + } + addSegment({ + offsetStart: offset, + offsetEnd: offset + 1, + docFrom: docPos, + docTo: docPos + node.nodeSize, + kind: 'atom', + }); + if (context) { + context.deletionBarrierActive = true; + } + return offset + 1; + } + if (context && searchModel === 'visible') { context.deletionBarrierActive = false; } + const leafText = readLeafText(node); + if (leafText.length === 0) return offset; // Leaf node (atom): check if it's a hard_break or other atom if (node.type.name === 'hard_break') { addSegment({ offsetStart: offset, - offsetEnd: offset + 1, + offsetEnd: offset + leafText.length, docFrom: docPos, docTo: docPos + node.nodeSize, kind: 'hardBreak', }); - return offset + 1; + return offset + leafText.length; } - // Other atoms get the replacement character + // Other atoms use their declared leaf text or the replacement character. addSegment({ offsetStart: offset, - offsetEnd: offset + 1, + offsetEnd: offset + leafText.length, docFrom: docPos, docTo: docPos + node.nodeSize, kind: 'atom', }); - return offset + 1; + return offset + leafText.length; } // For non-leaf nodes, recurse into content @@ -310,6 +340,14 @@ export class SearchIndex { */ offsetRangeToDocRanges(start, end) { const ranges = []; + // A single search hit is gapless in offset space, so consecutive segments + // (text and inline-leaf atoms like lineBreak) belong to one contiguous + // match. Coalesce them into one doc range — otherwise a hit spanning + // `text + lineBreak + text` yields discontiguous text ranges that the + // downstream D5 contiguity guard rejects (SD-3278). A block separator is a + // real split between blocks and ends the current range. The D5 guard still + // catches genuinely separate edits, which are not offset-contiguous. + let current = null; for (const segment of this.segments) { // Skip segments entirely before our range @@ -317,25 +355,41 @@ export class SearchIndex { // Stop if we're past our range if (segment.offsetStart >= end) break; - // Only include text segments in the result - if (segment.kind !== 'text') continue; + // Block separators split blocks; never coalesce across them. + if (segment.kind === 'blockSep') { + if (current) { + ranges.push({ from: current.from, to: current.to }); + current = null; + } + continue; + } - // Calculate the overlap const overlapStart = Math.max(start, segment.offsetStart); const overlapEnd = Math.min(end, segment.offsetEnd); + if (overlapStart >= overlapEnd) continue; + + let from; + let to; + if (segment.kind === 'text') { + from = segment.docFrom + (overlapStart - segment.offsetStart); + to = segment.docFrom + (overlapEnd - segment.offsetStart); + } else { + // Inline leaf atom (lineBreak, hardBreak, image, ...): occupies its + // whole node span and is part of the contiguous match, not a gap. + from = segment.docFrom; + to = segment.docTo; + } - if (overlapStart < overlapEnd) { - // Map the overlap back to document positions - const startInSegment = overlapStart - segment.offsetStart; - const endInSegment = overlapEnd - segment.offsetStart; - - ranges.push({ - from: segment.docFrom + startInSegment, - to: segment.docFrom + endInSegment, - }); + if (current && segment.offsetStart === current.offsetEnd) { + current.to = Math.max(current.to, to); + current.offsetEnd = overlapEnd; + } else { + if (current) ranges.push({ from: current.from, to: current.to }); + current = { from, to, offsetEnd: overlapEnd }; } } + if (current) ranges.push({ from: current.from, to: current.to }); return ranges; } diff --git a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js index bde782ccf8..33075aab83 100644 --- a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js +++ b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js @@ -1,5 +1,6 @@ // @ts-nocheck import { describe, it, expect } from 'vitest'; +import { Schema } from 'prosemirror-model'; import { SearchIndex } from './SearchIndex.js'; describe('SearchIndex.stripDiacritics', () => { @@ -142,6 +143,38 @@ describe('SearchIndex.searchIgnoringDiacritics', () => { }); }); +describe('SearchIndex lineBreak leaf text', () => { + const schema = new Schema({ + nodes: { + doc: { content: 'paragraph+' }, + paragraph: { group: 'block', content: 'inline*' }, + text: { group: 'inline' }, + lineBreak: { + group: 'inline', + inline: true, + atom: true, + leafText: () => '\n', + }, + }, + marks: {}, + }); + + function buildLineBreakDoc() { + return schema.nodes.doc.create(null, [ + schema.nodes.paragraph.create(null, [schema.text('Alpha'), schema.nodes.lineBreak.create(), schema.text('Beta')]), + ]); + } + + it('indexes lineBreak using its declared leafText', () => { + const index = new SearchIndex(); + + index.ensureValid(buildLineBreakDoc()); + + expect(index.text).toBe('Alpha\nBeta'); + expect(index.search('Alpha\nBeta')).toHaveLength(1); + }); +}); + describe('SearchIndex searchModel: visible', () => { function textNode(text, { deleted = false } = {}) { return { @@ -155,6 +188,19 @@ describe('SearchIndex searchModel: visible', () => { }; } + function leafNode(typeName, { deleted = false, leafText = undefined } = {}) { + return { + isText: false, + isLeaf: true, + isInline: true, + isBlock: false, + type: { name: typeName, spec: leafText ? { leafText } : {} }, + nodeSize: 1, + marks: deleted ? [{ type: { name: 'trackDelete' } }] : [], + forEach: () => {}, + }; + } + function containerNode(children, { isBlock = false, textBetween = '' } = {}) { return { isText: false, @@ -204,6 +250,23 @@ describe('SearchIndex searchModel: visible', () => { return { doc }; } + function buildDocWithTrackedDeletedLineBreak() { + const paragraph = containerNode( + [textNode('before'), leafNode('lineBreak', { deleted: true, leafText: () => '\n' }), textNode('after')], + { + isBlock: true, + textBetween: 'before\nafter', + }, + ); + + const doc = containerNode([paragraph], { + isBlock: false, + textBetween: 'before\nafter', + }); + + return { doc }; + } + it('excludes pending tracked deletions in visible model', () => { const { doc } = buildDocWithTrackedDeletion(); const index = new SearchIndex(); @@ -250,4 +313,14 @@ describe('SearchIndex searchModel: visible', () => { expect(ranges[0].to - ranges[0].from).toBe('after'.length); expect(ranges[0]).toEqual({ from: 13, to: 18 }); }); + + it('excludes pending tracked deletions on leaf nodes in visible model', () => { + const { doc } = buildDocWithTrackedDeletedLineBreak(); + const index = new SearchIndex(); + + index.ensureValid(doc, { searchModel: 'visible' }); + + expect(index.search('\n')).toHaveLength(0); + expect(index.search('beforeafter')).toHaveLength(0); + }); }); diff --git a/packages/super-editor/src/editors/v1/extensions/search/search.js b/packages/super-editor/src/editors/v1/extensions/search/search.js index 8679aa08e3..e3fcbb196b 100644 --- a/packages/super-editor/src/editors/v1/extensions/search/search.js +++ b/packages/super-editor/src/editors/v1/extensions/search/search.js @@ -49,11 +49,12 @@ const mapIndexMatchesToDocMatches = ({ searchIndex, indexMatches, doc, positionT if (ranges.length === 0) continue; const matchTexts = ranges.map((r) => doc.textBetween(r.from, r.to)); + const matchText = typeof indexMatch.text === 'string' ? indexMatch.text : matchTexts.join(''); const match = { from: ranges[0].from, to: ranges[ranges.length - 1].to, - text: matchTexts.join(''), + text: matchText, id: uuidv4(), ranges, trackerIds: [], From 480ea20e5a719f974252223f04c86782c2a4ac3b Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Fri, 5 Jun 2026 16:42:51 -0300 Subject: [PATCH 2/4] fix(super-editor): keep caret after replacing break-only list content (SD-3278) Typing into a list item that holds only a placeholder break dropped the caret before the first inserted character, so subsequent native keystrokes prepended instead of appended ("abcdef" landed as "bcdefa"). Move the selection past the inserted text after the delete+insert. --- .../src/editors/v1/extensions/paragraph/paragraph.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js b/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js index 8613d95e53..94a02b611c 100644 --- a/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js +++ b/packages/super-editor/src/editors/v1/extensions/paragraph/paragraph.js @@ -411,6 +411,10 @@ export const Paragraph = OxmlNode.create({ const contentStart = $from.start(paragraphDepth); const contentEnd = $from.end(paragraphDepth); tr = tr.delete(contentStart, contentEnd).insertText(event.data, contentStart); + // insertText at an explicit position leaves the caret before the inserted + // text, so subsequent native keystrokes prepend instead of append (typing + // "abcdef" lands as "bcdefa"). Place the caret after the inserted text. + tr = tr.setSelection(TextSelection.create(tr.doc, contentStart + event.data.length)); } else { tr = tr.insertText(event.data); } From f9a7b8eb4e98db2c33a84214320de48824259015 Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Fri, 5 Jun 2026 16:42:52 -0300 Subject: [PATCH 3/4] fix(super-editor): require PM-contiguity when coalescing search ranges (SD-3278) Coalesce adjacent search segments only when they are both offset-contiguous (same hit) and document-adjacent (segment.docFrom === current.to). This merges text + lineBreak + text within one run into a single range without bridging a skipped/tracked-deleted leaf or a run boundary, so the downstream D5 contiguity guard still rejects genuinely separate edits. --- .../v1/extensions/search/SearchIndex.js | 12 ++++++-- .../v1/extensions/search/SearchIndex.test.js | 28 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js index 238b6f269d..162d6e5289 100644 --- a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js +++ b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.js @@ -380,8 +380,16 @@ export class SearchIndex { to = segment.docTo; } - if (current && segment.offsetStart === current.offsetEnd) { - current.to = Math.max(current.to, to); + // Coalesce only when the next segment is BOTH offset-contiguous (same + // search hit) AND PM-contiguous (`from === current.to`, i.e. immediately + // adjacent in the document). This merges `text + lineBreak + text` within + // one run into a single range, but never bridges a document gap — a + // skipped/tracked-deleted leaf, a run boundary, or any content the match + // does not actually cover. A non-coalesced segment becomes its own range; + // since it stays offset-contiguous, the downstream block coalescing still + // sees no gap, while the D5 guard keeps rejecting genuinely separate edits. + if (current && segment.offsetStart === current.offsetEnd && from === current.to) { + current.to = to; current.offsetEnd = overlapEnd; } else { if (current) ranges.push({ from: current.from, to: current.to }); diff --git a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js index 33075aab83..6b92f0f791 100644 --- a/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js +++ b/packages/super-editor/src/editors/v1/extensions/search/SearchIndex.test.js @@ -173,6 +173,34 @@ describe('SearchIndex lineBreak leaf text', () => { expect(index.text).toBe('Alpha\nBeta'); expect(index.search('Alpha\nBeta')).toHaveLength(1); }); + + it('coalesces a hit spanning text + lineBreak + text into one contiguous doc range', () => { + const index = new SearchIndex(); + index.ensureValid(buildLineBreakDoc()); + + // The full 'Alpha\nBeta' span (text + lineBreak + text, all PM-adjacent in + // one block) must map to a single contiguous range, not discontiguous text + // ranges that the downstream D5 contiguity guard would reject. + const ranges = index.offsetRangeToDocRanges(0, index.text.length); + expect(ranges).toHaveLength(1); + }); + + it('does NOT coalesce across a block separator (negative)', () => { + const index = new SearchIndex(); + index.ensureValid( + schema.nodes.doc.create(null, [ + schema.nodes.paragraph.create(null, [schema.text('Alpha')]), + schema.nodes.paragraph.create(null, [schema.text('Beta')]), + ]), + ); + + // 'Alpha\nBeta' here is two paragraphs joined by a block separator, not an + // inline break. The separator is a real split: the span must stay two ranges + // (one per block), never a single editable text range. + expect(index.text).toBe('Alpha\nBeta'); + const ranges = index.offsetRangeToDocRanges(0, index.text.length); + expect(ranges).toHaveLength(2); + }); }); describe('SearchIndex searchModel: visible', () => { From 3bd10f3b16b4bee73f12ae70ed481aebd9e4a3ed Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Fri, 5 Jun 2026 18:00:21 -0300 Subject: [PATCH 4/4] test(super-editor): cover executeSpanTextRewrite newline probe (SD-3278) The span-rewrite path got the same parentAllowsLineBreak probe as the rewrite/insert paths but had no newline test, though its comment claimed coverage. Add two cases: a single '\n' in a normal parent mints one lineBreak (no hardBreak, no raw newline text node), and the same into a text*-only total-page-number falls back to literal text with no lineBreak. --- .../newline-handling.integration.test.ts | 80 ++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts index 442c73c1be..3d69098ae7 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/newline-handling.integration.test.ts @@ -1,6 +1,6 @@ import { afterEach, beforeAll, describe, expect, it } from 'vitest'; import { initTestEditor, loadTestDataForEditorTests } from '@tests/helpers/helpers.js'; -import { executeTextInsert, executeTextRewrite } from './executor.ts'; +import { executeSpanTextRewrite, executeTextInsert, executeTextRewrite } from './executor.ts'; /** * SD-3278: multi-line text forwarded into text-mode mutations (e.g. @@ -379,6 +379,84 @@ describe('newline insert into a restrictive (text*) parent', () => { // text rather than forcing a (schema-invalid) lineBreak there. expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); }); + + // executeSpanTextRewrite has its own single-block replacement path with a + // separate parentAllowsLineBreak probe. The rewrite/insert paths above cover + // their probes, but the span path's was unexercised even though its source + // comment claims it is covered. A single inline '\n' stays in one replacement + // block (split is on \n{2,}), so it reaches this single-block path. + it('span rewrite with a single newline builds one lineBreak in a normal parent', () => { + editor = makeSchemaEditor(['hello world']); // paragraph > run > text 'hello world' + const tr = editor.state.tr; + + // A real two-segment span over the run text ('hello' + ' world'). The run + // admits a lineBreak, so the single '\n' must mint exactly one. + const target = { + kind: 'span', + stepId: 'span-newline-normal', + op: 'text.rewrite', + matchId: 'm:span-normal', + segments: [ + { blockId: 'p1', from: 0, to: 5, absFrom: 2, absTo: 7 }, + { blockId: 'p1', from: 5, to: 11, absFrom: 7, absTo: 13 }, + ], + text: 'hello world', + marks: [], + capturedStyleBySegment: [], + } as any; + const step = { + id: 'span-newline-normal', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'Alpha\nBeta' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + expect(() => executeSpanTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + expect(countNodeType(editor, 'lineBreak')).toBe(1); + expect(hasNodeOfType(editor, 'hardBreak')).toBe(false); + + // The break is a real node, never a raw '\n' baked into a text node. + let rawNewlineText = false; + editor.state.doc.descendants((node: any) => { + if (node.isText && typeof node.text === 'string' && node.text.includes('\n')) rawNewlineText = true; + }); + expect(rawNewlineText).toBe(false); + }); + + it('span rewrite with a newline into total-page-number falls back to literal text, no lineBreak', () => { + editor = makeEditorWithTotalPageCount(); // paragraph > run > total-page-number > text '7' + const nodePos = findTotalPageNumberPos(editor); + const tr = editor.state.tr; + + // The span sits entirely inside the field's text ('7' at [nodePos+1, nodePos+2]). + // total-page-number is text*-only, so the probe at the edit position rejects a + // lineBreak and the replacement falls back to literal text (the export safety + // net turns it into a later). + const target = { + kind: 'span', + stepId: 'span-newline-field', + op: 'text.rewrite', + matchId: 'm:span-field', + segments: [{ blockId: 'p1', from: 0, to: 1, absFrom: nodePos + 1, absTo: nodePos + 2 }], + text: '7', + marks: [], + capturedStyleBySegment: [], + } as any; + const step = { + id: 'span-newline-field', + op: 'text.rewrite', + where: { by: 'ref', ref: 'ignored' }, + args: { replacement: { text: 'Alpha\nBeta' }, style: { inline: { mode: 'preserve' } } }, + } as any; + + expect(() => executeSpanTextRewrite(editor, tr, target, step, { map: (pos: number) => pos } as any)).not.toThrow(); + editor.dispatch(tr); + + expect(editor.state.doc.nodeAt(nodePos)?.type.name).toBe('total-page-number'); + expect(hasNodeOfType(editor, 'lineBreak')).toBe(false); + }); }); describe('docx export: newline becomes (SD-3278)', () => {