From a62755b2661c3737476b0f5ba4b1c9631916727a Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Thu, 7 May 2026 18:36:23 -0300 Subject: [PATCH 1/4] feat(pm-adapter): preserve run-level bidi/script metadata on TextRun (SD-2781) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two preservation-only fields to the layout text-run contract, kept on separate axes per ECMA's own categorization: - TextRun.bidi (RunBidiContext): direction signals only - run rtl flag now, embedding (w:dir) and override (w:bdo) wired in Wave 1c. - TextRun.script (RunScriptContext): complex-script flag + per-script language metadata (default / complexScript / eastAsian) per §17.3.2.20. Both populated by pm-adapter from raw run properties when present. Wave 1a does not render either; Wave 1b will gate the formatting-stack selection on script.complexScript, Wave 1c will read bidi.embedding/override. Why now: ECMA puts direction (rtl, dir, bdo) and script formatting (cs, lang/@bidi) in different categories. Lumping them under one bidi field would collapse the axes and lie about the schema. Adding both fields now means Wave 1b/1c don't have to introduce both the data path and the rendering at once. The RunScriptContext.language field expanded from a single string to a structured object with three optional tags. No production consumer reads the field yet (preservation-only since #3184), so the shape change is safe. Tests: - bidi populated only on rtl, script populated only on cs/lang - explicit rtl=false preserved (a meaningful override) - the three lang tags land on separate fields - axis non-collapse: rtl never leaks into script, cs never leaks into bidi --- .../contracts/src/direction-context.ts | 15 +++++- packages/layout-engine/contracts/src/index.ts | 13 +++++ .../inline-converters/common.test.ts | 53 +++++++++++++++++++ .../converters/inline-converters/common.ts | 48 ++++++++++++++++- 4 files changed, 126 insertions(+), 3 deletions(-) diff --git a/packages/layout-engine/contracts/src/direction-context.ts b/packages/layout-engine/contracts/src/direction-context.ts index e6246851aa..8e4ce996ff 100644 --- a/packages/layout-engine/contracts/src/direction-context.ts +++ b/packages/layout-engine/contracts/src/direction-context.ts @@ -136,6 +136,17 @@ export type RunBidiContext = { export type RunScriptContext = { /** w:rPr/w:cs. Forces complex-script formatting regardless of Unicode. */ complexScript: boolean; - /** w:rPr/w:lang/@bidi. Complex-script language metadata (spellcheck). */ - language?: string; + /** + * Per-script language metadata, kept on separate fields per ECMA §17.3.2.20 + * because each maps to a different formatting stack (Latin / CS / East Asian). + * Wave 1b consumes these to gate spellcheck and font-stack selection. + */ + language?: { + /** w:rPr/w:lang/@val. Default (Latin) language tag. */ + default?: string; + /** w:rPr/w:lang/@bidi. Complex-script language tag. */ + complexScript?: string; + /** w:rPr/w:lang/@eastAsia. East Asian language tag. */ + eastAsian?: string; + }; }; diff --git a/packages/layout-engine/contracts/src/index.ts b/packages/layout-engine/contracts/src/index.ts index be0b316ce6..96218eb609 100644 --- a/packages/layout-engine/contracts/src/index.ts +++ b/packages/layout-engine/contracts/src/index.ts @@ -315,6 +315,19 @@ export type TextRun = RunMarks & { }; /** Tracked-change metadata from ProseMirror marks. */ trackedChange?: TrackedChangeMeta; + /** + * Run-level bidi signals preserved from the source DOCX (run rtl flag, + * embedding/override directions). Direction-only - script formatting lives + * on `script`. Populated by pm-adapter from raw run properties; not yet + * rendered (Wave 1c consumes embedding/override). + */ + bidi?: RunBidiContext; + /** + * Run-level script context preserved from the source DOCX (complex-script + * flag, per-script language metadata). Wave 1b uses `complexScript` to gate + * the formatting-stack selection (Latin variants vs CS variants). + */ + script?: RunScriptContext; }; export type TabRun = RunMarks & { diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts index f773dfaea1..b67d6e7299 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts @@ -117,4 +117,57 @@ describe('applyInlineRunProperties', () => { expect(result.bold).toBe(false); }); + + // SD-2781: TextRun.bidi and TextRun.script preserve run-level direction and + // script signals from raw run properties. Wave 1a does not render either. + describe('SD-2781 bidi/script preservation', () => { + it('does not attach bidi or script when no relevant signals are set', () => { + const result = applyInlineRunProperties(baseRun, { bold: true }); + expect(result.bidi).toBeUndefined(); + expect(result.script).toBeUndefined(); + }); + + it('preserves run rtl on TextRun.bidi (does not affect script)', () => { + const result = applyInlineRunProperties(baseRun, { rtl: true }); + expect(result.bidi).toEqual({ rtl: true }); + expect(result.script).toBeUndefined(); + }); + + it('preserves explicit rtl=false (a meaningful override of inherited rtl)', () => { + const result = applyInlineRunProperties(baseRun, { rtl: false }); + expect(result.bidi).toEqual({ rtl: false }); + }); + + it('preserves complex-script flag on TextRun.script (does not affect bidi)', () => { + const result = applyInlineRunProperties(baseRun, { cs: true }); + expect(result.script).toEqual({ complexScript: true }); + expect(result.bidi).toBeUndefined(); + }); + + it('preserves the three lang tags on separate fields per ECMA §17.3.2.20', () => { + const result = applyInlineRunProperties(baseRun, { + lang: { val: 'en-US', bidi: 'ar-SA', eastAsia: 'ja-JP' }, + }); + expect(result.script?.language).toEqual({ + default: 'en-US', + complexScript: 'ar-SA', + eastAsian: 'ja-JP', + }); + }); + + it('partial lang attrs only fill the fields that were set', () => { + const result = applyInlineRunProperties(baseRun, { lang: { bidi: 'he-IL' } }); + expect(result.script?.language).toEqual({ complexScript: 'he-IL' }); + }); + + it('keeps rtl and cs on separate axes (axis non-collapse)', () => { + const result = applyInlineRunProperties(baseRun, { rtl: true, cs: true }); + // rtl goes to bidi only; cs goes to script only + expect(result.bidi).toEqual({ rtl: true }); + expect(result.script).toEqual({ complexScript: true }); + // cs must NOT leak into bidi, and rtl must NOT leak into script + expect(result.bidi).not.toHaveProperty('complexScript'); + expect(result.script).not.toHaveProperty('rtl'); + }); + }); }); diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts index 8f1c4b2d02..fca07c199a 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts @@ -1,5 +1,12 @@ import type { RunProperties, ParagraphProperties } from '@superdoc/style-engine/ooxml'; -import type { FlowBlock, SdtMetadata, TextRun, ParagraphAttrs } from '@superdoc/contracts'; +import type { + FlowBlock, + RunBidiContext, + RunScriptContext, + SdtMetadata, + TextRun, + ParagraphAttrs, +} from '@superdoc/contracts'; import { HyperlinkConfig, NodeHandlerContext, @@ -73,6 +80,38 @@ export type BlockConverterOptions = { paragraphAttrs: ParagraphAttrs; }; +/** + * Build a RunBidiContext from raw run properties when any direction signal is set. + * Returns undefined when nothing to preserve, so empty contexts don't bloat the + * layout tree. Wave 1c will populate `embedding` (w:dir) and `override` (w:bdo). + */ +const buildBidiContext = (runProperties: RunProperties): RunBidiContext | undefined => { + if (runProperties.rtl == null) return undefined; + return { rtl: runProperties.rtl === true }; +}; + +/** + * Build a RunScriptContext from raw run properties when any script signal is set. + * Per ECMA §17.3.2.20, w:lang carries three independent language tags - default + * (Latin), bidi (complex-script), eastAsia - mapped here to one structured field. + */ +const buildScriptContext = (runProperties: RunProperties): RunScriptContext | undefined => { + const cs = runProperties.cs; + const lang = runProperties.lang; + const hasLang = lang != null && (lang.val != null || lang.bidi != null || lang.eastAsia != null); + if (cs == null && !hasLang) return undefined; + + const ctx: RunScriptContext = { complexScript: cs === true }; + if (hasLang) { + const language: NonNullable = {}; + if (lang.val != null) language.default = lang.val; + if (lang.bidi != null) language.complexScript = lang.bidi; + if (lang.eastAsia != null) language.eastAsian = lang.eastAsia; + ctx.language = language; + } + return ctx; +}; + export const applyInlineRunProperties = ( run: TextRun, runProperties: RunProperties | undefined, @@ -90,5 +129,12 @@ export const applyInlineRunProperties = ( (merged as Record)[key] = runAttrs[key]; } } + // SD-2781: preserve run-level bidi/script metadata. Wave 1a stops at preservation; + // Wave 1b consumes script.complexScript to gate CS formatting, Wave 1c consumes + // bidi.embedding and bidi.override. + const bidi = buildBidiContext(runProperties); + if (bidi) merged.bidi = bidi; + const script = buildScriptContext(runProperties); + if (script) merged.script = script; return merged; }; From b7cf49dae8b21a36b4456ee9d1d50eafc62ea04d Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Thu, 7 May 2026 18:52:26 -0300 Subject: [PATCH 2/4] fix(contracts): import RunBidiContext + RunScriptContext locally for TextRun The previous commit added `bidi?: RunBidiContext` and `script?: RunScriptContext` to TextRun in contracts/src/index.ts but only re-exported the type names; they were not in local type scope, so `tsc --project` failed: src/index.ts(324,10): error TS2304: Cannot find name 'RunBidiContext'. src/index.ts(330,12): error TS2304: Cannot find name 'RunScriptContext'. Vitest's transform doesn't enforce type-only imports the way tsc does, so the unit tests passed even though the build was broken. CI build would have caught it; the missing piece was running `pnpm build` locally before pushing. Adds the two names to the existing local `import type` statement alongside ParagraphDirectionContext (same pattern, same line 19). Also adds two integration tests in pm-adapter/src/integration.test.ts that exercise the full PM -> FlowBlock conversion through the unmocked applyInlineRunProperties pipeline. The previous unit tests in common.test.ts mock computeRunAttrs, which is why they couldn't catch shape-level regressions in the contracts package. The integration tests prove a real PM doc with raw runProperties (rtl/cs/lang on a run-wrapper node) produces a TextRun with populated bidi/script, and that runs without signals don't gain empty objects. --- packages/layout-engine/contracts/src/index.ts | 2 +- .../pm-adapter/src/integration.test.ts | 61 +++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/packages/layout-engine/contracts/src/index.ts b/packages/layout-engine/contracts/src/index.ts index 96218eb609..71db9a28cc 100644 --- a/packages/layout-engine/contracts/src/index.ts +++ b/packages/layout-engine/contracts/src/index.ts @@ -16,7 +16,7 @@ export type { RunBidiContext, RunScriptContext, } from './direction-context.js'; -import type { ParagraphDirectionContext } from './direction-context.js'; +import type { ParagraphDirectionContext, RunBidiContext, RunScriptContext } from './direction-context.js'; // Export table contracts export { diff --git a/packages/layout-engine/pm-adapter/src/integration.test.ts b/packages/layout-engine/pm-adapter/src/integration.test.ts index e9bdc14328..05d54ab07b 100644 --- a/packages/layout-engine/pm-adapter/src/integration.test.ts +++ b/packages/layout-engine/pm-adapter/src/integration.test.ts @@ -982,4 +982,65 @@ describe('page break integration tests', () => { expect(exhibitPageIndex).toBe(1); expect(layout.pages[1].fragments.length).toBeGreaterThan(0); }); + + // SD-2781: end-to-end check that runs the unmocked applyInlineRunProperties + // pipeline. The unit tests in common.test.ts mock computeRunAttrs, so they + // can't catch type-shape regressions in the contracts package. This test + // exercises the full PM -> FlowBlock conversion with raw runProperties on + // a real run-wrapper node, mirroring how the importer emits them. + it('preserves run-level rtl, cs, and lang on TextRun.bidi / TextRun.script', () => { + const pmDoc = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [ + { + type: 'run', + attrs: { + runProperties: { + rtl: true, + cs: true, + lang: { val: 'en-US', bidi: 'ar-SA', eastAsia: 'ja-JP' }, + }, + }, + content: [{ type: 'text', text: 'mixed-script run' }], + }, + ], + }, + ], + }; + + const { blocks } = toFlowBlocks(pmDoc); + const paragraph = blocks.find((block) => block.kind === 'paragraph'); + expect(paragraph).toBeDefined(); + if (paragraph?.kind !== 'paragraph') return; + + const textRun = paragraph.runs.find((run) => 'text' in run && run.text === 'mixed-script run'); + expect(textRun).toBeDefined(); + expect(textRun?.bidi).toEqual({ rtl: true }); + expect(textRun?.script).toEqual({ + complexScript: true, + language: { default: 'en-US', complexScript: 'ar-SA', eastAsian: 'ja-JP' }, + }); + }); + + it('omits bidi and script on TextRun when no signals are set (no bloat)', () => { + const pmDoc = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [{ type: 'text', text: 'plain Latin text' }], + }, + ], + }; + + const { blocks } = toFlowBlocks(pmDoc); + const paragraph = blocks.find((block) => block.kind === 'paragraph'); + if (paragraph?.kind !== 'paragraph') return; + const textRun = paragraph.runs.find((run) => 'text' in run && run.text === 'plain Latin text'); + expect(textRun?.bidi).toBeUndefined(); + expect(textRun?.script).toBeUndefined(); + }); }); From 3e922f58b68b0b2d87be2ffdef1d4e6a5b96612a Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Thu, 7 May 2026 18:58:16 -0300 Subject: [PATCH 3/4] docs: drop duplicate ticket prefix on SD-2781 test block comment Repo convention puts the ticket reference in the `describe` label only (e.g., `describe('SD-1333: ...')`). Trims the redundant `SD-2781:` prefix from the block comment, keeps the non-obvious "Wave 1a preserves these signals; nothing renders them yet" note. --- .../pm-adapter/src/converters/inline-converters/common.test.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts index b67d6e7299..f2ac2d19a1 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts @@ -118,8 +118,7 @@ describe('applyInlineRunProperties', () => { expect(result.bold).toBe(false); }); - // SD-2781: TextRun.bidi and TextRun.script preserve run-level direction and - // script signals from raw run properties. Wave 1a does not render either. + // Wave 1a preserves these signals; nothing renders them yet. describe('SD-2781 bidi/script preservation', () => { it('does not attach bidi or script when no relevant signals are set', () => { const result = applyInlineRunProperties(baseRun, { bold: true }); From 32154c91ba517c266acea943c39b4c547323df5b Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Thu, 7 May 2026 19:13:11 -0300 Subject: [PATCH 4/4] fix(pm-adapter): read bidi/script from raw inline runProperties + reassign on token runs Two codex findings on PR #3203: 1. Cascade-leak: applyInlineRunProperties was receiving cascade-resolved runProperties from runNodeChildrenToRuns and populating bidi/script from them. Style-inherited runs ended up with bidi/script metadata they did not have inline, making preservation indistinguishable from direct formatting and bloating the layout tree on every styled run. Fix: thread the raw inline runProperties through InlineConverterParams (alongside the existing cascade-resolved runProperties). applyInlineRunProperties gains a fourth parameter that bidi/script populate from. When the caller doesn't opt in (no inline parameter), no metadata is attached - safer default than reading the cascaded view. 2. Token-run drop: generic-token.ts called applyInlineRunProperties without reassigning the return value. Since the helper builds a new object via spread, all merged fields (including the SD-2781 bidi/script) were lost for page-number / total-page-count token runs inside an rtl run wrapper. Fix: change the local to `let` and reassign. Also forwards inlineRunProperties through. no-break-hyphen.ts (another caller) updated for consistency. Tests added: - 3 unit tests in common.test.ts proving bidi/script populate from inlineRunProperties only, never from cascade-resolved runProperties - Existing 7 SD-2781 unit tests updated to pass inlineRunProperties (the new opt-in source) - 1 integration test proving bidi/script propagate to page-number tokens inside an rtl run wrapper All 1800 pm-adapter tests, 12644 super-editor tests, and 1201 layout-bridge tests pass. Contracts + pm-adapter both build clean. --- .../inline-converters/common.test.ts | 46 +++++++++++++++---- .../converters/inline-converters/common.ts | 27 ++++++++--- .../inline-converters/generic-token.ts | 8 +++- .../inline-converters/no-break-hyphen.ts | 3 +- .../src/converters/inline-converters/run.ts | 7 ++- .../converters/inline-converters/text-run.ts | 3 +- .../pm-adapter/src/converters/paragraph.ts | 2 + .../pm-adapter/src/integration.test.ts | 32 +++++++++++++ 8 files changed, 107 insertions(+), 21 deletions(-) diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts index f2ac2d19a1..5f6595cfa9 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts @@ -121,32 +121,31 @@ describe('applyInlineRunProperties', () => { // Wave 1a preserves these signals; nothing renders them yet. describe('SD-2781 bidi/script preservation', () => { it('does not attach bidi or script when no relevant signals are set', () => { - const result = applyInlineRunProperties(baseRun, { bold: true }); + const result = applyInlineRunProperties(baseRun, { bold: true }, undefined, { bold: true }); expect(result.bidi).toBeUndefined(); expect(result.script).toBeUndefined(); }); it('preserves run rtl on TextRun.bidi (does not affect script)', () => { - const result = applyInlineRunProperties(baseRun, { rtl: true }); + const result = applyInlineRunProperties(baseRun, { rtl: true }, undefined, { rtl: true }); expect(result.bidi).toEqual({ rtl: true }); expect(result.script).toBeUndefined(); }); it('preserves explicit rtl=false (a meaningful override of inherited rtl)', () => { - const result = applyInlineRunProperties(baseRun, { rtl: false }); + const result = applyInlineRunProperties(baseRun, { rtl: false }, undefined, { rtl: false }); expect(result.bidi).toEqual({ rtl: false }); }); it('preserves complex-script flag on TextRun.script (does not affect bidi)', () => { - const result = applyInlineRunProperties(baseRun, { cs: true }); + const result = applyInlineRunProperties(baseRun, { cs: true }, undefined, { cs: true }); expect(result.script).toEqual({ complexScript: true }); expect(result.bidi).toBeUndefined(); }); it('preserves the three lang tags on separate fields per ECMA §17.3.2.20', () => { - const result = applyInlineRunProperties(baseRun, { - lang: { val: 'en-US', bidi: 'ar-SA', eastAsia: 'ja-JP' }, - }); + const lang = { val: 'en-US', bidi: 'ar-SA', eastAsia: 'ja-JP' }; + const result = applyInlineRunProperties(baseRun, { lang }, undefined, { lang }); expect(result.script?.language).toEqual({ default: 'en-US', complexScript: 'ar-SA', @@ -155,12 +154,14 @@ describe('applyInlineRunProperties', () => { }); it('partial lang attrs only fill the fields that were set', () => { - const result = applyInlineRunProperties(baseRun, { lang: { bidi: 'he-IL' } }); + const lang = { bidi: 'he-IL' }; + const result = applyInlineRunProperties(baseRun, { lang }, undefined, { lang }); expect(result.script?.language).toEqual({ complexScript: 'he-IL' }); }); it('keeps rtl and cs on separate axes (axis non-collapse)', () => { - const result = applyInlineRunProperties(baseRun, { rtl: true, cs: true }); + const props = { rtl: true, cs: true }; + const result = applyInlineRunProperties(baseRun, props, undefined, props); // rtl goes to bidi only; cs goes to script only expect(result.bidi).toEqual({ rtl: true }); expect(result.script).toEqual({ complexScript: true }); @@ -168,5 +169,32 @@ describe('applyInlineRunProperties', () => { expect(result.bidi).not.toHaveProperty('complexScript'); expect(result.script).not.toHaveProperty('rtl'); }); + + // Cascade-leak guard: when the cascade-resolved runProperties has rtl/cs/lang + // but the raw inline runProperties does NOT, the metadata must not appear. + // Otherwise every style-inherited run gets false bidi/script signals. + it('does NOT populate bidi/script from cascade-resolved props alone', () => { + const cascadedProps = { rtl: true, cs: true, lang: { bidi: 'ar-SA' } }; + const inlineProps = {}; // No inline rtl/cs/lang on the source run + const result = applyInlineRunProperties(baseRun, cascadedProps, undefined, inlineProps); + expect(result.bidi).toBeUndefined(); + expect(result.script).toBeUndefined(); + }); + + it('does NOT populate bidi/script when caller omits inlineRunProperties', () => { + // Default safety: callers that don't opt in to metadata get nothing. + const result = applyInlineRunProperties(baseRun, { rtl: true, cs: true }); + expect(result.bidi).toBeUndefined(); + expect(result.script).toBeUndefined(); + }); + + it('inline overrides survive even when cascade-resolved props differ', () => { + // User explicitly set rtl=true inline; cascade may also resolve to true. + // Either way, inline is the source of truth for preservation. + const cascaded = { rtl: true, fontSize: 12 }; + const inline = { rtl: true }; + const result = applyInlineRunProperties(baseRun, cascaded, undefined, inline); + expect(result.bidi).toEqual({ rtl: true }); + }); }); }); diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts index fca07c199a..ab06ca96ad 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts @@ -26,6 +26,7 @@ type VisitNodeFn = ( activeSdt: SdtMetadata | undefined, activeRunProperties: RunProperties | undefined, activeHidden?: boolean, + activeInlineRunProperties?: RunProperties, ) => void; export class HiddenByVanishError extends Error { @@ -53,6 +54,13 @@ export type InlineConverterParams = { hyperlinkConfig: HyperlinkConfig; themeColors: ThemeColorPalette | undefined; runProperties: RunProperties | undefined; + /** + * The raw inline w:rPr from the run wrapper, BEFORE the style cascade. Used by + * preservation-only metadata (TextRun.bidi / TextRun.script in SD-2781) so + * style-inherited values don't surface as if they were direct formatting. + * Undefined for callers outside a run wrapper. + */ + inlineRunProperties: RunProperties | undefined; paragraphProperties: ParagraphProperties | undefined; converterContext: ConverterContext; enableComments: boolean; @@ -116,6 +124,7 @@ export const applyInlineRunProperties = ( run: TextRun, runProperties: RunProperties | undefined, converterContext?: ConverterContext, + inlineRunProperties?: RunProperties, ): TextRun => { if (!runProperties) { return run; @@ -129,12 +138,16 @@ export const applyInlineRunProperties = ( (merged as Record)[key] = runAttrs[key]; } } - // SD-2781: preserve run-level bidi/script metadata. Wave 1a stops at preservation; - // Wave 1b consumes script.complexScript to gate CS formatting, Wave 1c consumes - // bidi.embedding and bidi.override. - const bidi = buildBidiContext(runProperties); - if (bidi) merged.bidi = bidi; - const script = buildScriptContext(runProperties); - if (script) merged.script = script; + // SD-2781: preserve run-level bidi/script metadata. Read from `inlineRunProperties` + // (the raw inline w:rPr, before the style cascade) so style-inherited runs don't + // get false metadata - per ECMA the metadata categories track what the source + // document encoded, not what the cascade resolved to. When the caller doesn't + // supply inline properties, no metadata is populated. + if (inlineRunProperties) { + const bidi = buildBidiContext(inlineRunProperties); + if (bidi) merged.bidi = bidi; + const script = buildScriptContext(inlineRunProperties); + if (script) merged.script = script; + } return merged; }; diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/generic-token.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/generic-token.ts index fe77a18543..2b92b38410 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/generic-token.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/generic-token.ts @@ -27,6 +27,7 @@ export function tokenNodeToRun({ themeColors, sdtMetadata, runProperties, + inlineRunProperties, converterContext, }: InlineConverterParams): TextRun | null { const token = TOKEN_INLINE_TYPES.get(node.type); @@ -35,7 +36,7 @@ export function tokenNodeToRun({ } // Tokens carry a placeholder character so measurers reserve width; painters will replace it with the real value. - const run: TextRun = { + let run: TextRun = { text: '0', token, fontFamily: defaultFont, @@ -61,7 +62,10 @@ export function tokenNodeToRun({ const marks = [...effectiveMarks, ...(inheritedMarks ?? [])]; applyMarksToRun(run, marks, hyperlinkConfig, themeColors, undefined, true, storyKey); - applyInlineRunProperties(run, runProperties, converterContext); + // Reassign the return value: applyInlineRunProperties returns a new object + // via spread, so the merged fields (including SD-2781 bidi/script metadata) + // are dropped if we don't capture them here. + run = applyInlineRunProperties(run, runProperties, converterContext, inlineRunProperties); // If marksAsAttrs carried font styling, mark the run so downstream defaults don't overwrite it. if (marksAsAttrs.length > 0) { diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/no-break-hyphen.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/no-break-hyphen.ts index 9a27dbb46b..f71399c50b 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/no-break-hyphen.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/no-break-hyphen.ts @@ -24,6 +24,7 @@ export function noBreakHyphenNodeToRun({ themeColors, enableComments, runProperties, + inlineRunProperties, converterContext, }: InlineConverterParams): TextRun { let run: TextRun = { @@ -52,7 +53,7 @@ export function noBreakHyphenNodeToRun({ run.sdt = sdtMetadata; } - run = applyInlineRunProperties(run, runProperties, converterContext); + run = applyInlineRunProperties(run, runProperties, converterContext, inlineRunProperties); return run; } diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/run.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/run.ts index 53e9b77bfc..929f211406 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/run.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/run.ts @@ -22,5 +22,10 @@ export function runNodeChildrenToRuns({ false, false, ); - node.content?.forEach((child) => visitNode(child, mergedMarks, sdtMetadata, resolvedRunProperties, false)); + // Pass the RAW inline runProperties alongside the resolved cascade. SD-2781's + // bidi/script preservation must read from inline only - cascade-resolved + // values would tag every style-inherited run with metadata it didn't have. + node.content?.forEach((child) => + visitNode(child, mergedMarks, sdtMetadata, resolvedRunProperties, false, runProperties), + ); } diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/text-run.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/text-run.ts index c051b8fe8e..11ae20ed05 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/text-run.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/text-run.ts @@ -37,6 +37,7 @@ export function textNodeToRun({ themeColors, enableComments, runProperties, + inlineRunProperties, converterContext, }: InlineConverterParams): TextRun { let run: TextRun = { @@ -65,7 +66,7 @@ export function textNodeToRun({ if (sdtMetadata) { run.sdt = sdtMetadata; } - run = applyInlineRunProperties(run, runProperties, converterContext); + run = applyInlineRunProperties(run, runProperties, converterContext, inlineRunProperties); return run; } diff --git a/packages/layout-engine/pm-adapter/src/converters/paragraph.ts b/packages/layout-engine/pm-adapter/src/converters/paragraph.ts index 62fcc93d00..83fca1ee5a 100644 --- a/packages/layout-engine/pm-adapter/src/converters/paragraph.ts +++ b/packages/layout-engine/pm-adapter/src/converters/paragraph.ts @@ -744,6 +744,7 @@ export function paragraphToFlowBlocks({ activeSdt?: SdtMetadata, activeRunProperties?: RunProperties, activeHidden = false, + activeInlineRunProperties?: RunProperties, ) => { if (activeHidden && node.type !== 'run') { suppressedByVanish = true; @@ -765,6 +766,7 @@ export function paragraphToFlowBlocks({ themeColors, enableComments, runProperties: activeRunProperties, + inlineRunProperties: activeInlineRunProperties, paragraphProperties: resolvedParagraphProperties, converterContext, visitNode, diff --git a/packages/layout-engine/pm-adapter/src/integration.test.ts b/packages/layout-engine/pm-adapter/src/integration.test.ts index 05d54ab07b..a34cf615c2 100644 --- a/packages/layout-engine/pm-adapter/src/integration.test.ts +++ b/packages/layout-engine/pm-adapter/src/integration.test.ts @@ -1043,4 +1043,36 @@ describe('page break integration tests', () => { expect(textRun?.bidi).toBeUndefined(); expect(textRun?.script).toBeUndefined(); }); + + // SD-2781 round 2 (codex finding): generic-token.ts:64 calls + // applyInlineRunProperties without reassigning the return value, so token + // runs (page numbers, total page counts) lose run-level bidi/script metadata + // even when wrapped in a run that explicitly sets rtl/cs/lang. + it('preserves bidi/script on page-number token TextRuns inside an rtl run', () => { + const pmDoc = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [ + { + type: 'run', + attrs: { runProperties: { rtl: true, cs: true } }, + content: [{ type: 'page-number' }], + }, + ], + }, + ], + }; + + const { blocks } = toFlowBlocks(pmDoc); + const paragraph = blocks.find((block) => block.kind === 'paragraph'); + if (paragraph?.kind !== 'paragraph') return; + const tokenRun = paragraph.runs.find( + (run) => 'token' in run && (run.token === 'pageNumber' || run.token === 'totalPageCount'), + ); + expect(tokenRun, 'token run should be present').toBeDefined(); + expect(tokenRun?.bidi).toEqual({ rtl: true }); + expect(tokenRun?.script).toEqual({ complexScript: true }); + }); });