Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const INLINE_FALLBACK_TYPES = new Set([
'endnoteReference',
'fieldAnnotation',
'structuredContent',
'image',
'mathInline',
'passthroughInline',
'page-number',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ describe('isInlineNode', () => {
expect(isInlineNode({ type: 'bookmarkStart', attrs: { id: '1' } })).toBe(true);
expect(isInlineNode({ type: 'bookmarkEnd', attrs: { id: '1' } })).toBe(true);
expect(isInlineNode({ type: 'tab' })).toBe(true);
expect(isInlineNode({ type: 'image', attrs: { src: 'media/image1.png' } })).toBe(true);
expect(isInlineNode({ type: 'footnoteReference', attrs: { id: '1' } })).toBe(true);
});

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,56 @@
import { parseAnnotationMarks } from './handle-annotation-node';
import { parseStrictStOnOff } from '../../../utils.js';
import { BLOCK_FIELD_XML_NAMES } from '../../../sd/shared/block-field-xml-names.js';
import { isInlineNode } from '../../../helpers/is-inline-node.js';

const INLINE_CONTEXT_XML_NAMES = new Set(['w:p', 'w:r', 'w:hyperlink', 'w:smartTag']);

/**
 * Report whether a `w:sdtContent` element has a direct child that is itself
 * block-level XML: a paragraph (`w:p`), a table (`w:tbl`), or any element whose
 * name is listed in `BLOCK_FIELD_XML_NAMES`.
 *
 * @param {{ elements?: Array<{ name?: string } | null | undefined> } | null | undefined} sdtContent
 *   The `w:sdtContent` XML element; may be missing or have no `elements`.
 * @returns {boolean} `true` when at least one direct child is block-level.
 */
function hasDirectBlockSignal(sdtContent) {
  const isBlockLevelChild = (child) => {
    const xmlName = child?.name;
    if (xmlName === 'w:p' || xmlName === 'w:tbl') return true;
    return BLOCK_FIELD_XML_NAMES.has(xmlName);
  };

  // `some` is skipped entirely (yielding undefined) when there are no elements.
  const found = sdtContent?.elements?.some(isBlockLevelChild);
  return !!found;
}

/**
 * Decide whether the import path places the current SDT somewhere an inline
 * `structuredContent` node may be emitted: inside one of the inline-context
 * elements in `INLINE_CONTEXT_XML_NAMES`, or inside another `w:sdtContent`.
 *
 * @param {Array<{ name?: string } | null | undefined> | null} [path=[]]
 *   Ancestor XML elements of the node being imported. A missing, `null`, or
 *   non-array path is treated as having no inline context.
 * @returns {boolean} `true` when any ancestor provides an inline context.
 */
function canEmitInlineStructuredContent(path = []) {
  // The default parameter only covers `undefined`; a `null` (or otherwise
  // non-array) path would throw on `.some`, so treat it like an empty path.
  if (!Array.isArray(path)) return false;

  return path.some((entry) => {
    const name = entry?.name;
    return name === 'w:sdtContent' || INLINE_CONTEXT_XML_NAMES.has(name);
  });
}

/**
 * Report whether the already-translated content contains at least one typed
 * node that `isInlineNode` does not consider inline.
 *
 * @param {Array<object | null | undefined>} [content=[]] Translated nodes.
 * @param {object} [schema] Editor schema forwarded to `isInlineNode`.
 * @returns {boolean} `true` when any typed node is not inline.
 */
function hasTranslatedBlockContent(content = [], schema) {
  const isBlockLevel = (candidate) => {
    // Entries without a type are ignored rather than treated as blocks.
    if (!candidate?.type) return false;
    return !isInlineNode(candidate, schema);
  };

  return content.some(isBlockLevel);
}

/**
 * Normalize a node list for use inside a block container: each consecutive run
 * of inline nodes is wrapped in a synthetic paragraph, while non-inline nodes
 * are passed through unchanged and in their original order. Falsy entries are
 * dropped.
 *
 * @param {Iterable<object | null | undefined>} [content=[]] Translated nodes.
 * @param {object} [schema] Editor schema forwarded to `isInlineNode`.
 * @returns {Array<object>} A new array; the input is not modified.
 */
function wrapInlineRunsAsParagraphs(content = [], schema) {
  const output = [];
  let inlineBuffer = [];

  // Emit the buffered inline run (if any) as one paragraph and start a new buffer.
  const emitBufferedParagraph = () => {
    if (inlineBuffer.length === 0) return;
    const wrapper = { type: 'paragraph', attrs: null, content: inlineBuffer, marks: [] };
    output.push(wrapper);
    inlineBuffer = [];
  };

  for (const entry of content) {
    if (entry) {
      if (isInlineNode(entry, schema)) {
        inlineBuffer.push(entry);
      } else {
        // A non-inline node ends the current inline run.
        emitBufferedParagraph();
        output.push(entry);
      }
    }
  }

  // Wrap any inline run left over at the end of the list.
  emitBufferedParagraph();
  return output;
}

/**
* Detect the semantic control type from sdtPr child elements.
Expand Down Expand Up @@ -113,25 +163,25 @@ export function handleStructuredContentNode(params) {
return null;
}

const paragraph = sdtContent.elements?.find((el) => el.name === 'w:p');
const table = sdtContent.elements?.find((el) => el.name === 'w:tbl');
// SD-3005: a content control wrapping a block field (e.g. BIBLIOGRAPHY) has
// no direct w:p after preprocessing — its child is an sd:* block node. It is
// block content and must not be emitted as an inline structuredContent.
const blockField = sdtContent.elements?.find((el) => BLOCK_FIELD_XML_NAMES.has(el?.name));
const { marks } = parseAnnotationMarks(sdtContent);
const translatedContent = nodeListHandler.handler({
...params,
nodes: sdtContent.elements,
path: [...(params.path || []), sdtContent],
});

const isBlockNode = paragraph || table || blockField;
const schema = params.editor?.schema;
const content = Array.isArray(translatedContent) ? translatedContent : [];
const isBlockNode =
hasTranslatedBlockContent(content, schema) ||
hasDirectBlockSignal(sdtContent) ||
!canEmitInlineStructuredContent(params.path);
const sdtContentType = isBlockNode ? 'structuredContentBlock' : 'structuredContent';
const normalizedContent = isBlockNode ? wrapInlineRunsAsParagraphs(content, schema) : content;

let result = {
type: sdtContentType,
content: translatedContent,
content: normalizedContent,
marks,
attrs: {
id: id?.attributes?.['w:val'] || null,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { handleStructuredContentNode } from './handle-structured-content-node';
import { parseAnnotationMarks } from './handle-annotation-node';
import { defaultNodeListHandler } from '../../../../../v2/importer/docxImporter.js';
import { initTestEditor } from '@tests/helpers/helpers.js';

// Mock dependencies
vi.mock('./handle-annotation-node', () => ({
Expand Down Expand Up @@ -31,6 +33,10 @@ describe('handleStructuredContentNode', () => {
parseAnnotationMarks.mockReturnValue({ marks: [] });
});

afterEach(() => {
vi.restoreAllMocks();
});

it('returns null when nodes array is empty', () => {
const params = { nodes: [], nodeListHandler: mockNodeListHandler };
const result = handleStructuredContentNode(params);
Expand Down Expand Up @@ -79,7 +85,7 @@ describe('handleStructuredContentNode', () => {
const params = {
nodes: [node],
nodeListHandler: mockNodeListHandler,
path: [],
path: [{ name: 'w:p' }],
};

parseAnnotationMarks.mockReturnValue({ marks: [{ type: 'bold' }] });
Expand Down Expand Up @@ -363,3 +369,177 @@ describe('handleStructuredContentNode', () => {
});
});
});

// Regression suite for nested content controls (w:sdt inside w:sdt). Unlike a
// suite driven by a mocked node list handler, these tests push hand-built XML
// element trees through defaultNodeListHandler() with a test editor, then check
// the resulting JSON against the editor schema.
describe('handleStructuredContentNode nested SDT import regression', () => {
let editor;

// Builds a w:r element containing a single w:t with the given text.
const textRun = (text) => ({
name: 'w:r',
elements: [{ name: 'w:t', elements: [{ type: 'text', text }] }],
});

// Builds a w:p element containing one text run.
const paragraph = (text) => ({
name: 'w:p',
elements: [textRun(text)],
});

// Builds a w:sdtPr element carrying id, tag, alias, lock and a control-type
// marker element (defaults: lock 'unlocked', marker 'w:richText').
const sdtPr = ({ id, tag, alias, lockMode = 'unlocked', controlType = 'w:richText' }) => ({
name: 'w:sdtPr',
elements: [
{ name: 'w:id', attributes: { 'w:val': id } },
{ name: 'w:tag', attributes: { 'w:val': tag } },
{ name: 'w:alias', attributes: { 'w:val': alias } },
{ name: 'w:lock', attributes: { 'w:val': lockMode } },
{ name: controlType },
],
});

// Builds a w:sdt element: its properties followed by a w:sdtContent wrapping
// the supplied child elements.
const sdt = (props, contentElements) => ({
name: 'w:sdt',
elements: [sdtPr(props), { name: 'w:sdtContent', elements: contentElements }],
});

// Builds a one-row, one-cell w:tbl whose cell holds a single paragraph.
const table = (text) => ({
name: 'w:tbl',
elements: [
{
name: 'w:tblPr',
elements: [{ name: 'w:tblW', attributes: { 'w:w': '2400', 'w:type': 'dxa' } }],
},
{
name: 'w:tblGrid',
elements: [{ name: 'w:gridCol', attributes: { 'w:w': '2400' } }],
},
{
name: 'w:tr',
elements: [
{
name: 'w:tc',
elements: [
{
name: 'w:tcPr',
elements: [{ name: 'w:tcW', attributes: { 'w:w': '2400', 'w:type': 'dxa' } }],
},
paragraph(text),
],
},
],
},
],
});

// Runs the given XML elements through a fresh default node list handler,
// starting from an empty path and an empty docx object.
const importNodes = (nodes) => {
const nodeListHandler = defaultNodeListHandler();
return nodeListHandler.handler({
nodes,
nodeListHandler,
docx: {},
editor,
path: [],
});
};

// Asserts that the content can be built into a doc node by the editor schema
// and that the node's check() does not throw; returns the built node.
const expectSchemaValid = (content) => {
let pmDoc;
expect(() => {
pmDoc = editor.schema.nodeFromJSON({ type: 'doc', content });
pmDoc.check();
}).not.toThrow();
return pmDoc;
};

// Depth-first search over a JSON node tree; returns the first node matching
// the predicate, or null when none matches.
const findFirstJson = (node, predicate) => {
if (!node) return null;
if (predicate(node)) return node;
for (const child of node.content || []) {
const found = findFirstJson(child, predicate);
if (found) return found;
}
return null;
};

// Each test gets its own editor; annotation marks are stubbed to an empty list.
beforeEach(() => {
({ editor } = initTestEditor({
isHeadless: true,
loadFromSchema: true,
content: { type: 'doc', content: [{ type: 'paragraph' }] },
}));
parseAnnotationMarks.mockReturnValue({ marks: [] });
});

// Tear down the editor and restore mocks so state does not carry across tests.
afterEach(() => {
editor?.destroy();
editor = null;
vi.restoreAllMocks();
});

// The outer w:sdtContent has no direct w:p or w:tbl: its only child is another
// w:sdt. Both levels are expected to import as structuredContentBlock.
it('imports nested block SDT when outer sdtContent directly contains w:sdt wrapping a paragraph', () => {
const inner = sdt({ id: 'inner-block', tag: 'inner-tag', alias: 'Inner Alias', lockMode: 'contentLocked' }, [
paragraph('Nested paragraph'),
]);
const outer = sdt({ id: 'outer-block', tag: 'outer-tag', alias: 'Outer Alias', lockMode: 'sdtLocked' }, [inner]);

const result = importNodes([outer]);

expect(result).toHaveLength(1);
expect(result[0].type).toBe('structuredContentBlock');
expect(result[0].attrs).toMatchObject({
id: 'outer-block',
tag: 'outer-tag',
alias: 'Outer Alias',
lockMode: 'sdtLocked',
controlType: 'richText',
});

const nested = result[0].content?.[0];
expect(nested?.type).toBe('structuredContentBlock');
expect(nested.attrs).toMatchObject({
id: 'inner-block',
tag: 'inner-tag',
alias: 'Inner Alias',
lockMode: 'contentLocked',
controlType: 'richText',
});
// The original w:sdtPr element is expected to be kept on attrs.sdtPr.
expect(nested.attrs.sdtPr?.elements?.find((el) => el.name === 'w:alias')?.attributes?.['w:val']).toBe(
'Inner Alias',
);

expectSchemaValid(result);
});

// The outer SDT mixes a run-only nested w:sdt with a paragraph and a table.
// The expected child types show the inline SDT ending up inside a paragraph.
it('wraps nested inline SDT safely when an outer block SDT also contains paragraph and table content', () => {
const inlineNested = sdt(
{ id: 'inner-inline', tag: 'inline-tag', alias: 'Inline Alias', lockMode: 'sdtContentLocked' },
[textRun('Inline value')],
);
const outer = sdt({ id: 'outer-mixed', tag: 'outer-mixed-tag', alias: 'Outer Mixed', lockMode: 'sdtLocked' }, [
inlineNested,
paragraph('Outer paragraph'),
table('Cell text'),
]);

const result = importNodes([outer]);

expect(result).toHaveLength(1);
expect(result[0].type).toBe('structuredContentBlock');
expect(result[0].content?.map((node) => node.type)).toEqual(['paragraph', 'paragraph', 'table']);

// The inline SDT is no longer a direct child, so search the subtree for it.
const nested = findFirstJson(
result[0],
(node) => node.type === 'structuredContent' && node.attrs?.id === 'inner-inline',
);
expect(nested).toBeTruthy();
expect(nested.attrs).toMatchObject({
id: 'inner-inline',
tag: 'inline-tag',
alias: 'Inline Alias',
lockMode: 'sdtContentLocked',
controlType: 'richText',
});
expect(nested.attrs.sdtPr?.elements?.find((el) => el.name === 'w:lock')?.attributes?.['w:val']).toBe(
'sdtContentLocked',
);

expectSchemaValid(result);
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# SDT classification fixtures (PR #3616)

Real `.docx` fixtures that validate the nested content-control classifier in
`super-converter/v3/handlers/w/sdt/`. Exercised by
`tests/editor/sdt-nested-classification.test.js`.

The claim under test: block vs run/inline SDT classification is driven by the
translated ProseMirror content shape plus import context, not only by the direct
XML child names of `w:sdtContent`.

## Provenance and conformance

Each fixture's surrounding package (content types, rels, styles, theme, fonts, and
image media) is taken verbatim from a Word-authored base already in this folder.
Only `word/document.xml` is hand-authored to encode the exact OOXML shape, so the
package stays valid while the structure is precise. All fixtures are therefore
**schema-only** (hand-authored structure, not produced or validated by Word).

The `Conformance` column distinguishes shapes that are valid under ECMA-376 from
the one that is deliberately malformed to exercise the PR's defensive normalization:

| Fixture | Conformance | Base package | Shape under `w:body` |
|---|---|---|---|
| `sdt-nested-block.docx` | conformant | `blank-doc.docx` | block `w:sdt` whose `w:sdtContent` directly contains a nested block `w:sdt` (no direct `w:p`) wrapping a paragraph. Legal: `EG_ContentBlockContent` permits `sdt`. |
| `sdt-nested-inline.docx` | conformant | `blank-doc.docx` | `w:p` containing an inline `w:sdt` that contains a nested inline `w:sdt` of runs, between two text runs. Legal: `CT_SdtContentRun` is `EG_PContent`. |
| `sdt-mixed-block.docx` | **defensive (malformed)** | `blank-doc.docx` | block `w:sdt` whose `w:sdtContent` holds a bare inline `w:sdt`, a `w:p`, and a `w:tbl`. The bare inline `w:sdt` is **non-conformant**: a `w:sdt` directly under block content is positionally `CT_SdtBlock`, whose content may not be a bare `w:r` (`EG_ContentBlockContent` allows only `customXml/sdt/p/tbl/EG_RunLevelElts`, and `EG_RunLevelElts` excludes `w:r`). Included on purpose to drive `wrapInlineRunsAsParagraphs`, which the PR uses to normalize bare inline content inside a block SDT. |
| `sdt-inline-picture.docx` | conformant | `anchor_images.docx` (reuses `media/image1.png`, `rId4`) | `w:p` > inline `w:sdt` with `<w:picture/>` marker > `w:sdtContent` > `w:r` > `w:drawing`. Legal per ECMA-376 §17.5.2.24 (picture content control wrapping a single DrawingML picture). |

## Rebuild

```
node packages/super-editor/src/editors/v1/tests/data/sdt-fixtures.generate.cjs
```

The generator resolves all paths from its own location and reads the two base
packages (`blank-doc.docx`, `anchor_images.docx`) from this folder, so it is
portable. It re-reads each built file and asserts the intended shape. Set
`SDT_FIXTURE_OUT=/some/dir` to write to a scratch dir instead of overwriting the
committed fixtures (useful for a dry run). Regeneration is content-equivalent;
only zip metadata may differ.

## Out of scope

Row-level SDTs (`w:tbl > w:sdt > w:sdtContent > w:tr`, Google Docs `goog_rdk_*`
exports) are a separate table-walk concern tracked by SD-3118 / IT-1040 and are
not covered by these fixtures. Use the real Google Docs artifact attached to those
tickets as that fixture, and check its preservation through a Word round-trip
before choosing between transparent unwrap and `rowSdt` metadata.
Loading
Loading