Skip to content
Merged
2 changes: 1 addition & 1 deletion apps/docs/document-api/reference/_generated-manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -1018,5 +1018,5 @@
}
],
"marker": "{/* GENERATED FILE: DO NOT EDIT. Regenerate via `pnpm run docapi:sync`. */}",
"sourceHash": "e74a36833ec8587b67447a79517de348cfc9b4bba1c564729c184f6d5464a018"
"sourceHash": "c8670fb494b56c19fbd09a7bada35974fbb3c22d938f6a5e01eee6e8467961c0"
}
56 changes: 55 additions & 1 deletion apps/docs/document-api/reference/extract.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ _No fields._
{
"headingLevel": 1,
"nodeId": "node-def456",
"tableContext": {
"colspan": 1,
"columnIndex": 1,
"parentRowIndex": 1,
"parentTableOrdinal": 1,
"rowIndex": 1,
"rowspan": 1,
"tableOrdinal": 1
},
"text": "Hello, world.",
"type": "example"
}
Expand Down Expand Up @@ -110,12 +119,57 @@ _No fields._
"description": "Stable block ID — pass to scrollToElement() for navigation.",
"type": "string"
},
"tableContext": {
"additionalProperties": false,
"properties": {
"colspan": {
"description": "Number of columns the cell spans.",
"type": "integer"
},
"columnIndex": {
"description": "0-based logical grid column, not the row child order.",
"type": "integer"
},
"parentColumnIndex": {
"description": "Column index in the parent table. Set with parentTableOrdinal.",
"type": "integer"
},
"parentRowIndex": {
"description": "Row index in the parent table. Set with parentTableOrdinal.",
"type": "integer"
},
"parentTableOrdinal": {
"description": "Ordinal of the parent table when the containing table is nested.",
"type": "integer"
},
"rowIndex": {
"description": "0-based row index of the containing cell.",
"type": "integer"
},
"rowspan": {
"description": "Number of rows the cell spans.",
"type": "integer"
},
"tableOrdinal": {
"description": "0-based table ordinal, unique within one extract() result.",
"type": "integer"
}
},
"required": [
"tableOrdinal",
"rowIndex",
"columnIndex",
"rowspan",
"colspan"
],
"type": "object"
},
"text": {
"description": "Full plain text content of the block.",
"type": "string"
},
"type": {
"description": "Block type: paragraph, heading, listItem, table, image, etc.",
"description": "Block type: paragraph, heading, listItem, image, tableOfContents.",
"type": "string"
}
},
Expand Down
33 changes: 32 additions & 1 deletion packages/document-api/src/contract/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@
const trackedChangeAddressSchema = ref('TrackedChangeAddress');
const entityAddressSchema = ref('EntityAddress');
const selectionTargetSchema = ref('SelectionTarget');
const targetLocatorSchema = ref('TargetLocator');

Check warning on line 614 in packages/document-api/src/contract/schemas.ts

View workflow job for this annotation

GitHub Actions / build

'targetLocatorSchema' is assigned a value but never used. Allowed unused vars must match /^_/u
const deleteBehaviorSchema = ref('DeleteBehavior');
const resolvedHandleSchema = ref('ResolvedHandle');
const pageInfoSchema = ref('PageInfo');
Expand Down Expand Up @@ -886,7 +886,7 @@
text: { type: 'string' },
});

const nodeInfoSchema: JsonSchema = {

Check warning on line 889 in packages/document-api/src/contract/schemas.ts

View workflow job for this annotation

GitHub Actions / build

'nodeInfoSchema' is assigned a value but never used. Allowed unused vars must match /^_/u
type: 'object',
required: ['nodeType', 'kind'],
properties: {
Expand All @@ -902,7 +902,7 @@
additionalProperties: false,
};

const matchContextSchema = objectSchema(

Check warning on line 905 in packages/document-api/src/contract/schemas.ts

View workflow job for this annotation

GitHub Actions / build

'matchContextSchema' is assigned a value but never used. Allowed unused vars must match /^_/u
{
address: nodeAddressSchema,
snippet: { type: 'string' },
Expand All @@ -913,7 +913,7 @@
['address', 'snippet', 'highlightRange'],
);

const unknownNodeDiagnosticSchema = objectSchema(

Check warning on line 916 in packages/document-api/src/contract/schemas.ts

View workflow job for this annotation

GitHub Actions / build

'unknownNodeDiagnosticSchema' is assigned a value but never used. Allowed unused vars must match /^_/u
{
message: { type: 'string' },
address: nodeAddressSchema,
Expand Down Expand Up @@ -2963,9 +2963,40 @@
items: objectSchema(
{
nodeId: { type: 'string', description: 'Stable block ID — pass to scrollToElement() for navigation.' },
type: { type: 'string', description: 'Block type: paragraph, heading, listItem, table, image, etc.' },
type: {
type: 'string',
description: 'Block type: paragraph, heading, listItem, image, tableOfContents.',
},
text: { type: 'string', description: 'Full plain text content of the block.' },
headingLevel: { type: 'integer', description: 'Heading level (1–6). Only present for headings.' },
tableContext: objectSchema(
{
tableOrdinal: {
type: 'integer',
description: '0-based table ordinal, unique within one extract() result.',
},
parentTableOrdinal: {
type: 'integer',
description: 'Ordinal of the parent table when the containing table is nested.',
},
parentRowIndex: {
type: 'integer',
description: 'Row index in the parent table. Set with parentTableOrdinal.',
},
parentColumnIndex: {
type: 'integer',
description: 'Column index in the parent table. Set with parentTableOrdinal.',
},
rowIndex: { type: 'integer', description: '0-based row index of the containing cell.' },
columnIndex: {
type: 'integer',
description: '0-based logical grid column, not the row child order.',
},
rowspan: { type: 'integer', description: 'Number of rows the cell spans.' },
colspan: { type: 'integer', description: 'Number of columns the cell spans.' },
},
['tableOrdinal', 'rowIndex', 'columnIndex', 'rowspan', 'colspan'],
),
},
['nodeId', 'type', 'text'],
),
Expand Down
50 changes: 47 additions & 3 deletions packages/document-api/src/types/extract.types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,59 @@ import type { CommentStatus, TrackChangeType } from './index.js';
// extract
// ---------------------------------------------------------------------------

/**
 * Table coordinates for an {@link ExtractBlock} that lives inside a table cell.
 *
 * Blocks inside tables are extracted at paragraph granularity (one entry per
 * paragraph/heading/listItem/image/tableOfContents in each cell). Group
 * by these fields to reconstruct cells, rows, or whole tables:
 *
 * - cell: group by `tableOrdinal + rowIndex + columnIndex`
 * - row: group by `tableOrdinal + rowIndex`
 * - table: group by `tableOrdinal`
 */
export interface ExtractTableContext {
  /** 0-based table ordinal, unique within one `extract()` result. */
  tableOrdinal: number;
  /** Ordinal of the parent table when this block is inside a nested table. */
  parentTableOrdinal?: number;
  /** Row index within the parent table. Only set with `parentTableOrdinal`. */
  parentRowIndex?: number;
  /** Column index within the parent table. Only set with `parentTableOrdinal`. */
  parentColumnIndex?: number;
  /** 0-based row index of the containing cell. */
  rowIndex: number;
  /** 0-based logical grid column of the containing cell, not the row's child order. */
  columnIndex: number;
  /** Number of rows the containing cell spans. 1 for unmerged cells. */
  rowspan: number;
  /** Number of columns the containing cell spans. 1 for unmerged cells. */
  colspan: number;
}

/**
 * One addressable unit of document content.
 *
 * Extraction is paragraph-granular: tables are NOT returned as a single block.
 * Paragraph-like descendants of table cells are emitted individually with
 * `tableContext` attached.
 *
 * Block SDTs (structured document tags / content controls) are transparent:
 * their children are emitted individually, as if they were direct children of
 * the enclosing container, and no wrapper `sdt` block is emitted. This keeps
 * an SDT-wrapped table from being re-flattened through the wrapper's
 * textContent.
 */
export interface ExtractBlock {
  /** Stable block ID. Pass to `scrollToElement()` for navigation. */
  nodeId: string;
  /** Block type: paragraph, heading, listItem, image, tableOfContents. */
  type: string;
  /** Full plain text content of the block. */
  text: string;
  /** Heading level (1-6). Only present for headings. */
  headingLevel?: number;
  /** Table coordinates. Only present for blocks inside a table cell. */
  tableContext?: ExtractTableContext;
}

export interface ExtractComment {
Expand Down
Loading
Loading