-
Notifications
You must be signed in to change notification settings - Fork 42
/
schema.ts
448 lines (424 loc) · 12 KB
/
schema.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
import {
AttributeSpec,
MarkSpec,
Node,
NodeSpec,
ParseRule,
Schema,
} from 'prosemirror-model'
import { codeBlock } from './components/codeBlock/codeBlock'
import { codeChunk } from './components/codeChunk/codeChunk'
/**
* ProseMirror schema for a Stencila `Article`.
*
* This schema uses the following conventions:
*
* - Properties of Stencila nodes are represented as ProseMirror `NodeSpec`s with
* a lowercase name (e.g. `title`, `abstract`) and `toDOM` and `parseDOM` rules
* which use the corresponding `data-prop` selector (e.g. `[data-prop=title]`).
* The `prop` function is a shortcut for creating these node specs.
*
* - Stencila node types are represented as ProseMirror node types with a title
* case name (e.g. `Paragraph`), a `toDOM` rule that includes `itemtype` (and `itemscope`)
* for application of semantic themes, and `parseDOM` rules that are as simple as
* possible, for (copy-paste) compatibility.
*
* - Stencila node types can define a `contentProp`, which is the name of the node property
* that will be used when generating an address e.g. `contentProp: 'cells'`.
*
* These conventions make it possible to convert a ProseMirror offset position e.g. `83`
* into a Stencila address e.g. `["content", 1, "caption", 4]`.
*
* Note: When adding types here, please ensure transformations are handled in the
* `transformProsemirror` function.
*
* For docs and examples see:
* - https://prosemirror.net/docs/guide/#schema
* - https://prosemirror.net/examples/schema/
* - https://github.com/ProseMirror/prosemirror-schema-basic/blob/master/src/schema-basic.js
*/
export const articleSchema = new Schema({
topNode: 'Article',
nodes: {
// Article type and its properties: an optional `title`, an optional
// `abstract`, then the required `content`
Article: { content: 'title? abstract? content' },
title: prop('title', 'div', 'InlineContent*'),
abstract: prop('abstract', 'div', 'BlockContent+'),
content: prop('content', 'div', 'BlockContent+'),
// Block content types. Note that order is important as the
// first is the default block content
Paragraph: block('Paragraph', 'p', 'InlineContent*'),
Heading: heading(),
List: list(),
ListItem: listItem(),
CodeBlock: codeBlock(),
CodeChunk: codeChunk(),
QuoteBlock: block('QuoteBlock', 'blockquote', 'BlockContent+'),
Table: table(),
TableRow: tableRow(),
TableCell: tableCell(),
TableHeader: tableHeader(),
ThematicBreak: thematicBreak(),
// Inline content types, starting with `text` (equivalent of Stencila `String`),
// the default inline node type
text: {
group: 'InlineContent',
},
CodeFragment: codeFragment(),
},
// Inline formatting types, represented as ProseMirror marks rather than nodes
marks: {
// Parsed from `<em>`, `<i>` or an inline `font-style: italic` style
Emphasis: mark('Emphasis', 'em', [
{ tag: 'em' },
{ tag: 'i' },
{ style: 'font-style=italic' },
]),
// Parsed from `<strong>`, `<b>` or a sufficiently heavy `font-weight` style
Strong: mark('Strong', 'strong', [
{ tag: 'strong' },
{ tag: 'b' },
{
style: 'font-weight',
// Evaluates to `null` when the weight is `bold`, `bolder` or a number of
// three or more digits starting with 5-9, and to `false` otherwise
getAttrs: (value) =>
/^(bold(er)?|[5-9]\d{2,})$/.test(value as string) && null,
},
]),
NontextualAnnotation: mark('NontextualAnnotation', 'u'),
Delete: mark('Delete', 'del'),
Subscript: mark('Subscript', 'sub'),
Superscript: mark('Superscript', 'sup'),
},
})
/**
* The keys of the `marks` object of `articleSchema`.
*/
export const articleMarks = Object.keys(articleSchema.marks)
/**
 * Build the `NodeSpec` for a property of a Stencila node type (e.g. `title`).
 *
 * The property is rendered as `<tag data-prop="name">` and is parsed back from
 * elements matching the `tag[data-prop=name]` selector.
 *
 * @param name The name of the property; used as the `data-prop` attribute value
 * @param tag The tag name of the HTML element used by both `toDOM` and `parseDOM`
 * @param content The expression specifying valid content for the property e.g. `InlineContent+`
 * @param marks The expression specifying valid marks for the property e.g. '_' (all), '' (none)
 * @returns The node spec for the property
 */
function prop(
  name: string,
  tag: string,
  content: string,
  marks = '_'
): NodeSpec {
  // Selector matching only elements that carry this property's marker attribute
  const selector = `${tag}[data-prop=${name}]`
  return {
    content,
    marks,
    defining: true,
    parseDOM: [{ tag: selector }],
    toDOM: () => [tag, { 'data-prop': name }, 0],
  }
}
/**
 * Build the `NodeSpec` for a Stencila `BlockContent` node type.
 *
 * The node is placed in the `BlockContent` group, parsed from any `tag` element
 * and rendered as a `tag` element carrying `itemtype` and `itemscope` attributes.
 *
 * @param name The name of the type e.g. `Paragraph`; used to build the `itemtype` URL
 * @param tag The tag name of the HTML element used by both `toDOM` and `parseDOM`
 * @param content The expression specifying valid content for the node e.g. `InlineContent+`
 * @param marks The expression specifying valid marks for the node e.g. '_' (all), '' (none)
 * @returns The node spec for the block type
 */
function block(
  name: string,
  tag: string,
  content: string,
  marks = '_'
): NodeSpec {
  const itemtype = `https://schema.stenci.la/${name}`
  return {
    group: 'BlockContent',
    content,
    marks,
    defining: true,
    parseDOM: [{ tag }],
    toDOM: () => [tag, { itemtype, itemscope: '' }, 0],
  }
}
/**
 * Generate a `NodeSpec` to represent a Stencila `Heading`.
 *
 * The `depth` of a heading is offset by one from the HTML heading level. This
 * is because there should only be one `h1` (for the title), so when encoding to
 * HTML one is added to the depth (e.g. `depth: 1` => `h2`) and when parsing one
 * is subtracted (e.g. `h3` => `depth: 2`). An `h1` is parsed as `depth: 1`
 * rather than `depth: 0`.
 *
 * HTML only defines `h1` to `h6`, so when encoding the level is clamped to that
 * range: any `depth` of 5 or more is rendered as `h6`.
 *
 * @returns The node spec for `Heading`
 */
function heading(): NodeSpec {
  return {
    group: 'BlockContent',
    content: 'InlineContent*',
    marks: '_',
    defining: true,
    attrs: { depth: { default: 1 } },
    parseDOM: [
      { tag: 'h1', attrs: { depth: 1 } },
      { tag: 'h2', attrs: { depth: 1 } },
      { tag: 'h3', attrs: { depth: 2 } },
      { tag: 'h4', attrs: { depth: 3 } },
      { tag: 'h5', attrs: { depth: 4 } },
      { tag: 'h6', attrs: { depth: 5 } },
    ],
    toDOM(node) {
      // Clamp so that an out-of-range depth never produces a non-existent
      // element such as `h7` or `h0`
      const level = Math.min(Math.max((node.attrs.depth as number) + 1, 1), 6)
      return [
        `h${level}`,
        { itemtype: 'https://schema.stenci.la/Heading', itemscope: '' },
        0,
      ]
    },
  }
}
/**
 * Build the `NodeSpec` for a Stencila `List`.
 *
 * The `order` attribute selects the element used when rendering: `Unordered`
 * gives a `ul` and any other value gives an `ol`. When parsing, `ul` maps to
 * `Unordered` and `ol` to `Ascending`.
 *
 * See https://github.com/ProseMirror/prosemirror-schema-list/blob/master/src/schema-list.js
 * for slightly different node specs for lists.
 *
 * @returns The node spec for `List`
 */
function list(): NodeSpec {
  return {
    group: 'BlockContent',
    content: 'ListItem*',
    contentProp: 'items',
    attrs: { order: { default: 'Unordered' } },
    parseDOM: [
      { tag: 'ul', attrs: { order: 'Unordered' } },
      { tag: 'ol', attrs: { order: 'Ascending' } },
    ],
    toDOM(node) {
      const isUnordered = node.attrs.order === 'Unordered'
      const tag = isUnordered ? 'ul' : 'ol'
      return [tag, { itemtype: 'https://schema.org/ItemList', itemscope: '' }, 0]
    },
  }
}
/**
 * Build the `NodeSpec` for a Stencila `ListItem`.
 *
 * The `content` expression (a leading `Paragraph` followed by any block content)
 * is defined this way so that the commands in the `prosemirror-schema-list`
 * package can be used; see
 * https://github.com/ProseMirror/prosemirror-schema-list/blob/master/src/schema-list.js#L50
 *
 * @returns The node spec for `ListItem`
 */
function listItem(): NodeSpec {
  const itemtype = 'https://schema.org/ListItem'
  return {
    content: 'Paragraph BlockContent*',
    parseDOM: [{ tag: 'li' }],
    toDOM: () => ['li', { itemtype, itemscope: '' }, 0],
  }
}
/**
 * Build the `NodeSpec` for a Stencila `CodeFragment`.
 *
 * An inline node holding only text, with no marks allowed, parsed from `code`
 * elements with whitespace fully preserved.
 *
 * This is temporary and will be replaced with a CodeMirror editor (as with `CodeBlock`).
 *
 * @returns The node spec for `CodeFragment`
 */
function codeFragment(): NodeSpec {
  const itemtype = 'https://schema.stenci.la/CodeFragment'
  return {
    inline: true,
    group: 'InlineContent',
    content: 'text*',
    contentProp: 'text',
    marks: '',
    attrs: { programmingLanguage: { default: '' } },
    code: true,
    parseDOM: [{ tag: 'code', preserveWhitespace: 'full' }],
    toDOM: () => ['code', { itemtype, itemscope: '' }, 0],
  }
}
/**
 * Build the `NodeSpec` for a Stencila `Table`.
 *
 * This, and the other table related specs, are compatible with `prosemirror-tables`
 * (e.g. the `tableRole` and `isolating` attributes) but use Stencila compatible naming.
 * Rows are rendered inside a `tbody` element.
 *
 * See https://github.com/ProseMirror/prosemirror-tables/blob/master/src/schema.js
 *
 * @returns The node spec for `Table`
 */
function table(): NodeSpec {
  const itemtype = 'https://schema.org/Table'
  return {
    group: 'BlockContent',
    content: 'TableRow+',
    contentProp: 'rows',
    tableRole: 'table',
    isolating: true,
    parseDOM: [{ tag: 'table' }],
    toDOM: () => ['table', { itemtype, itemscope: '' }, ['tbody', 0]],
  }
}
/**
 * Build the `NodeSpec` for a Stencila `TableRow`.
 *
 * A row holds any number of `TableHeader` and `TableCell` nodes and is
 * parsed from, and rendered as, a `tr` element.
 *
 * @returns The node spec for `TableRow`
 */
function tableRow(): NodeSpec {
  const itemtype = 'https://schema.stenci.la/TableRow'
  return {
    content: '(TableHeader|TableCell)*',
    contentProp: 'cells',
    tableRole: 'row',
    parseDOM: [{ tag: 'tr' }],
    toDOM: () => ['tr', { itemtype, itemscope: '' }, 0],
  }
}
/**
 * The attribute specs shared by `TableCell` and `TableHeader`.
 *
 * @returns Specs for `colspan` and `rowspan` (both defaulting to 1) and
 * `colwidth` (defaulting to `null`)
 */
function tableCellAttrsSpec(): Record<string, AttributeSpec> {
  const colspan = { default: 1 }
  const rowspan = { default: 1 }
  const colwidth = { default: null }
  return { colspan, rowspan, colwidth }
}
/**
 * Get `TableCell` attributes as part of `parseDOM`.
 *
 * `colspan` and `rowspan` are read as integers; a missing, empty, non-numeric
 * or less-than-one value falls back to 1 so that a malformed attribute
 * (e.g. `colspan=""` or `colspan="0"`) can never produce a zero or `NaN` span.
 *
 * `colwidth` is read from `data-colwidth`, a comma separated list of
 * non-negative integers, and is only kept if it has exactly one entry per
 * spanned column; otherwise it is `null`.
 *
 * @param dom The `td` or `th` element being parsed
 * @returns The `colspan`, `rowspan` and `colwidth` attributes for the cell
 */
function tableCellAttrsGet(dom: HTMLElement): Record<string, unknown> {
  // Read a span attribute, treating anything that is not a positive integer as 1
  const span = (attr: string): number => {
    const parsed = Number.parseInt(dom.getAttribute(attr) ?? '', 10)
    return Number.isInteger(parsed) && parsed >= 1 ? parsed : 1
  }

  const widthAttr = dom.getAttribute('data-colwidth') ?? ''
  const widths = /^\d+(,\d+)*$/.test(widthAttr)
    ? widthAttr.split(',').map((s) => Number(s))
    : null

  const colspan = span('colspan')
  return {
    colspan,
    rowspan: span('rowspan'),
    colwidth: widths && widths.length === colspan ? widths : null,
  }
}
/**
 * Set `TableCell` attributes as part of `toDOM`.
 *
 * The `itemtype` and `itemscope` attributes are always included. The
 * `colspan` and `rowspan` attributes are only included when they differ from 1,
 * and `data-colwidth` (a comma separated list of the widths) only when
 * `colwidth` is not `null` or `undefined`.
 *
 * @param node The `TableCell` or `TableHeader` node being rendered
 * @returns The attributes for the rendered `td` or `th` element
 */
function tableCellAttrsSet(node: Node): Record<string, string | number> {
  const colspan = node.attrs.colspan as number
  const rowspan = node.attrs.rowspan as number
  // Widths are numbers (as parsed by `tableCellAttrsGet`), not strings
  const colwidth = node.attrs.colwidth as number[] | null | undefined

  const attrs: Record<string, string | number> = {
    itemtype: 'https://schema.stenci.la/TableCell',
    itemscope: '',
  }
  if (colspan !== 1) attrs.colspan = colspan
  if (rowspan !== 1) attrs.rowspan = rowspan
  if (colwidth != null) attrs['data-colwidth'] = colwidth.join(',')
  return attrs
}
/**
 * Build the `NodeSpec` for a Stencila `TableCell`.
 *
 * Parsed from, and rendered as, a `td` element, with the cell attributes read
 * by `tableCellAttrsGet` and written by `tableCellAttrsSet`.
 *
 * @returns The node spec for `TableCell`
 */
function tableCell(): NodeSpec {
  const tag = 'td'
  return {
    content: 'InlineContent*',
    attrs: tableCellAttrsSpec(),
    tableRole: 'cell',
    isolating: true,
    parseDOM: [
      { tag, getAttrs: (element) => tableCellAttrsGet(element as HTMLElement) },
    ],
    // NOTE(review): presumably suppresses a mismatch between the attributes
    // returned by `tableCellAttrsSet` and the expected `toDOM` type - confirm
    // @ts-expect-error
    toDOM: (node) => [tag, tableCellAttrsSet(node), 0],
  }
}
/**
 * Build the `NodeSpec` for a Stencila `TableCell` with `cellType` 'Header'.
 *
 * This exists as a separate `NodeSpec` to `TableCell` because the
 * `prosemirror-tables` package seems to want a node type with
 * `tableRole: header_cell`, presumably for its commands to work.
 *
 * See https://github.com/ProseMirror/prosemirror-tables/blob/master/src/schema.js#L96
 *
 * @returns The node spec for `TableHeader`
 */
function tableHeader(): NodeSpec {
  const tag = 'th'
  return {
    content: 'InlineContent*',
    attrs: tableCellAttrsSpec(),
    tableRole: 'header_cell',
    isolating: true,
    parseDOM: [
      { tag, getAttrs: (element) => tableCellAttrsGet(element as HTMLElement) },
    ],
    // NOTE(review): presumably suppresses a mismatch between the attributes
    // returned by `tableCellAttrsSet` and the expected `toDOM` type - confirm
    // @ts-expect-error
    toDOM: (node) => [tag, tableCellAttrsSet(node), 0],
  }
}
/**
 * Build the `NodeSpec` for a Stencila `ThematicBreak`.
 *
 * A block node with no content, parsed from and rendered as an `hr` element.
 *
 * @returns The node spec for `ThematicBreak`
 */
function thematicBreak(): NodeSpec {
  const itemtype = 'https://schema.stenci.la/ThematicBreak'
  return {
    group: 'BlockContent',
    parseDOM: [{ tag: 'hr' }],
    toDOM: () => ['hr', { itemtype, itemscope: '' }],
  }
}
/**
 * Build the `NodeSpec` for a Stencila `InlineContent` node type.
 *
 * The node is inline, placed in the `InlineContent` group, parsed from any `tag`
 * element and rendered as a `tag` element carrying `itemtype` and `itemscope`
 * attributes.
 *
 * @param name The name of the type; used to build the `itemtype` URL
 * @param tag The tag name of the HTML element used by both `toDOM` and `parseDOM`
 * @param content The expression specifying valid content for the node e.g. `text*`
 * @param marks The expression specifying valid marks for the node e.g. '_' (all), '' (none)
 * @returns The node spec for the inline type
 */
function _inline(
  name: string,
  tag: string,
  content: string,
  marks = '_'
): NodeSpec {
  const itemtype = `https://schema.stenci.la/${name}`
  return {
    group: 'InlineContent',
    inline: true,
    content,
    marks,
    parseDOM: [{ tag }],
    toDOM: () => [tag, { itemtype, itemscope: '' }, 0],
  }
}
/**
 * Generate a `MarkSpec` to represent a Stencila inline node type.
 *
 * The mark is rendered as a `tag` element carrying `itemscope` and an
 * `itemtype` of `https://schema.stenci.la/${name}`, the same URL form that the
 * `block` and `_inline` node specs use (a bare type name is not a valid
 * `itemtype`, which must be an absolute URL).
 *
 * @param name The name of the type e.g. `Emphasis`
 * @param tag The tag name of the HTML element for `toDOM` and, by default, `parseDOM`
 * @param parseDOM The parse rules for the mark; defaults to matching `tag` only
 * @returns The mark spec for the type
 */
function mark(name: string, tag: string, parseDOM?: ParseRule[]): MarkSpec {
  const itemtype = `https://schema.stenci.la/${name}`
  return {
    parseDOM: parseDOM ?? [{ tag }],
    toDOM: () => [tag, { itemtype, itemscope: '' }, 0],
  }
}