diff --git a/lib/index.js b/lib/index.js index ef11a7b..835489e 100644 --- a/lib/index.js +++ b/lib/index.js @@ -14,12 +14,16 @@ * Any parent. * @typedef {'normal' | 'pre' | 'nowrap' | 'pre-wrap'} Whitespace * Valid and useful whitespace values (from CSS). - * @typedef {boolean} BreakValue - * Whether there was a break. - * @typedef {1 | 2} BreakNumber - * Specific break. + * @typedef {0 | 1 | 2} BreakNumber + * Specific break: + * + * * `0` — space + * * `1` — line ending + * * `2` — blank line * @typedef {'\n'} BreakForce * Forced break. + * @typedef {boolean} BreakValue + * Whether there was a break. * @typedef {BreakValue | BreakNumber | undefined} BreakBefore * Any value for a break before. * @typedef {BreakValue | BreakNumber | BreakForce | undefined} BreakAfter @@ -156,6 +160,9 @@ export function toText(tree, options = {}) { breakAfter: false }) + /** @type {Array} */ + const results = [] + // Treat `text` and `comment` as having normal white-space. // This deviates from the spec as in the DOM the node’s `.data` has to be // returned. @@ -165,7 +172,13 @@ export function toText(tree, options = {}) { // Nodes without children are treated as a void element, so `doctype` is thus // ignored. if (tree.type === 'text' || tree.type === 'comment') { - return collectText(tree, {whitespace, breakBefore: true, breakAfter: true}) + results.push( + ...collectText(tree, { + whitespace, + breakBefore: true, + breakAfter: true + }) + ) } // 1. If this element is not being rendered, or if the user agent is a @@ -179,8 +192,6 @@ export function toText(tree, options = {}) { // Important: we’ll have to account for this later though. // 2. Let results be a new empty list. - /** @type {Array} */ - let results = [] let index = -1 // 3. For each child node node of this element: @@ -190,9 +201,9 @@ export function toText(tree, options = {}) { // Each item in results will either be a JavaScript string or a // positive integer (a required line break count). // 3.2. 
For each item item in current, append item to results. - results = results.concat( + results.push( // @ts-expect-error Looks like a parent. - innerTextCollection(children[index], tree, { + ...innerTextCollection(children[index], tree, { whitespace, breakBefore: index ? undefined : block, breakAfter: @@ -221,8 +232,11 @@ export function toText(tree, options = {}) { if (typeof value === 'number') { if (count !== undefined && value > count) count = value } else if (value) { - if (count) result.push('\n'.repeat(count)) - count = 0 + if (count !== undefined && count > -1) { + result.push('\n'.repeat(count) || ' ') + } + + count = -1 result.push(value) } } @@ -245,11 +259,9 @@ function innerTextCollection(node, parent, info) { } if (node.type === 'text') { - return [ - info.whitespace === 'normal' - ? collectText(node, info) - : collectPreText(node) - ] + return info.whitespace === 'normal' + ? collectText(node, info) + : collectPreText(node) } return [] @@ -259,8 +271,11 @@ function innerTextCollection(node, parent, info) { * Collect an element. * * @param {Element} node + * Element node. * @param {Parent} parent * @param {CollectionInfo} info + * Info on current collection. + * @returns {Array} */ function collectElement(node, parent, info) { // First we infer the `white-space` property. @@ -376,18 +391,21 @@ function collectElement(node, parent, info) { * See: * * @param {Text | Comment} node + * Text node. * @param {CollectionInfo} info - * @returns {string} + * Info on current collection. + * @returns {Array} + * Result. 
*/ function collectText(node, info) { const value = String(node.value) /** @type {Array<string>} */ const lines = [] - /** @type {Array<string>} */ + /** @type {Array<string | BreakNumber>} */ const result = [] let start = 0 - while (start < value.length) { + while (start <= value.length) { searchLineFeeds.lastIndex = start const match = searchLineFeeds.exec(value) @@ -397,14 +415,14 @@ function collectText(node, info) { // Any sequence of collapsible spaces and tabs immediately preceding or // following a segment break is removed. trimAndCollapseSpacesAndTabs( - // [...] ignoring bidi formatting characters (characters with the + // […] ignoring bidi formatting characters (characters with the // Bidi_Control property [UAX9]: ALM, LTR, RTL, LRE-RLO, LRI-PDI) as if // they were not there. value .slice(start, end) .replace(/[\u061C\u200E\u200F\u202A-\u202E\u2066-\u2069]/g, ''), - info.breakBefore, - info.breakAfter + start === 0 ? info.breakBefore : true, + end === value.length ? info.breakAfter : true ) ) @@ -417,7 +435,8 @@ function collectText(node, info) { // Any collapsible segment break immediately following another collapsible // segment break is removed let index = -1 - let join = '' + /** @type {BreakNumber | undefined} */ + let join while (++index < lines.length) { // * If the character immediately before or immediately after the segment @@ -429,7 +448,7 @@ function collectText(node, info) { lines[index + 1].charCodeAt(0) === 0x200b) /* ZWSP */ ) { result.push(lines[index]) - join = '' + join = undefined } // * Otherwise, if the East Asian Width property [UAX11] of both the @@ -449,21 +468,30 @@ function collectText(node, info) { // * Otherwise, the segment break is converted to a space (U+0020). else if (lines[index]) { - if (join) result.push(join) + if (typeof join === 'number') result.push(join) result.push(lines[index]) - join = ' ' + join = 0 + } else if (index === 0 || index === lines.length - 1) { + // If this line is empty, and it’s the first or last, add a space. 
+ // Note that this function is only called in normal whitespace, so we + // don’t worry about `pre`. + result.push(0) } } - return result.join('') + return result } /** - * @param {Text | Comment} node - * @returns {string} + * Collect a text node as “pre” whitespace. + * + * @param {Text} node + * Text node. + * @returns {Array} + * Result. */ function collectPreText(node) { - return String(node.value) + return [String(node.value)] } /** @@ -475,9 +503,13 @@ function collectPreText(node) { * but retains its soft wrap opportunity, if any.) * * @param {string} value + * Value to collapse. * @param {BreakBefore} breakBefore + * Whether there was a break before. * @param {BreakAfter} breakAfter + * Whether there was a break after. * @returns {string} + * Result. */ function trimAndCollapseSpacesAndTabs(value, breakBefore, breakAfter) { /** @type {Array} */ @@ -515,11 +547,16 @@ function trimAndCollapseSpacesAndTabs(value, breakBefore, breakAfter) { } /** + * Figure out the whitespace of a node. + * * We don’t support void elements here (so `nobr wbr` -> `normal` is ignored). * * @param {Node} node + * Node (typically `Element`). * @param {CollectionInfo} info + * Info on current collection. * @returns {Whitespace} + * Applied whitespace. 
*/ function inferWhitespace(node, info) { if (node.type === 'element') { diff --git a/test.js b/test.js index 3c47c9c..f6d1442 100644 --- a/test.js +++ b/test.js @@ -319,3 +319,77 @@ test('non-normal white-space', () => { 'should support a `textarea` element' ) }) + +test('more whitespace', () => { + assert.equal( + toText(h('p', ['A\n', h('span', 'b')])), + 'A b', + 'should support line endings around element breaks (1)' + ) + + assert.equal( + toText(h('p', ['A\nb', h('span', 'c')])), + 'A bc', + 'should support line endings around element breaks (2)' + ) + + assert.equal( + toText(h('p', ['A', h('span', '\nb')])), + 'A b', + 'should support line endings around element breaks (3)' + ) + + assert.equal( + toText(h('p', ['A\n', h('span', '\nb')])), + 'A b', + 'should support line endings around element breaks (4)' + ) + + assert.equal( + toText(h('p', [h('span', 'A\n'), h('span', 'b')])), + 'A b', + 'should support line endings around element breaks (5)' + ) + + assert.equal( + toText(h('p', [h('span', 'A'), h('span', '\nb')])), + 'A b', + 'should support line endings around element breaks (6)' + ) + + assert.equal( + toText(h('p', [h('span', 'A\n'), h('span', '\nb')])), + 'A b', + 'should support line endings around element breaks (7)' + ) + + assert.equal( + toText(h('p', [h('span', 'A\n'), 'b'])), + 'A b', + 'should support line endings around element breaks (8)' + ) + + assert.equal( + toText(h('p', [h('span', 'A'), '\nb'])), + 'A b', + 'should support line endings around element breaks (9)' + ) + + assert.equal( + toText(h('p', [h('span', 'A\n'), '\nb'])), + 'A b', + 'should support line endings around element breaks (10)' + ) + + assert.equal( + toText(h('div', [h('p', [h('span', 'A\n'), '\nb'])])), + 'A b', + 'should support line endings around element breaks (11)' + ) + + assert.equal( + toText(h('pre', ['A\n', h('span', 'b')])), + 'A\nb', + 'should support line endings around element breaks (12)' + ) +})