Skip to content

Commit

Permalink
Fix line endings around element breaks in text
Browse files Browse the repository at this point in the history
Closes GH-3.
  • Loading branch information
wooorm committed Jan 5, 2023
1 parent 4de6078 commit 387eff4
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 30 deletions.
97 changes: 67 additions & 30 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@
* Any parent.
* @typedef {'normal' | 'pre' | 'nowrap' | 'pre-wrap'} Whitespace
* Valid and useful whitespace values (from CSS).
* @typedef {boolean} BreakValue
* Whether there was a break.
* @typedef {1 | 2} BreakNumber
* Specific break.
* @typedef {0 | 1 | 2} BreakNumber
* Specific break:
*
* * `0` — space
* * `1` — line ending
* * `2` — blank line
* @typedef {'\n'} BreakForce
* Forced break.
* @typedef {boolean} BreakValue
* Whether there was a break.
* @typedef {BreakValue | BreakNumber | undefined} BreakBefore
* Any value for a break before.
* @typedef {BreakValue | BreakNumber | BreakForce | undefined} BreakAfter
Expand Down Expand Up @@ -156,6 +160,9 @@ export function toText(tree, options = {}) {
breakAfter: false
})

/** @type {Array<string | BreakNumber>} */
const results = []

// Treat `text` and `comment` as having normal white-space.
// This deviates from the spec as in the DOM the node’s `.data` has to be
// returned.
Expand All @@ -165,7 +172,13 @@ export function toText(tree, options = {}) {
// Nodes without children are treated as a void element, so `doctype` is thus
// ignored.
if (tree.type === 'text' || tree.type === 'comment') {
return collectText(tree, {whitespace, breakBefore: true, breakAfter: true})
results.push(
...collectText(tree, {
whitespace,
breakBefore: true,
breakAfter: true
})
)
}

// 1. If this element is not being rendered, or if the user agent is a
Expand All @@ -179,8 +192,6 @@ export function toText(tree, options = {}) {
// Important: we’ll have to account for this later though.

// 2. Let results be a new empty list.
/** @type {Array<string | BreakNumber>} */
let results = []
let index = -1

// 3. For each child node node of this element:
Expand All @@ -190,9 +201,9 @@ export function toText(tree, options = {}) {
// Each item in results will either be a JavaScript string or a
// positive integer (a required line break count).
// 3.2. For each item item in current, append item to results.
results = results.concat(
results.push(
// @ts-expect-error Looks like a parent.
innerTextCollection(children[index], tree, {
...innerTextCollection(children[index], tree, {
whitespace,
breakBefore: index ? undefined : block,
breakAfter:
Expand Down Expand Up @@ -221,8 +232,11 @@ export function toText(tree, options = {}) {
if (typeof value === 'number') {
if (count !== undefined && value > count) count = value
} else if (value) {
if (count) result.push('\n'.repeat(count))
count = 0
if (count !== undefined && count > -1) {
result.push('\n'.repeat(count) || ' ')
}

count = -1
result.push(value)
}
}
Expand All @@ -245,11 +259,9 @@ function innerTextCollection(node, parent, info) {
}

if (node.type === 'text') {
return [
info.whitespace === 'normal'
? collectText(node, info)
: collectPreText(node)
]
return info.whitespace === 'normal'
? collectText(node, info)
: collectPreText(node)
}

return []
Expand All @@ -259,8 +271,11 @@ function innerTextCollection(node, parent, info) {
* Collect an element.
*
* @param {Element} node
* Element node.
* @param {Parent} parent
* @param {CollectionInfo} info
* Info on current collection.
* @returns {Array<string | BreakNumber>}
*/
function collectElement(node, parent, info) {
// First we infer the `white-space` property.
Expand Down Expand Up @@ -376,18 +391,21 @@ function collectElement(node, parent, info) {
* See: <https://drafts.csswg.org/css-text/#white-space-phase-1>
*
* @param {Text | Comment} node
* Text node.
* @param {CollectionInfo} info
* @returns {string}
* Info on current collection.
* @returns {Array<string | BreakNumber>}
* Result.
*/
function collectText(node, info) {
const value = String(node.value)
/** @type {Array<string>} */
const lines = []
/** @type {Array<string>} */
/** @type {Array<string | BreakNumber>} */
const result = []
let start = 0

while (start < value.length) {
while (start <= value.length) {
searchLineFeeds.lastIndex = start

const match = searchLineFeeds.exec(value)
Expand All @@ -397,14 +415,14 @@ function collectText(node, info) {
// Any sequence of collapsible spaces and tabs immediately preceding or
// following a segment break is removed.
trimAndCollapseSpacesAndTabs(
// [...] ignoring bidi formatting characters (characters with the
// […] ignoring bidi formatting characters (characters with the
// Bidi_Control property [UAX9]: ALM, LTR, RTL, LRE-RLO, LRI-PDI) as if
// they were not there.
value
.slice(start, end)
.replace(/[\u061C\u200E\u200F\u202A-\u202E\u2066-\u2069]/g, ''),
info.breakBefore,
info.breakAfter
start === 0 ? info.breakBefore : true,
end === value.length ? info.breakAfter : true
)
)

Expand All @@ -417,7 +435,8 @@ function collectText(node, info) {
// Any collapsible segment break immediately following another collapsible
// segment break is removed
let index = -1
let join = ''
/** @type {BreakNumber | undefined} */
let join

while (++index < lines.length) {
// * If the character immediately before or immediately after the segment
Expand All @@ -429,7 +448,7 @@ function collectText(node, info) {
lines[index + 1].charCodeAt(0) === 0x200b) /* ZWSP */
) {
result.push(lines[index])
join = ''
join = undefined
}

// * Otherwise, if the East Asian Width property [UAX11] of both the
Expand All @@ -449,21 +468,30 @@ function collectText(node, info) {

// * Otherwise, the segment break is converted to a space (U+0020).
else if (lines[index]) {
if (join) result.push(join)
if (typeof join === 'number') result.push(join)
result.push(lines[index])
join = ' '
join = 0
} else if (index === 0 || index === lines.length - 1) {
// If this line is empty, and it’s the first or last, add a space.
// Note that this function is only called in normal whitespace, so we
// don’t worry about `pre`.
result.push(0)
}
}

return result.join('')
return result
}

/**
* @param {Text | Comment} node
* @returns {string}
* Collect a text node as “pre” whitespace.
*
* @param {Text} node
* Text node.
* @returns {Array<string | BreakNumber>}
* Result.
*/
function collectPreText(node) {
// Returns the text node's value coerced to a string, without any whitespace
// collapsing.
// NOTE(review): this is a diff view, so the two `return` lines below appear
// to be the removed (plain string) and the added (single-item array) side of
// the same change — presumably only the array form exists in the committed
// file; confirm against the repository before relying on either.
return String(node.value)
return [String(node.value)]
}

/**
Expand All @@ -475,9 +503,13 @@ function collectPreText(node) {
* but retains its soft wrap opportunity, if any.)
*
* @param {string} value
* Value to collapse.
* @param {BreakBefore} breakBefore
* Whether there was a break before.
* @param {BreakAfter} breakAfter
* Whether there was a break after.
* @returns {string}
* Result.
*/
function trimAndCollapseSpacesAndTabs(value, breakBefore, breakAfter) {
/** @type {Array<string>} */
Expand Down Expand Up @@ -515,11 +547,16 @@ function trimAndCollapseSpacesAndTabs(value, breakBefore, breakAfter) {
}

/**
* Figure out the whitespace of a node.
*
* We don’t support void elements here (so `nobr wbr` -> `normal` is ignored).
*
* @param {Node} node
* Node (typically `Element`).
* @param {CollectionInfo} info
* Info on current collection.
* @returns {Whitespace}
* Applied whitespace.
*/
function inferWhitespace(node, info) {
if (node.type === 'element') {
Expand Down
74 changes: 74 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,77 @@ test('non-normal white-space', () => {
'should support a `textarea` element'
)
})

test('more whitespace', () => {
  // Each entry pairs an input tree with the text `toText` is expected to
  // produce for it.
  // The 1-based position of an entry in this list is the number shown at the
  // end of its assertion message.
  const cases = [
    // Line endings inside a paragraph, around or inside inline `span`s,
    // collapse to a single space.
    [h('p', ['A\n', h('span', 'b')]), 'A b'],
    [h('p', ['A\nb', h('span', 'c')]), 'A bc'],
    [h('p', ['A', h('span', '\nb')]), 'A b'],
    [h('p', ['A\n', h('span', '\nb')]), 'A b'],
    [h('p', [h('span', 'A\n'), h('span', 'b')]), 'A b'],
    [h('p', [h('span', 'A'), h('span', '\nb')]), 'A b'],
    [h('p', [h('span', 'A\n'), h('span', '\nb')]), 'A b'],
    [h('p', [h('span', 'A\n'), 'b']), 'A b'],
    [h('p', [h('span', 'A'), '\nb']), 'A b'],
    [h('p', [h('span', 'A\n'), '\nb']), 'A b'],
    // Same, with the paragraph nested in another element.
    [h('div', [h('p', [h('span', 'A\n'), '\nb'])]), 'A b'],
    // In `pre`, the line ending is kept as is.
    [h('pre', ['A\n', h('span', 'b')]), 'A\nb']
  ]

  for (const [index, [tree, expected]] of cases.entries()) {
    assert.equal(
      toText(tree),
      expected,
      `should support line endings around element breaks (${index + 1})`
    )
  }
})

0 comments on commit 387eff4

Please sign in to comment.