/**
 * Trims the whitespace around the content of a text node, keeping at most one
 * ordinary space (U+0020) on each side.
 *
 * A side keeps its space only when the character directly next to the
 * outermost non-whitespace character is whitespace other than CR or LF (a
 * space or a tab, for example). When that neighbouring character is a line
 * break, or when the content already touches the edge of the string, no space
 * is kept on that side. Whitespace inside the content is left untouched.
 *
 * @param text raw text to trim
 * @returns the trimmed text; a string with no non-whitespace character
 * (including the empty string) is returned unchanged
 */
private trimTextNodeWhitespace(text: string): string {
  const nonWhitespace = /\S/;
  // Any whitespace character except CR and LF.
  const inlineWhitespace = /[^\S\r\n]/;

  const start = text.search(nonWhitespace);
  if (start === -1) {
    // Nothing to anchor the trim on: hand the input back as-is.
    return text;
  }

  // Walk back from the end; this always stops because `start` itself is a
  // non-whitespace position.
  let end = text.length - 1;
  while (!nonWhitespace.test(text.charAt(end))) {
    end--;
  }

  const hasLeadingSpace = start > 0 && inlineWhitespace.test(text.charAt(start - 1));
  const hasTrailingSpace = end < text.length - 1 && inlineWhitespace.test(text.charAt(end + 1));

  return (hasLeadingSpace ? ' ' : '') + text.slice(start, end + 1) + (hasTrailingSpace ? ' ' : '');
}

\r \n \t

123

'); + const root = parseHTML('

\r \n \t

123

'); const p = new HTMLElement('p', {}, '', root); p.appendChild(new HTMLElement('h5', {}, '')) @@ -206,6 +206,12 @@ describe('HTML Parser', function () { root.firstChild.removeWhitespace().should.eql(p); }); + + it('should preserve legitimate leading/trailing whitespace in TextNode', function () { + parseHTML('

Hello World!

').removeWhitespace().firstChild.text.should.eql('Hello World!'); + parseHTML('

\t\nHello\n\tWorld!

').removeWhitespace().firstChild.text.should.eql('HelloWorld!'); + parseHTML('

\t\n Hello \n\tWorld!

').removeWhitespace().firstChild.text.should.eql(' Hello World!'); + }); }); describe('#rawAttributes', function () {