Skip to content

Commit

Permalink
decode textContent #135
Browse files Browse the repository at this point in the history
  • Loading branch information
taoqf committed Jul 2, 2021
1 parent e35e4ff commit 840ffda
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/nodes/html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ export default class HTMLElement extends Node {
}, '');
}
public get textContent() {
return this.rawText;
return decode(this.rawText);

This comment has been minimized.

Copy link
@nonara

nonara Jul 2, 2021

Collaborator

DOM Spec issue - see notes in PR

}
public set textContent(val: string) {
const content = [new TextNode(val, this)];
Expand Down
4 changes: 3 additions & 1 deletion src/nodes/node.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { decode } from 'he';
import NodeType from './type';
import HTMLElement from './html';

Expand All @@ -17,9 +18,10 @@ export default abstract class Node {
return this.rawText;
}
public get textContent() {
return this.rawText;
return decode(this.rawText);
}
public set textContent(val: string) {
console.error('ssssssssssssssss', val);

This comment has been minimized.

Copy link
@nonara

nonara Jul 2, 2021

Collaborator

Should this be here?

This comment has been minimized.

Copy link
@taoqf

taoqf Jul 5, 2021

Author Owner

Haha, No.

this.rawText = val;

This comment has been minimized.

Copy link
@nonara

nonara Jul 2, 2021

Collaborator

If the getter returns decoded text, the setter setting rawText seems counter-intuitive. It would make more sense to make this a getter only, however, that violates HTML DOM spec. See notes in PR

This comment has been minimized.

Copy link
@taoqf

taoqf Jul 5, 2021

Author Owner

Yes, should this be encode first?

this.rawText = encode(val);

This comment has been minimized.

Copy link
@taoqf

taoqf Jul 5, 2021

Author Owner
}
}
110 changes: 110 additions & 0 deletions src/nodes/text.1.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import { decode } from 'he';

This comment has been minimized.

Copy link
@nonara

nonara Jul 2, 2021

Collaborator

I don't believe this file should be included

This comment has been minimized.

Copy link
@taoqf

taoqf Jul 5, 2021

Author Owner

Yes.

import NodeType from './type';
import Node from './node';
import HTMLElement from './html';

/**
* TextNode to contain a text element in DOM tree.
* @param {string} value [description]
*/
export default class TextNode extends Node {
public constructor(rawText: string, parentNode: HTMLElement) {
super(parentNode);
this._rawText = rawText;
}

/**
* Node Type declaration.
* @type {Number}
*/
public nodeType = NodeType.TEXT_NODE;

private _rawText: string;
private _trimmedRawText?: string;
private _trimmedText?: string;

public get rawText() {
return this._rawText;
}

/**
* Set rawText and invalidate trimmed caches
*/
public set rawText(text: string) {
this._rawText = text;
this._trimmedRawText = void 0;
this._trimmedText = void 0;
}

/**
* Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
*/
public get trimmedRawText() {
if (this._trimmedRawText !== undefined) return this._trimmedRawText;
this._trimmedRawText = trimText(this.rawText);
return this._trimmedRawText;
}

/**
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
*/
public get trimmedText() {
if (this._trimmedText !== undefined) return this._trimmedText;

this._trimmedText = trimText(this.text);

return this._trimmedText;
}

/**
* Get unescaped text value of current node and its children.
* @return {string} text content
*/
public get text() {
return decode(this.rawText);
}

/**
* Detect if the node contains only white space.
* @return {boolean}
*/
public get isWhitespace() {
return /^(\s| )*$/.test(this.rawText);
}

public toString() {
return this.rawText;
}
}

/**
* Trim whitespace except single leading/trailing non-breaking space
*/
function trimText(text: string): string {
let i = 0;
let startPos;
let endPos;

while (i >= 0 && i < text.length) {
if (/\S/.test(text[i])) {
if (startPos === undefined) {
startPos = i;
i = text.length;
} else {
endPos = i;
i = void 0;
}
}

if (startPos === undefined) i++;
else i--;
}

if (startPos === undefined) startPos = 0;
if (endPos === undefined) endPos = text.length - 1;

const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);

return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
}
4 changes: 2 additions & 2 deletions src/nodes/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ export default class TextNode extends Node {
if (startPos === undefined) startPos = 0;
if (endPos === undefined) endPos = text.length - 1;

const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos-1]);
const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos+1]);
const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);

this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');

Expand Down
58 changes: 58 additions & 0 deletions test/135.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
const { parse, TextNode, HTMLElement } = require('../dist');

describe('pr 135', function () {
it('shoud not decode text', function () {

This comment has been minimized.

Copy link
@nonara

nonara Jul 2, 2021

Collaborator

This is redundant, replicating the check of the next test should not decode text from parseHTML(), which is also redundant against the test in test/html.js under 'encode/decode > shoud not decode text'

const content = `&lt;p&gt; Not a p tag &lt;br /&gt; at all`;
const root = parse(`<div>${content}</div>`);
const div = root.firstChild;
div.innerHTML.should.eql(content);
div.textContent.should.eql('<p> Not a p tag <br /> at all');
// div.innerText.should.eql('<p> Not a p tag <br /> at all');

// const textNode = div.firstChild;
// textNode.rawText.should.eql(content);
// textNode.toString().should.eql('aaa')
});

it('should not decode text from parseHTML()', function () {
const content = `&lt;p&gt; Not a p tag &lt;br /&gt; at all`;
const root = parse(`<div>${content}</div>`);
root.childNodes.should.have.length(1);

const divNode = root.firstChild;
divNode.childNodes.should.have.length(1);

const textNode = divNode.firstChild;
textNode.rawText.should.eql(content);
});

it(`should decode for node text property`, function () {
const encodedText = `My&gt;text`;
const decodedText = `My>text`;
const root = parse(`<p>${encodedText}</p>`);

const pNode = root.firstChild;
pNode.innerHTML.should.eql(encodedText);
pNode.textContent.should.eql(decodedText);

const textNode = pNode.firstChild;
textNode.textContent.should.eql(decodedText);
});

it('should remove whitespaces while preserving nodes with content', function () {

This comment has been minimized.

Copy link
@nonara

nonara Jul 2, 2021

Collaborator

Unnecessary test, given the changes made

const root = parse('<p> \r \n \t <h5> 123&nbsp; </h5></p>');

const textNode = new TextNode(' 123&nbsp; ');
textNode.rawText = textNode.trimmedText;
textNode.rawText.should.eql(' 123&nbsp; ');

const p = new HTMLElement('p', {}, '', root);
p
.appendChild(new HTMLElement('h5', {}, ''))
.appendChild(textNode);

p.toString().should.eql('<p><h5> 123&nbsp; </h5></p>');
root.firstChild.removeWhitespace().toString().should.eql('<p><h5> 123&nbsp; </h5></p>');
root.firstChild.removeWhitespace().should.eql(p);
})
});
5 changes: 2 additions & 3 deletions test/html.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ const should = require('should');
const fs = require('fs');

const HTMLParser = require('../dist');
const Matcher = require('../dist/matcher').default;
const HTMLElement = require('../dist/nodes/html').default;
const TextNode = require('../dist/nodes/text').default;
const CommentNode = require('../dist/nodes/comment').default;
Expand Down Expand Up @@ -126,10 +125,10 @@ describe('HTML Parser', function () {
const script = root.firstChild;
const style = root.lastChild;
script.childNodes.should.not.be.empty;
script.childNodes.should.eql([ new TextNode('1', script) ]);
script.childNodes.should.eql([new TextNode('1', script)]);
script.text.should.eql('1');
style.childNodes.should.not.be.empty;
style.childNodes.should.eql([ new TextNode('2&amp;', style) ]);
style.childNodes.should.eql([new TextNode('2&amp;', style)]);
style.text.should.eql('2&');
style.rawText.should.eql('2&amp;');
});
Expand Down

0 comments on commit 840ffda

Please sign in to comment.