diff --git a/.eslintrc.js b/.eslintrc.js new file mode 100644 index 0000000..307a483 --- /dev/null +++ b/.eslintrc.js @@ -0,0 +1,142 @@ +module.exports = { + "env": { + "browser": true, + "es6": true, + "node": true + }, + "extends": [ + "plugin:@typescript-eslint/recommended", + "plugin:@typescript-eslint/recommended-requiring-type-checking" + ], + "parser": "@typescript-eslint/parser", + "parserOptions": { + "project": "tsconfig.json", + "sourceType": "module" + }, + "plugins": [ + "@typescript-eslint", + "@typescript-eslint/tslint" + ], + "rules": { + "@typescript-eslint/prefer-includes": "off", + "@typescript-eslint/unbound-method": "off", + "@typescript-eslint/no-non-null-assertion": "off", + "@typescript-eslint/member-delimiter-style": "error", + "@typescript-eslint/no-misused-promises": "error", + "@typescript-eslint/explicit-function-return-type": "off", + "@typescript-eslint/camelcase": "off", + "@typescript-eslint/adjacent-overload-signatures": "error", + "@typescript-eslint/array-type": "error", + "@typescript-eslint/ban-types": "error", + "@typescript-eslint/class-name-casing": "error", + "@typescript-eslint/consistent-type-assertions": "error", + "@typescript-eslint/indent": [ + "error", + "tab", + { + "ArrayExpression": "first", + "ObjectExpression": "first" + } + ], + "@typescript-eslint/interface-name-prefix": "off", + "@typescript-eslint/no-empty-function": "off", + "@typescript-eslint/no-empty-interface": "error", + "@typescript-eslint/no-explicit-any": "off", + "@typescript-eslint/no-misused-new": "error", + "@typescript-eslint/no-namespace": "error", + "@typescript-eslint/no-parameter-properties": "off", + "@typescript-eslint/no-use-before-define": "off", + "@typescript-eslint/no-var-requires": "error", + "@typescript-eslint/prefer-for-of": "error", + "@typescript-eslint/prefer-function-type": "error", + "@typescript-eslint/prefer-namespace-keyword": "error", + "@typescript-eslint/quotes": [ + "error", + "single", + { + "avoidEscape": true + } + ], + "@typescript-eslint/triple-slash-reference": "error", + "@typescript-eslint/unified-signatures": "off", + "camelcase": "off", + "comma-dangle": "error", + "complexity": "off", + "constructor-super": "error", + "dot-notation": "error", + "eqeqeq": [ + "error", + "smart" + ], + "guard-for-in": "off", + "id-blacklist": [ + "error", + "any", + "Number", + "number", + "String", + "string", + "Boolean", + "boolean", + "Undefined" + ], + "id-match": "error", + "max-classes-per-file": [ + "error", + 1 + ], + "max-len": "off", + "new-parens": "error", + "no-bitwise": "off", + "no-caller": "error", + "no-cond-assign": "off", + "no-console": "off", + "no-debugger": "error", + "no-empty": "off", + "no-eval": "error", + "no-fallthrough": "off", + "no-invalid-this": "off", + "no-multiple-empty-lines": "off", + "no-new-wrappers": "error", + "no-shadow": [ + "error", + { + "hoist": "all" + } + ], + "no-throw-literal": "error", + "no-trailing-spaces": "error", + "no-undef-init": "error", + "no-underscore-dangle": "off", + "no-unsafe-finally": "error", + "no-unused-expressions": "error", + "no-unused-labels": "error", + "no-var": "error", + "object-shorthand": "error", + "one-var": [ + "error", + "never" + ], + "prefer-arrow/prefer-arrow-functions": "off", + "prefer-const": "error", + "radix": "error", + "spaced-comment": "error", + "use-isnan": "error", + "valid-typeof": "off", + "@typescript-eslint/tslint/config": [ + "error", + { + "rules": { + "jsdoc-format": true, + "no-reference-import": true, + "no-unsafe-any": true, + "whitespace": [ + true, + "check-branch", + "check-operator" + ] + } + } + ] + } +}; diff --git a/package.json b/package.json index 0d069f0..77d7b46 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ "types": "dist/index.d.ts", "scripts": { "test": "mocha", - "lint": "eslint ./src/*.ts", + "lint": "eslint ./src/*.ts ./src/**/*.ts", "clean": "del-cli ./dist/", "ts:cjs": "tsc -m commonjs", "ts:umd": "tsc -t es5 -m umd -d false --outDir ./dist/umd/", diff --git a/src/back.ts b/src/back.ts new file mode 100644 index 0000000..aa22b70 --- /dev/null +++ b/src/back.ts @@ -0,0 +1,3 @@ +export default function arr_back(arr: T[]) { + return arr[arr.length - 1]; +} diff --git a/src/index.ts b/src/index.ts index ecd7147..2555447 100755 --- a/src/index.ts +++ b/src/index.ts @@ -1,791 +1,9 @@ -import { decode } from 'he'; - -export enum NodeType { - ELEMENT_NODE = 1, - TEXT_NODE = 3, - COMMENT_NODE = 8 -} - -/** - * Node Class as base class for TextNode and HTMLElement. - */ -export abstract class Node { - nodeType: NodeType; - childNodes = [] as Node[]; - text: string; - rawText: string; - abstract toString(): String; -} -/** - * TextNode to contain a text element in DOM tree. - * @param {string} value [description] - */ -export class TextNode extends Node { - constructor(value: string) { - super(); - this.rawText = value; - } - - /** - * Node Type declaration. - * @type {Number} - */ - nodeType = NodeType.TEXT_NODE; - - /** - * Get unescaped text value of current node and its children. - * @return {string} text content - */ - get text() { - return decode(this.rawText); - } - - /** - * Detect if the node contains only white space. - * @return {bool} - */ - get isWhitespace() { - return /^(\s| )*$/.test(this.rawText); - } - - toString() { - return this.text; - } -} - -export class CommentNode extends Node { - constructor(value: string) { - super(); - this.rawText = value; - } - - /** - * Node Type declaration. - * @type {Number} - */ - nodeType = NodeType.COMMENT_NODE; - - /** - * Get unescaped text value of current node and its children. - * @return {string} text content - */ - get text() { - return decode(this.rawText); - } - - toString() { - return ``; - } -} - -const kBlockElements = { - div: true, - p: true, - // ul: true, - // ol: true, - li: true, - // table: true, - // tr: true, - td: true, - section: true, - br: true -}; - -export interface KeyAttributes { - id?: string; - class?: string; -} - -export interface Attributes { - [key: string]: string; -} - -export interface RawAttributes { - [key: string]: string; -} - -function arr_back(arr: T[]) { - return arr[arr.length - 1]; -} - -/** - * HTMLElement, which contains a set of children. - * - * Note: this is a minimalist implementation, no complete tree - * structure provided (no parentNode, nextSibling, - * previousSibling etc). - * @class HTMLElement - * @extends {Node} - */ -export class HTMLElement extends Node { - private _attrs: Attributes; - private _rawAttrs: RawAttributes; - public id: string; - public classNames = [] as string[]; - /** - * Node Type declaration. - */ - public nodeType = NodeType.ELEMENT_NODE; - /** - * Creates an instance of HTMLElement. - * @param keyAttrs id and class attribute - * @param [rawAttrs] attributes in string - * - * @memberof HTMLElement - */ - constructor(public tagName: string, keyAttrs: KeyAttributes, private rawAttrs = '', public parentNode = null as Node) { - super(); - this.rawAttrs = rawAttrs || ''; - this.parentNode = parentNode || null; - this.childNodes = []; - if (keyAttrs.id) { - this.id = keyAttrs.id; - } - if (keyAttrs.class) { - this.classNames = keyAttrs.class.split(/\s+/); - } - } - /** - * Remove Child element from childNodes array - * @param {HTMLElement} node node to remove - */ - public removeChild(node: Node) { - this.childNodes = this.childNodes.filter((child) => { - return (child !== node); - }); - } - /** - * Exchanges given child with new child - * @param {HTMLElement} oldNode node to exchange - * @param {HTMLElement} newNode new node - */ - public exchangeChild(oldNode: Node, newNode: Node) { - let idx = -1; - for (let i = 0; i < this.childNodes.length; i++) { - if (this.childNodes[i] === oldNode) { - idx = i; - break; - } - } - this.childNodes[idx] = newNode; - } - /** - * Get escpaed (as-it) text value of current node and its children. - * @return {string} text content - */ - get rawText() { - let res = ''; - for (let i = 0; i < this.childNodes.length; i++) - res += this.childNodes[i].rawText; - return res; - } - /** - * Get unescaped text value of current node and its children. - * @return {string} text content - */ - get text() { - return decode(this.rawText); - } - /** - * Get structured Text (with '\n' etc.) - * @return {string} structured text - */ - get structuredText() { - let currentBlock = [] as string[]; - const blocks = [currentBlock]; - function dfs(node: Node) { - if (node.nodeType === NodeType.ELEMENT_NODE) { - if (kBlockElements[(node as HTMLElement).tagName]) { - if (currentBlock.length > 0) { - blocks.push(currentBlock = []); - } - node.childNodes.forEach(dfs); - if (currentBlock.length > 0) { - blocks.push(currentBlock = []); - } - } else { - node.childNodes.forEach(dfs); - } - } else if (node.nodeType === NodeType.TEXT_NODE) { - if ((node as TextNode).isWhitespace) { - // Whitespace node, postponed output - (currentBlock as any).prependWhitespace = true; - } else { - let text = node.text; - if ((currentBlock as any).prependWhitespace) { - text = ' ' + text; - (currentBlock as any).prependWhitespace = false; - } - currentBlock.push(text); - } - } - } - dfs(this); - return blocks - .map(function (block) { - // Normalize each line's whitespace - return block.join('').trim().replace(/\s{2,}/g, ' '); - }) - .join('\n').replace(/\s+$/, ''); // trimRight; - } - - public toString() { - const tag = this.tagName; - if (tag) { - const is_un_closed = /^meta$/i.test(tag); - const is_self_closed = /^(img|br|hr|area|base|input|doctype|link)$/i.test(tag); - const attrs = this.rawAttrs ? ' ' + this.rawAttrs : ''; - if (is_un_closed) { - return `<${tag}${attrs}>`; - } else if (is_self_closed) { - return `<${tag}${attrs} />`; - } else { - return `<${tag}${attrs}>${this.innerHTML}`; - } - } else { - return this.innerHTML; - } - } - - get innerHTML() { - return this.childNodes.map((child) => { - return child.toString(); - }).join(''); - } - - public set_content(content: string | Node | Node[]) { - if (content instanceof Node) { - content = [content]; - } else if (typeof content == 'string') { - const r = parse(content); - content = r.childNodes.length ? r.childNodes : [new TextNode(content)]; - } - this.childNodes = content as Node[]; - } - - get outerHTML() { - return this.toString(); - } - - /** - * Trim element from right (in block) after seeing pattern in a TextNode. - * @param {RegExp} pattern pattern to find - * @return {HTMLElement} reference to current node - */ - public trimRight(pattern: RegExp) { - for (let i = 0; i < this.childNodes.length; i++) { - const childNode = this.childNodes[i]; - if (childNode.nodeType === NodeType.ELEMENT_NODE) { - (childNode as HTMLElement).trimRight(pattern); - } else { - const index = childNode.rawText.search(pattern); - if (index > -1) { - childNode.rawText = childNode.rawText.substr(0, index); - // trim all following nodes. - this.childNodes.length = i + 1; - } - } - } - return this; - } - /** - * Get DOM structure - * @return {string} strucutre - */ - get structure() { - const res = [] as string[]; - let indention = 0; - function write(str: string) { - res.push(' '.repeat(indention) + str); - } - function dfs(node: HTMLElement) { - const idStr = node.id ? ('#' + node.id) : ''; - const classStr = node.classNames.length ? ('.' + node.classNames.join('.')) : ''; - write(node.tagName + idStr + classStr); - indention++; - for (let i = 0; i < node.childNodes.length; i++) { - const childNode = node.childNodes[i]; - if (childNode.nodeType === NodeType.ELEMENT_NODE) { - dfs(childNode as HTMLElement); - } else if (childNode.nodeType === NodeType.TEXT_NODE) { - if (!(childNode as TextNode).isWhitespace) - write('#text'); - } - } - indention--; - } - dfs(this); - return res.join('\n'); - } - - /** - * Remove whitespaces in this sub tree. - * @return {HTMLElement} pointer to this - */ - public removeWhitespace() { - let o = 0; - for (let i = 0; i < this.childNodes.length; i++) { - const node = this.childNodes[i]; - if (node.nodeType === NodeType.TEXT_NODE) { - if ((node as TextNode).isWhitespace) - continue; - node.rawText = node.rawText.trim(); - } else if (node.nodeType === NodeType.ELEMENT_NODE) { - (node as HTMLElement).removeWhitespace(); - } - this.childNodes[o++] = node; - } - this.childNodes.length = o; - return this; - } - - /** - * Query CSS selector to find matching nodes. - * @param {string} selector Simplified CSS selector - * @param {Matcher} selector A Matcher instance - * @return {HTMLElement[]} matching elements - */ - public querySelectorAll(selector: string | Matcher) { - let matcher: Matcher; - if (selector instanceof Matcher) { - matcher = selector; - matcher.reset(); - } else { - if (selector.includes(',')) { - const selectors = selector.split(',') as string[]; - return Array.from(selectors.reduce((pre, cur) => { - const result = this.querySelectorAll(cur.trim()) as HTMLElement[]; - return result.reduce((p, c) => { - return p.add(c); - }, pre); - }, new Set())); - } - matcher = new Matcher(selector); - } - const res = [] as HTMLElement[]; - const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean; }[]; - for (let i = 0; i < this.childNodes.length; i++) { - stack.push([this.childNodes[i], 0, false]); - while (stack.length) { - const state = arr_back(stack); - const el = state[0]; - if (state[1] === 0) { - // Seen for first time. - if (el.nodeType !== NodeType.ELEMENT_NODE) { - stack.pop(); - continue; - } - if (state[2] = matcher.advance(el)) { - if (matcher.matched) { - res.push(el as HTMLElement); - // no need to go further. - matcher.rewind(); - stack.pop(); - continue; - } - } - } - if (state[1] < el.childNodes.length) { - stack.push([el.childNodes[state[1]++], 0, false]); - } else { - if (state[2]) - matcher.rewind(); - stack.pop(); - } - } - } - return res; - } - - /** - * Query CSS Selector to find matching node. - * @param {string} selector Simplified CSS selector - * @param {Matcher} selector A Matcher instance - * @return {HTMLElement} matching node - */ - public querySelector(selector: string | Matcher) { - let matcher: Matcher; - if (selector instanceof Matcher) { - matcher = selector; - matcher.reset(); - } else { - matcher = new Matcher(selector); - } - const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean; }[]; - for (let i = 0; i < this.childNodes.length; i++) { - stack.push([this.childNodes[i], 0, false]); - while (stack.length) { - const state = arr_back(stack); - const el = state[0]; - if (state[1] === 0) { - // Seen for first time. - if (el.nodeType !== NodeType.ELEMENT_NODE) { - stack.pop(); - continue; - } - if (state[2] = matcher.advance(el)) { - if (matcher.matched) { - return el as HTMLElement; - } - } - } - if (state[1] < el.childNodes.length) { - stack.push([el.childNodes[state[1]++], 0, false]); - } else { - if (state[2]) - matcher.rewind(); - stack.pop(); - } - } - } - return null; - } - - /** - * Append a child node to childNodes - * @param {Node} node node to append - * @return {Node} node appended - */ - public appendChild(node: T) { - // node.parentNode = this; - this.childNodes.push(node); - if (node instanceof HTMLElement) { - node.parentNode = this; - } - return node; - } - - /** - * Get first child node - * @return {Node} first child node - */ - get firstChild() { - return this.childNodes[0]; - } - - /** - * Get last child node - * @return {Node} last child node - */ - get lastChild() { - return arr_back(this.childNodes); - } - - /** - * Get attributes - * @return {Object} parsed and unescaped attributes - */ - get attributes() { - if (this._attrs) - return this._attrs; - this._attrs = {}; - const attrs = this.rawAttributes; - for (const key in attrs) { - const val = attrs[key] || ''; - this._attrs[key] = decode(val.replace(/^['"]/, '').replace(/['"]$/, '')); - } - return this._attrs; - } - - /** - * Get escaped (as-it) attributes - * @return {Object} parsed attributes - */ - get rawAttributes() { - if (this._rawAttrs) - return this._rawAttrs; - const attrs = {} as RawAttributes; - if (this.rawAttrs) { - const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*("(?:[^"]*)"|'(?:[^']*)'|(?:\S+)))?/ig; - let match: RegExpExecArray; - while (match = re.exec(this.rawAttrs)) { - attrs[match[1]] = match[2] || null; - } - } - this._rawAttrs = attrs; - return attrs; - } - - /** - * Set an attribute value to the HTMLElement - * @param {string} key The attribute name - * @param {string|number} value The value to set, or null / undefined to remove an attribute - */ - setAttribute(key: string, value: string | number) { - // Update the this.attributes - if (this._attrs) { - delete this._attrs; - } - const attrs = this.rawAttributes; // ref this._rawAttrs - if (value === undefined || value === null) { - delete attrs[key]; - } else { - attrs[key] = JSON.stringify(value); - // if (typeof value === 'string') { - // attrs[key] = JSON.stringify(encode(value));//??? should we encode value here? - // } else { - // attrs[key] = JSON.stringify(value); - // } - } - // Update rawString - this.rawAttrs = Object.keys(attrs).map((name) => { - const val = attrs[name]; - if (val === undefined || val === null) { - return name; - } else { - return name + '=' + val; - } - }).join(' '); - } - - /** - * Replace all the attributes of the HTMLElement by the provided attributes - * @param {Attributes} attributes the new attribute set - */ - setAttributes(attributes: Attributes) { - // Update the this.attributes - if (this._attrs) { - delete this._attrs; - } - // Update the raw attributes map - if (this._rawAttrs) { - delete this._rawAttrs; - } - // Update rawString - this.rawAttrs = Object.keys(attributes).map((name) => { - const val = attributes[name]; - if (val === undefined || val === null) { - return name; - } else { - return name + '=' + JSON.stringify(val); - // if (typeof val === 'string') { - // return name + '=' + JSON.stringify(encode(val)); //??? should we encode value here? - // } else { - // return name + '=' + JSON.stringify(val); - // } - } - }).join(' '); - } -} - -interface MatherFunction { func: any; tagName: string; classes: string | string[]; attr_key: any; value: any; } - -/** - * Cache to store generated match functions - * @type {Object} - */ -let pMatchFunctionCache = {} as { [name: string]: MatherFunction }; - -/** - * Function cache - */ -const functionCache = { - "f145": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - if (el.id != tagName.substr(1)) return false; - for (let cls = classes, i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false; - return true; - }, - "f45": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - for (let cls = classes, i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false; - return true; - }, - "f15": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - if (el.id != tagName.substr(1)) return false; - return true; - }, - "f1": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - if (el.id != tagName.substr(1)) return false; - }, - "f5": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - el = el || {} as HTMLElement; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - return true; - }, - "f245": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - let attrs = el.attributes; for (let key in attrs) { const val = attrs[key]; if (key == attr_key && val == value) { return true; } } return false; - // for (let cls = classes, i = 0; i < cls.length; i++) {if (el.classNames.indexOf(cls[i]) === -1){ return false;}} - // return true; - }, - "f25": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - let attrs = el.attributes; for (let key in attrs) { const val = attrs[key]; if (key == attr_key && val == value) { return true; } } return false; - //return true; - }, - "f2": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - let attrs = el.attributes; for (let key in attrs) { const val = attrs[key]; if (key == attr_key && val == value) { return true; } } return false; - }, - "f345": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - if (el.tagName != tagName) return false; - for (let cls = classes, i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false; - return true; - }, - "f35": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - if (el.tagName != tagName) return false; - return true; - }, - "f3": function (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { - "use strict"; - tagName = tagName || ""; - classes = classes || []; - attr_key = attr_key || ""; - value = value || ""; - if (el.tagName != tagName) return false; - } -} -/** - * Matcher class to make CSS match - * - * @class Matcher - */ -export class Matcher { - private matchers: MatherFunction[]; - private nextMatch = 0; - /** - * Creates an instance of Matcher. - * @param {string} selector - * - * @memberof Matcher - */ - constructor(selector: string) { - functionCache["f5"] = functionCache["f5"]; - this.matchers = selector.split(' ').map((matcher) => { - if (pMatchFunctionCache[matcher]) - return pMatchFunctionCache[matcher]; - const parts = matcher.split('.'); - const tagName = parts[0]; - const classes = parts.slice(1).sort(); - let source = '"use strict";'; - let function_name = 'f'; - let attr_key = ""; - let value = ""; - if (tagName && tagName != '*') { - let matcher: RegExpMatchArray; - if (tagName[0] == '#') { - source += 'if (el.id != ' + JSON.stringify(tagName.substr(1)) + ') return false;';//1 - function_name += '1'; - } else if (matcher = tagName.match(/^\[\s*(\S+)\s*(=|!=)\s*((((["'])([^\6]*)\6))|(\S*?))\]\s*/)) { - attr_key = matcher[1]; - let method = matcher[2]; - if (method !== '=' && method !== '!=') { - throw new Error('Selector not supported, Expect [key${op}value].op must be =,!='); - } - if (method === '=') { - method = '=='; - } - value = matcher[7] || matcher[8]; - - source += `let attrs = el.attributes;for (let key in attrs){const val = attrs[key]; if (key == "${attr_key}" && val == "${value}"){return true;}} return false;`;//2 - function_name += '2'; - } else { - source += 'if (el.tagName != ' + JSON.stringify(tagName) + ') return false;';//3 - function_name += '3'; - } - } - if (classes.length > 0) { - source += 'for (let cls = ' + JSON.stringify(classes) + ', i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false;';//4 - function_name += '4'; - } - source += 'return true;';//5 - function_name += '5'; - let obj = { - func: functionCache[function_name], - tagName: tagName || "", - classes: classes || "", - attr_key: attr_key || "", - value: value || "" - } - source = source || ""; - return pMatchFunctionCache[matcher] = obj as MatherFunction; - }); - } - /** - * Trying to advance match pointer - * @param {HTMLElement} el element to make the match - * @return {bool} true when pointer advanced. - */ - advance(el: Node) { - if (this.nextMatch < this.matchers.length && - this.matchers[this.nextMatch].func(el, this.matchers[this.nextMatch].tagName, this.matchers[this.nextMatch].classes, this.matchers[this.nextMatch].attr_key, this.matchers[this.nextMatch].value)) { - this.nextMatch++; - return true; - } - return false; - } - /** - * Rewind the match pointer - */ - rewind() { - this.nextMatch--; - } - /** - * Trying to determine if match made. - * @return {bool} true when the match is made - */ - get matched() { - return this.nextMatch == this.matchers.length; - } - /** - * Rest match pointer. - * @return {[type]} [description] - */ - reset() { - this.nextMatch = 0; - } - /** - * flush cache to free memory - */ - flushCache() { - pMatchFunctionCache = {}; - } -} +import arr_back from './back'; +import CommentNode from './nodes/comment'; +export { default as HTMLElement } from './nodes/html'; +import HTMLElement from './nodes/html'; +import TextNode from './nodes/text'; +export { default as Node } from './nodes/node'; // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name const kMarkupPattern = /)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig; @@ -831,25 +49,26 @@ const kBlockTextElements = { pre: true }; -/** - * Parses HTML and returns a root element - * Parse a chuck of HTML source. - * @param {string} data html - * @return {HTMLElement} root element - */ -export function parse(data: string, options?: { +export interface Options { lowerCaseTagName?: boolean; noFix?: boolean; script?: boolean; style?: boolean; pre?: boolean; comment?: boolean; -}) { +} + +/** + * Parses HTML and returns a root element + * Parse a chuck of HTML source. + * @param {string} data html + * @return {HTMLElement} root element + */ +export function parse(data: string, options = {} as Options) { const root = new HTMLElement(null, {}); let currentParent = root; const stack = [root]; let lastTextPos = -1; - options = options || {} as any; let match: RegExpExecArray; while (match = kMarkupPattern.exec(data)) { if (lastTextPos > -1) { @@ -860,7 +79,7 @@ export function parse(data: string, options?: { } } lastTextPos = kMarkupPattern.lastIndex; - if (match[0][1] == '!') { + if (match[0][1] === '!') { // this is a comment if (options.comment) { // Only keep what is in between @@ -873,13 +92,14 @@ export function parse(data: string, options?: { match[2] = match[2].toLowerCase(); if (!match[1]) { // not or ... - let closeMarkup = ''; - let index = data.indexOf(closeMarkup, kMarkupPattern.lastIndex); + const closeMarkup = ''; + const index = data.indexOf(closeMarkup, kMarkupPattern.lastIndex); if (options[match[2]]) { let text: string; - if (index == -1) { + if (index === -1) { // there is no matching ending for the text element. text = data.substr(kMarkupPattern.lastIndex); } else { @@ -903,7 +123,7 @@ export function parse(data: string, options?: { currentParent.appendChild(new TextNode(text)); } } - if (index == -1) { + if (index === -1) { lastTextPos = kMarkupPattern.lastIndex = data.length + 1; } else { lastTextPos = kMarkupPattern.lastIndex = index + closeMarkup.length; @@ -915,14 +135,15 @@ export function parse(data: string, options?: { kSelfClosingElements[match[2]]) { // or
etc. while (true) { - if (currentParent.tagName == match[2]) { + if (currentParent.tagName === match[2]) { stack.pop(); currentParent = arr_back(stack); break; } else { + const tagName = currentParent.tagName as 'li' | 'a' | 'b' | 'i' | 'p' | 'td' | 'th'; // Trying to close current tag, and move on - if (kElementsClosedByClosing[currentParent.tagName]) { - if (kElementsClosedByClosing[currentParent.tagName][match[2]]) { + if (kElementsClosedByClosing[tagName]) { + if (kElementsClosedByClosing[tagName][match[2]]) { stack.pop(); currentParent = arr_back(stack); continue; @@ -934,7 +155,7 @@ export function parse(data: string, options?: { } } } - type Response = (HTMLElement | TextNode) & { valid: boolean; }; + type Response = (HTMLElement | TextNode) & { valid: boolean }; const valid = !!(stack.length === 1); if (!options.noFix) { const response = root as Response; @@ -974,3 +195,5 @@ export function parse(data: string, options?: { return response; } } + +export default parse; diff --git a/src/matcher.ts b/src/matcher.ts new file mode 100644 index 0000000..e4e94a1 --- /dev/null +++ b/src/matcher.ts @@ -0,0 +1,240 @@ +import HTMLElement from './nodes/html'; + +interface MatherFunction { + func(el: HTMLElement, tagName: string, classes: string[] | string, attr_key: string, value: string): boolean; + tagName: string; + classes: string | string[]; + attr_key: string; + value: string; +} + +/** + * Cache to store generated match functions + * @type {Object} + */ +let pMatchFunctionCache = {} as { [name: string]: MatherFunction }; + +/** + * Function cache + */ +const functionCache = { + f145(el: HTMLElement, tagName: string, classes: string[]) { + 'use strict'; + tagName = tagName || ''; + classes = classes || []; + if (el.id !== tagName.substr(1)) { + return false; + } + for (let cls = classes, i = 0; i < cls.length; i++) { + if (el.classNames.indexOf(cls[i]) === -1) { + return false; + } + } + return true; + }, + f45(el: HTMLElement, tagName: string, classes: string[]) { + 'use strict'; + tagName = tagName || ''; + classes = classes || []; + for (let cls = classes, i = 0; i < cls.length; i++) { + if (el.classNames.indexOf(cls[i]) === -1) { + return false; + } + } + return true; + }, + f15(el: HTMLElement, tagName: string) { + 'use strict'; + tagName = tagName || ''; + if (el.id !== tagName.substr(1)) { + return false; + } + return true; + }, + f1(el: HTMLElement, tagName: string) { + 'use strict'; + tagName = tagName || ''; + if (el.id !== tagName.substr(1)) { + return false; + } + }, + f5() { + 'use strict'; + return true; + }, + f245(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { + 'use strict'; + tagName = tagName || ''; + classes = classes || []; + attr_key = attr_key || ''; + value = value || ''; + const attrs = el.attributes; + return Object.keys(attrs).some((key) => { + const val = attrs[key]; + return key === attr_key && val === value + }); + // for (let cls = classes, i = 0; i < cls.length; i++) {if (el.classNames.indexOf(cls[i]) === -1){ return false;}} + // return true; + }, + f25(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { + 'use strict'; + tagName = tagName || ''; + classes = classes || []; + attr_key = attr_key || ''; + value = value || ''; + const attrs = el.attributes; + return Object.keys(attrs).some((key) => { + const val = attrs[key]; + return key === attr_key && val === value + }); + // return true; + }, + f2(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) { + 'use strict'; + tagName = tagName || ''; + classes = classes || []; + attr_key = attr_key || ''; + value = value || ''; + const attrs = el.attributes; + return Object.keys(attrs).some((key) => { + const val = attrs[key]; + return key === attr_key && val === value + }); + }, + f345(el: HTMLElement, tagName: string, classes: string[]) { + 'use strict'; + tagName = tagName || ''; + classes = classes || []; + if (el.tagName !== tagName) { + return false; + } + for (let cls = classes, i = 0; i < cls.length; i++) { + if (el.classNames.indexOf(cls[i]) === -1) { + return false; + } + } + return true; + }, + f35(el: HTMLElement, tagName: string) { + 'use strict'; + tagName = tagName || ''; + return el.tagName === tagName; + }, + f3(el: HTMLElement, tagName: string) { + 'use strict'; + tagName = tagName || ''; + if (el.tagName !== tagName) { + return false; + } + } +} + +/** + * Matcher class to make CSS match + * + * @class Matcher + */ +export default class Matcher { + private matchers: MatherFunction[]; + private nextMatch = 0; + /** + * Creates an instance of Matcher. + * @param {string} selector + * + * @memberof Matcher + */ + constructor(selector: string) { + functionCache.f5 = functionCache.f5; + this.matchers = selector.split(' ').map((matcher) => { + if (pMatchFunctionCache[matcher]) + return pMatchFunctionCache[matcher]; + const parts = matcher.split('.'); + const tagName = parts[0]; + const classes = parts.slice(1).sort(); + // let source = '"use strict";'; + let function_name = 'f'; + let attr_key = ''; + let value = ''; + if (tagName && tagName !== '*') { + let reg: RegExpMatchArray; + if (tagName.startsWith('#')) { + // source += 'if (el.id != ' + JSON.stringify(tagName.substr(1)) + ') return false;';// 1 + function_name += '1'; + } else { + reg = /^\[\s*(\S+)\s*(=|!=)\s*((((["'])([^\6]*)\6))|(\S*?))\]\s*/.exec(tagName); + if (reg) { + attr_key = reg[1]; + let method = reg[2]; + if (method !== '=' && method !== '!=') { + throw new Error('Selector not supported, Expect [key${op}value].op must be =,!='); + } + if (method === '=') { + method = '=='; + } + value = reg[7] || reg[8]; + + // source += `let attrs = el.attributes;for (let key in attrs){const val = attrs[key]; if (key == "${attr_key}" && val == "${value}"){return true;}} return false;`;// 2 + function_name += '2'; + } else { + // source += 'if (el.tagName != ' + JSON.stringify(tagName) + ') return false;';// 3 + function_name += '3'; + } + } + } + if (classes.length > 0) { + // source += 'for (let cls = ' + JSON.stringify(classes) + ', i = 0; i < cls.length; i++) if (el.classNames.indexOf(cls[i]) === -1) return false;';// 4 + function_name += '4'; + } + // source += 'return true;';// 5 + function_name += '5'; + const obj = { + func: functionCache[function_name], + tagName: tagName || '', + classes: classes || '', + attr_key: attr_key || '', + value: value || '' + } + // source = source || ''; + return pMatchFunctionCache[matcher] = obj as MatherFunction; + }); + } + /** + * Trying to advance match pointer + * @param {HTMLElement} el element to make the match + * @return {bool} true when pointer advanced. + */ + advance(el: HTMLElement) { + if (this.nextMatch < this.matchers.length && + this.matchers[this.nextMatch].func(el, this.matchers[this.nextMatch].tagName, this.matchers[this.nextMatch].classes, this.matchers[this.nextMatch].attr_key, this.matchers[this.nextMatch].value)) { + this.nextMatch++; + return true; + } + return false; + } + /** + * Rewind the match pointer + */ + rewind() { + this.nextMatch--; + } + /** + * Trying to determine if match made. + * @return {bool} true when the match is made + */ + get matched() { + return this.nextMatch === this.matchers.length; + } + /** + * Rest match pointer. + * @return {[type]} [description] + */ + reset() { + this.nextMatch = 0; + } + /** + * flush cache to free memory + */ + flushCache() { + pMatchFunctionCache = {}; + } +} diff --git a/src/nodes/comment.ts b/src/nodes/comment.ts new file mode 100644 index 0000000..820c3e9 --- /dev/null +++ b/src/nodes/comment.ts @@ -0,0 +1,28 @@ +import { decode } from 'he'; +import Node from './node'; +import NodeType from './type'; + +export default class CommentNode extends Node { + constructor(value: string) { + super(); + this.rawText = value; + } + + /** + * Node Type declaration. + * @type {Number} + */ + nodeType = NodeType.COMMENT_NODE; + + /** + * Get unescaped text value of current node and its children. + * @return {string} text content + */ + get text() { + return decode(this.rawText); + } + + toString() { + return ``; + } +} diff --git a/src/nodes/html.ts b/src/nodes/html.ts new file mode 100644 index 0000000..d9210aa --- /dev/null +++ b/src/nodes/html.ts @@ -0,0 +1,494 @@ +import { decode } from 'he'; +import Node from './node'; +import NodeType from './type'; +import TextNode from './text'; +import Matcher from '../matcher'; +import { parse } from '../index'; +import arr_back from '../back'; + +export interface KeyAttributes { + id?: string; + class?: string; +} + +export interface Attributes { + [key: string]: string; +} + +export interface RawAttributes { + [key: string]: string; +} + +const kBlockElements = { + div: true, + p: true, + // ul: true, + // ol: true, + li: true, + // table: true, + // tr: true, + td: true, + section: true, + br: true +}; + +/** + * HTMLElement, which contains a set of children. + * + * Note: this is a minimalist implementation, no complete tree + * structure provided (no parentNode, nextSibling, + * previousSibling etc). + * @class HTMLElement + * @extends {Node} + */ +export default class HTMLElement extends Node { + private _attrs: Attributes; + private _rawAttrs: RawAttributes; + public id: string; + public classNames = [] as string[]; + /** + * Node Type declaration. + */ + public nodeType = NodeType.ELEMENT_NODE; + /** + * Creates an instance of HTMLElement. + * @param keyAttrs id and class attribute + * @param [rawAttrs] attributes in string + * + * @memberof HTMLElement + */ + constructor(public tagName: string, keyAttrs: KeyAttributes, private rawAttrs = '', public parentNode = null as Node) { + super(); + this.rawAttrs = rawAttrs || ''; + this.parentNode = parentNode || null; + this.childNodes = []; + if (keyAttrs.id) { + this.id = keyAttrs.id; + } + if (keyAttrs.class) { + this.classNames = keyAttrs.class.split(/\s+/); + } + } + /** + * Remove Child element from childNodes array + * @param {HTMLElement} node node to remove + */ + public removeChild(node: Node) { + this.childNodes = this.childNodes.filter((child) => { + return (child !== node); + }); + } + /** + * Exchanges given child with new child + * @param {HTMLElement} oldNode node to exchange + * @param {HTMLElement} newNode new node + */ + public exchangeChild(oldNode: Node, newNode: Node) { + let idx = -1; + for (let i = 0; i < this.childNodes.length; i++) { + if (this.childNodes[i] === oldNode) { + idx = i; + break; + } + } + this.childNodes[idx] = newNode; + } + /** + * Get escpaed (as-it) text value of current node and its children. + * @return {string} text content + */ + get rawText() { + return this.childNodes.reduce((pre, cur) => { + return pre += cur.rawText; + }, ''); + } + /** + * Get unescaped text value of current node and its children. + * @return {string} text content + */ + get text() { + return decode(this.rawText); + } + /** + * Get structured Text (with '\n' etc.) + * @return {string} structured text + */ + get structuredText() { + let currentBlock = [] as string[]; + const blocks = [currentBlock]; + function dfs(node: Node) { + if (node.nodeType === NodeType.ELEMENT_NODE) { + if (kBlockElements[(node as HTMLElement).tagName]) { + if (currentBlock.length > 0) { + blocks.push(currentBlock = []); + } + node.childNodes.forEach(dfs); + if (currentBlock.length > 0) { + blocks.push(currentBlock = []); + } + } else { + node.childNodes.forEach(dfs); + } + } else if (node.nodeType === NodeType.TEXT_NODE) { + if ((node as TextNode).isWhitespace) { + // Whitespace node, postponed output + (currentBlock as any).prependWhitespace = true; + } else { + let text = node.text; + if ((currentBlock as any).prependWhitespace) { + text = ' ' + text; + (currentBlock as any).prependWhitespace = false; + } + currentBlock.push(text); + } + } + } + dfs(this); + return blocks + .map(function (block) { + // Normalize each line's whitespace + return block.join('').trim().replace(/\s{2,}/g, ' '); + }) + .join('\n').replace(/\s+$/, ''); // trimRight; + } + + public toString() { + const tag = this.tagName; + if (tag) { + const is_un_closed = /^meta$/i.test(tag); + const is_self_closed = /^(img|br|hr|area|base|input|doctype|link)$/i.test(tag); + const attrs = this.rawAttrs ? ' ' + this.rawAttrs : ''; + if (is_un_closed) { + return `<${tag}${attrs}>`; + } else if (is_self_closed) { + return `<${tag}${attrs} />`; + } else { + return `<${tag}${attrs}>${this.innerHTML}`; + } + } else { + return this.innerHTML; + } + } + + get innerHTML() { + return this.childNodes.map((child) => { + return child.toString(); + }).join(''); + } + + public set_content(content: string | Node | Node[]) { + if (content instanceof Node) { + content = [content]; + } else if (typeof content == 'string') { + const r = parse(content); + content = r.childNodes.length ? r.childNodes : [new TextNode(content)]; + } + this.childNodes = content; + } + + get outerHTML() { + return this.toString(); + } + + /** + * Trim element from right (in block) after seeing pattern in a TextNode. + * @param {RegExp} pattern pattern to find + * @return {HTMLElement} reference to current node + */ + public trimRight(pattern: RegExp) { + for (let i = 0; i < this.childNodes.length; i++) { + const childNode = this.childNodes[i]; + if (childNode.nodeType === NodeType.ELEMENT_NODE) { + (childNode as HTMLElement).trimRight(pattern); + } else { + const index = childNode.rawText.search(pattern); + if (index > -1) { + childNode.rawText = childNode.rawText.substr(0, index); + // trim all following nodes. + this.childNodes.length = i + 1; + } + } + } + return this; + } + /** + * Get DOM structure + * @return {string} strucutre + */ + get structure() { + const res = [] as string[]; + let indention = 0; + function write(str: string) { + res.push(' '.repeat(indention) + str); + } + function dfs(node: HTMLElement) { + const idStr = node.id ? ('#' + node.id) : ''; + const classStr = node.classNames.length ? ('.' + node.classNames.join('.')) : ''; + write(node.tagName + idStr + classStr); + indention++; + node.childNodes.forEach((childNode) => { + if (childNode.nodeType === NodeType.ELEMENT_NODE) { + dfs(childNode as HTMLElement); + } else if (childNode.nodeType === NodeType.TEXT_NODE) { + if (!(childNode as TextNode).isWhitespace) + write('#text'); + } + }); + indention--; + } + dfs(this); + return res.join('\n'); + } + + /** + * Remove whitespaces in this sub tree. + * @return {HTMLElement} pointer to this + */ + public removeWhitespace() { + let o = 0; + this.childNodes.forEach((node) => { + if (node.nodeType === NodeType.TEXT_NODE) { + if ((node as TextNode).isWhitespace) { + return; + } + node.rawText = node.rawText.trim(); + } else if (node.nodeType === NodeType.ELEMENT_NODE) { + (node as HTMLElement).removeWhitespace(); + } + this.childNodes[o++] = node; + }); + this.childNodes.length = o; + return this; + } + + /** + * Query CSS selector to find matching nodes. + * @param {string} selector Simplified CSS selector + * @param {Matcher} selector A Matcher instance + * @return {HTMLElement[]} matching elements + */ + public querySelectorAll(selector: string | Matcher): HTMLElement[] { + let matcher: Matcher; + if (selector instanceof Matcher) { + matcher = selector; + matcher.reset(); + } else { + if (selector.includes(',')) { + const selectors = selector.split(','); + return Array.from(selectors.reduce((pre, cur) => { + const result = this.querySelectorAll(cur.trim()); + return result.reduce((p, c) => { + return p.add(c); + }, pre); + }, new Set())); + } + matcher = new Matcher(selector); + } + const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[]; + return this.childNodes.reduce((res, cur) => { + stack.push([cur, 0, false]); + while (stack.length) { + const state = arr_back(stack); + const el = state[0]; + if (state[1] === 0) { + // Seen for first time. + if (el.nodeType !== NodeType.ELEMENT_NODE) { + stack.pop(); + continue; + } + state[2] = matcher.advance(el as HTMLElement); + if (state[2]) { + if (matcher.matched) { + res.push(el as HTMLElement); + // no need to go further. + matcher.rewind(); + stack.pop(); + continue; + } + } + } + if (state[1] < el.childNodes.length) { + stack.push([el.childNodes[state[1]++], 0, false]); + } else { + if (state[2]) { + matcher.rewind(); + } + stack.pop(); + } + } + return res; + }, [] as HTMLElement[]); + } + + /** + * Query CSS Selector to find matching node. + * @param {string} selector Simplified CSS selector + * @param {Matcher} selector A Matcher instance + * @return {HTMLElement} matching node + */ + public querySelector(selector: string | Matcher) { + let matcher: Matcher; + if (selector instanceof Matcher) { + matcher = selector; + matcher.reset(); + } else { + matcher = new Matcher(selector); + } + const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[]; + for (const node of this.childNodes) { + stack.push([node, 0, false]); + while (stack.length) { + const state = arr_back(stack); + const el = state[0]; + if (state[1] === 0) { + // Seen for first time. + if (el.nodeType !== NodeType.ELEMENT_NODE) { + stack.pop(); + continue; + } + state[2] = matcher.advance(el as HTMLElement); + if (state[2]) { + if (matcher.matched) { + return el; + } + } + } + if (state[1] < el.childNodes.length) { + stack.push([el.childNodes[state[1]++], 0, false]); + } else { + if (state[2]) + matcher.rewind(); + stack.pop(); + } + } + } + return null; + } + + /** + * Append a child node to childNodes + * @param {Node} node node to append + * @return {Node} node appended + */ + public appendChild(node: T) { + // node.parentNode = this; + this.childNodes.push(node); + if (node instanceof HTMLElement) { + node.parentNode = this; + } + return node; + } + + /** + * Get first child node + * @return {Node} first child node + */ + get firstChild() { + return this.childNodes[0]; + } + + /** + * Get last child node + * @return {Node} last child node + */ + get lastChild() { + return arr_back(this.childNodes); + } + + /** + * Get attributes + * @return {Object} parsed and unescaped attributes + */ + get attributes() { + if (this._attrs) { + return this._attrs; + } + this._attrs = {}; + const attrs = this.rawAttributes; + for (const key in attrs) { + const val = attrs[key] || ''; + this._attrs[key] = decode(val.replace(/^['"]/, '').replace(/['"]$/, '')); + } + return this._attrs; + } + + /** + * Get escaped (as-it) attributes + * @return {Object} parsed attributes + */ + get rawAttributes() { + if (this._rawAttrs) + return this._rawAttrs; + const attrs = {} as RawAttributes; + if (this.rawAttrs) { + const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*("(?:[^"]*)"|'(?:[^']*)'|(?:\S+)))?/ig; + let match: RegExpExecArray; + while (match = re.exec(this.rawAttrs)) { + attrs[match[1]] = match[2] || null; + } + } + this._rawAttrs = attrs; + return attrs; + } + + /** + * Set an attribute value to the HTMLElement + * @param {string} key The attribute name + * @param {string|number} value The value to set, or null / undefined to remove an attribute + */ + setAttribute(key: string, value: string | number) { + // Update the this.attributes + if (this._attrs) { + delete this._attrs; + } + const attrs = this.rawAttributes; // ref this._rawAttrs + if (value === undefined || value === null) { + delete attrs[key]; + } else { + attrs[key] = JSON.stringify(value); + // if (typeof value === 'string') { + // attrs[key] = JSON.stringify(encode(value));//??? should we encode value here? + // } else { + // attrs[key] = JSON.stringify(value); + // } + } + // Update rawString + this.rawAttrs = Object.keys(attrs).map((name) => { + const val = attrs[name]; + if (val === undefined || val === null) { + return name; + } else { + return name + '=' + val; + } + }).join(' '); + } + + /** + * Replace all the attributes of the HTMLElement by the provided attributes + * @param {Attributes} attributes the new attribute set + */ + setAttributes(attributes: Attributes) { + // Update the this.attributes + if (this._attrs) { + delete this._attrs; + } + // Update the raw attributes map + if (this._rawAttrs) { + delete this._rawAttrs; + } + // Update rawString + this.rawAttrs = Object.keys(attributes).map((name) => { + const val = attributes[name]; + if (val === undefined || val === null) { + return name; + } else { + return name + '=' + JSON.stringify(val); + // if (typeof val === 'string') { + // return name + '=' + JSON.stringify(encode(val)); //??? should we encode value here? + // } else { + // return name + '=' + JSON.stringify(val); + // } + } + }).join(' '); + } +} diff --git a/src/nodes/node.ts b/src/nodes/node.ts new file mode 100644 index 0000000..ea66cb5 --- /dev/null +++ b/src/nodes/node.ts @@ -0,0 +1,12 @@ +import NodeType from './type'; + +/** + * Node Class as base class for TextNode and HTMLElement. + */ +export default abstract class Node { + nodeType: NodeType; + childNodes = [] as Node[]; + text: string; + rawText: string; + abstract toString(): string; +} diff --git a/src/nodes/text.ts b/src/nodes/text.ts new file mode 100644 index 0000000..96c081b --- /dev/null +++ b/src/nodes/text.ts @@ -0,0 +1,40 @@ +import { decode } from 'he'; +import NodeType from './type'; +import Node from './node'; + +/** + * TextNode to contain a text element in DOM tree. + * @param {string} value [description] + */ +export default class TextNode extends Node { + constructor(value: string) { + super(); + this.rawText = value; + } + + /** + * Node Type declaration. + * @type {Number} + */ + nodeType = NodeType.TEXT_NODE; + + /** + * Get unescaped text value of current node and its children. + * @return {string} text content + */ + get text() { + return decode(this.rawText); + } + + /** + * Detect if the node contains only white space. + * @return {bool} + */ + get isWhitespace() { + return /^(\s| )*$/.test(this.rawText); + } + + toString() { + return this.text; + } +} diff --git a/src/nodes/type.ts b/src/nodes/type.ts new file mode 100644 index 0000000..8863b6c --- /dev/null +++ b/src/nodes/type.ts @@ -0,0 +1,7 @@ +enum NodeType { + ELEMENT_NODE = 1, + TEXT_NODE = 3, + COMMENT_NODE = 8 +} + +export default NodeType; diff --git a/test/html.js b/test/html.js index bb1ed38..efa1597 100644 --- a/test/html.js +++ b/test/html.js @@ -3,14 +3,12 @@ var fs = require('fs'); var util = require('util'); var HTMLParser = require('../dist'); +var Matcher = require('../dist/matcher').default; +var HTMLElement = require('../dist/nodes/html').default; +var TextNode = require('../dist/nodes/text').default; +var CommentNode = require('../dist/nodes/comment').default; describe('HTML Parser', function () { - - var Matcher = HTMLParser.Matcher; - var HTMLElement = HTMLParser.HTMLElement; - var TextNode = HTMLParser.TextNode; - var CommentNode = HTMLParser.CommentNode; - describe('Matcher', function () { it('should match corrent elements', function () { var matcher = new Matcher('#id .a a.b *.a.b .a.b * a'); @@ -372,7 +370,8 @@ describe('HTML Parser', function () { 'c': '12', d: '&&<>foo' }); - root.firstChild.toString().should.eql('

'); + root.firstChild.toString().should.eql('

'); + // root.firstChild.toString().should.eql('

'); }); });