Skip to content

Commit

Permalink
Merge 44dba5e into 0207ee2
Browse files Browse the repository at this point in the history
  • Loading branch information
remarkablemark committed Dec 13, 2020
2 parents 0207ee2 + 44dba5e commit a1ac9bc
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 101 deletions.
2 changes: 2 additions & 0 deletions karma.conf.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ module.exports = config => {
files: [
'dist/htmlparser2.js',
'lib/*.js',
'node_modules/domhandler/lib/node.js',
'test/cases/html.js',
'test/client/*.js',
'test/helpers/*.js'
Expand All @@ -26,6 +27,7 @@ module.exports = config => {
preprocessors: {
'dist/**/*.js': ['commonjs'],
'lib/**/*.js': ['commonjs'],
'node_modules/domhandler/lib/node.js': ['commonjs'],
'test/**/*.js': ['commonjs']
},

Expand Down
4 changes: 2 additions & 2 deletions lib/html-to-dom-client.d.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// TypeScript Version: 4.1

import { DomElement } from 'domhandler';
import { DataNode, Element } from 'domhandler';

/**
* Parses HTML string to DOM nodes in browser.
*
* @param html - HTML markup.
* @return - DOM elements.
*/
export default function HTMLDOMParser(html: string): DomElement[];
export default function HTMLDOMParser(html: string): Array<DataNode | Element>;
8 changes: 4 additions & 4 deletions lib/html-to-dom-server.d.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
// TypeScript Version: 4.1

import { DomHandlerOptions, DomElement } from 'domhandler';
import { DataNode, DomHandlerOptions, Element } from 'domhandler';

/**
* Parses HTML string to DOM nodes in Node.js.
*
* This is the same method as `require('htmlparser2').parseDOM`
* https://github.com/fb55/htmlparser2/blob/v3.9.1/lib/index.js#L39-L43
* https://github.com/fb55/htmlparser2/blob/v4.0.0/src/index.ts#L18-L22
*
* @param html - HTML markup.
* @param options - Parser options (https://github.com/fb55/domhandler/tree/v2.4.2#readme).
* @param options - Parser options (https://github.com/fb55/domhandler/tree/v3.0.0#readme).
* @return - DOM elements.
*/
export default function HTMLDOMParser(
html: string,
options?: DomHandlerOptions
): DomElement[];
): Array<DataNode | Element>;
14 changes: 7 additions & 7 deletions lib/html-to-dom-server.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
var Parser = require('htmlparser2/lib/Parser');
var DomHandler = require('domhandler');
var Parser = require('htmlparser2/lib/Parser').Parser;
var DomHandler = require('domhandler').DomHandler;

/**
* Parses HTML string to DOM nodes in Node.js.
*
* This is the same method as `require('htmlparser2').parseDOM`
* https://github.com/fb55/htmlparser2/blob/v3.9.1/lib/index.js#L39-L43
* https://github.com/fb55/htmlparser2/blob/v4.0.0/src/index.ts#L18-L22
*
* @param {String} html - HTML markup.
* @param {Object} [options] - Parser options (https://github.com/fb55/domhandler/tree/v2.4.2#readme).
* @return {DomElement[]} - DOM elements.
* @param {string} html - HTML markup.
* @param {DomHandlerOptions} [options] - Parser options (https://github.com/fb55/domhandler/tree/v3.0.0#readme).
* @return {Array<DataNode|Element>} - DOM elements.
*/
function HTMLDOMParser(html, options) {
if (typeof html !== 'string') {
Expand All @@ -20,7 +20,7 @@ function HTMLDOMParser(html, options) {
return [];
}

var handler = new DomHandler(options);
var handler = new DomHandler(undefined, options);
new Parser(handler, options).end(html);
return handler.dom;
}
Expand Down
6 changes: 3 additions & 3 deletions lib/utilities.d.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// TypeScript Version: 4.1

import { DomElement } from 'domhandler';
import { DataNode, Element } from 'domhandler';

/**
* Formats DOM attributes to a hash map.
Expand All @@ -22,9 +22,9 @@ export function formatAttributes(
*/
export function formatDOM(
nodes: NodeList,
parentNode?: DomElement,
parentNode?: DataNode | Element,
directive?: string
): DomElement[];
): Array<DataNode | Element>;

/**
* Detects if browser is Internet Explorer.
Expand Down
118 changes: 53 additions & 65 deletions lib/utilities.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
var CASE_SENSITIVE_TAG_NAMES = require('./constants').CASE_SENSITIVE_TAG_NAMES;
var constants = require('./constants');
var domhandler = require('domhandler/lib/node');

var CASE_SENSITIVE_TAG_NAMES = constants.CASE_SENSITIVE_TAG_NAMES;

var Element = domhandler.Element;
var DataNode = domhandler.DataNode;
var ProcessingInstruction = domhandler.ProcessingInstruction;

var caseSensitiveTagNamesMap = {};
var tagName;

for (var i = 0, len = CASE_SENSITIVE_TAG_NAMES.length; i < len; i++) {
tagName = CASE_SENSITIVE_TAG_NAMES[i];
caseSensitiveTagNamesMap[tagName.toLowerCase()] = tagName;
Expand Down Expand Up @@ -53,91 +61,71 @@ function formatTagName(tagName) {
/**
* Formats the browser DOM nodes to mimic the output of `htmlparser2.parseDOM()`.
*
* @param {NodeList} nodes - DOM nodes.
* @param {object} [parentNode] - Formatted parent node.
* @param {string} [directive] - Directive.
* @return {DomElement[]} - Formatted DOM object.
* @param {NodeList} nodes - DOM nodes.
* @param {DataNode|Element} [parentNode] - Formatted parent node.
* @param {string} [directive] - Directive.
* @return {Array<DataNode|Element>} - Formatted DOM object.
*/
function formatDOM(nodes, parentNode, directive) {
function formatDOM(domNodes, parentNode, directive) {
parentNode = parentNode || null;

var result = [];
var domNode;
var node;
var prevNode;
var nodeObj;

// `NodeList` is array-like
for (var i = 0, len = nodes.length; i < len; i++) {
node = nodes[i];
// reset
nodeObj = {
next: null,
prev: result[i - 1] || null,
parent: parentNode
};

// set the next node for the previous node (if applicable)
prevNode = result[i - 1];
if (prevNode) {
prevNode.next = nodeObj;
}
var output = [];

// set the node name if it's not "#text" or "#comment"
// e.g., "div"
if (node.nodeName[0] !== '#') {
nodeObj.name = formatTagName(node.nodeName);
// also, nodes of type "tag" have "attribs"
nodeObj.attribs = {}; // default
if (node.attributes && node.attributes.length) {
nodeObj.attribs = formatAttributes(node.attributes);
}
}
for (var i = 0, len = domNodes.length; i < len; i++) {
domNode = domNodes[i];

// set the node type
// e.g., "tag"
switch (node.nodeType) {
// 1 = element
// set the node data given the type
switch (domNode.nodeType) {
case 1:
if (nodeObj.name === 'script' || nodeObj.name === 'style') {
nodeObj.type = nodeObj.name;
} else {
nodeObj.type = 'tag';
}
// recursively format the children
nodeObj.children = formatDOM(node.childNodes, nodeObj);
// script, style, or tag
node = new Element(
formatTagName(domNode.nodeName),
formatAttributes(domNode.attributes)
);
node.children = formatDOM(domNode.childNodes, node);
break;
// 2 = attribute
// 3 = text

case 3:
nodeObj.type = 'text';
nodeObj.data = node.nodeValue;
node = new DataNode('text', domNode.nodeValue);
break;
// 8 = comment

case 8:
nodeObj.type = 'comment';
nodeObj.data = node.nodeValue;
node = new DataNode('comment', domNode.nodeValue);
break;
}

result.push(nodeObj);
// set next for previous node
prevNode = output[i - 1] || null;
if (prevNode) {
prevNode.next = node;
}

// set properties for current node
node.parent = parentNode;
node.prev = prevNode;
node.next = null;

output.push(node);
}

if (directive) {
result.unshift({
name: directive.substring(0, directive.indexOf(' ')).toLowerCase(),
data: directive,
type: 'directive',
next: result[0] ? result[0] : null,
prev: null,
parent: parentNode
});

if (result[1]) {
result[1].prev = result[0];
node = new ProcessingInstruction(
directive.substring(0, directive.indexOf(' ')).toLowerCase(),
directive
);
node.next = output[0] || null;
node.parent = parentNode;
output.unshift(node);

if (output[1]) {
output[1].prev = output[0];
}
}

return result;
return output;
}

/**
Expand Down
5 changes: 2 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@
"pojo"
],
"dependencies": {
"@types/domhandler": "2.4.1",
"domhandler": "2.4.2",
"htmlparser2": "3.10.1"
"domhandler": "3.0.0",
"htmlparser2": "4.0.0"
},
"devDependencies": {
"@commitlint/cli": "^11.0.0",
Expand Down
10 changes: 10 additions & 0 deletions test/helpers/run-tests.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
var isKarma =
typeof window === 'object' && typeof window.__karma__ === 'object';

/**
* Runs tests.
*
Expand All @@ -18,6 +21,13 @@ function runTests(assert, actualParser, expectedParser, testCases) {
var actualOutput = actualParser(testCase.data, parserOptions);
var expectedOutput = expectedParser(testCase.data, parserOptions);

// use `JSON.decycle` since `assert.deepEqual` fails
// when instance types are different in the browser
if (isKarma) {
actualOutput = JSON.decycle(actualOutput);
expectedOutput = JSON.decycle(expectedOutput);
}

assert.deepEqual(actualOutput, expectedOutput);
});
});
Expand Down
6 changes: 3 additions & 3 deletions test/server/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ const html = '<html>';

describe('server parser', () => {
// before
mock('htmlparser2/lib/Parser', Parser);
mock('domhandler', DomHandler);
mock('htmlparser2/lib/Parser', { Parser });
mock('domhandler', { DomHandler });
const parse = require('../..');

it('calls `DomHandler` and `Parser`', () => {
Expand All @@ -45,7 +45,7 @@ describe('server parser', () => {
it('passes options to `DomHandler` and `Parser`', () => {
const options = { decodeEntities: true };
parse(html, options);
expect(DomHandler.calledWith(options)).to.equal(true);
expect(DomHandler.calledWith(undefined, options)).to.equal(true);
expect(Parser.calledWith(DomHandler, options));
});

Expand Down
25 changes: 11 additions & 14 deletions test/types/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
import parseDOM from 'html-dom-parser';
import parse from 'html-dom-parser';

// $ExpectType DomElement[]
parseDOM('<div>text</div>');
// $ExpectType (DataNode | Element)[]
parse('<div>text</div>');

// $ExpectType DomElement[]
parseDOM('<div>text</div>', { normalizeWhitespace: true });
// $ExpectType (DataNode | Element)[]
parse('<div>text</div>', { normalizeWhitespace: true });

// $ExpectType DomElement[]
parseDOM('<div>text</div>', { withDomLvl1: true });
// $ExpectType (DataNode | Element)[]
parse('<div>text</div>', { withStartIndices: true });

// $ExpectType DomElement[]
parseDOM('<div>text</div>', { withStartIndices: true });
// $ExpectType (DataNode | Element)[]
parse('<div>text</div>', { withEndIndices: true });

// $ExpectType DomElement[]
parseDOM('<div>text</div>', { withEndIndices: true });

// $ExpectType DomElement[]
parseDOM('');
// $ExpectType (DataNode | Element)[]
parse('');

0 comments on commit a1ac9bc

Please sign in to comment.