Skip to content

Commit

Permalink
处理部分html特殊字符解析失败的问题
Browse files Browse the repository at this point in the history
  • Loading branch information
fan authored and fan committed Sep 19, 2022
1 parent 974be3c commit a964fda
Show file tree
Hide file tree
Showing 21 changed files with 2,675 additions and 28 deletions.
2 changes: 1 addition & 1 deletion build
Expand Up @@ -369,7 +369,7 @@ class Build{
};

new Build({
debug:true
debug:false
}).init();


2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "towxml",
"version": "3.2.3",
"version": "3.3.0",
"description": "HTML、Markdown转WXML(WeiXin Markup Language)渲染库",
"main": "./dist/index.js",
"scripts": {
Expand Down
494 changes: 492 additions & 2 deletions parse/parse2/Parser.js

Large diffs are not rendered by default.

905 changes: 903 additions & 2 deletions parse/parse2/Tokenizer.js

Large diffs are not rendered by default.

55 changes: 55 additions & 0 deletions parse/parse2/domelementtype/index.js
@@ -0,0 +1,55 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Doctype = exports.CDATA = exports.Tag = exports.Style = exports.Script = exports.Comment = exports.Directive = exports.Text = exports.Root = exports.isTag = exports.ElementType = void 0;
/** Types of elements found in htmlparser2's DOM */
var ElementType;
(function (ElementType) {
/** Type for the root element of a document */
ElementType["Root"] = "root";
/** Type for Text */
ElementType["Text"] = "text";
/** Type for <? ... ?> */
ElementType["Directive"] = "directive";
/** Type for <!-- ... --> */
ElementType["Comment"] = "comment";
/** Type for <script> tags */
ElementType["Script"] = "script";
/** Type for <style> tags */
ElementType["Style"] = "style";
/** Type for Any tag */
ElementType["Tag"] = "tag";
/** Type for <![CDATA[ ... ]]> */
ElementType["CDATA"] = "cdata";
/** Type for <!doctype ...> */
ElementType["Doctype"] = "doctype";
})(ElementType = exports.ElementType || (exports.ElementType = {}));
/**
* Tests whether an element is a tag or not.
*
* @param elem Element to test
*/
function isTag(elem) {
return (elem.type === ElementType.Tag ||
elem.type === ElementType.Script ||
elem.type === ElementType.Style);
}
exports.isTag = isTag;
// Exports for backwards compatibility
/** Type for the root element of a document */
exports.Root = ElementType.Root;
/** Type for Text */
exports.Text = ElementType.Text;
/** Type for <? ... ?> */
exports.Directive = ElementType.Directive;
/** Type for <!-- ... --> */
exports.Comment = ElementType.Comment;
/** Type for <script> tags */
exports.Script = ElementType.Script;
/** Type for <style> tags */
exports.Style = ElementType.Style;
/** Type for Any tag */
exports.Tag = ElementType.Tag;
/** Type for <![CDATA[ ... ]]> */
exports.CDATA = ElementType.CDATA;
/** Type for <!doctype ...> */
exports.Doctype = ElementType.Doctype;
147 changes: 145 additions & 2 deletions parse/parse2/domhandler/index.js
@@ -1,2 +1,145 @@
/*! Project:无, Create:FWS 2020.01.08 21:48, Update:FWS 2020.01.08 21:48 */
"use strict";Object.defineProperty(exports,"__esModule",{value:!0});var node_1=require("./node");exports.Node=node_1.Node,exports.Element=node_1.Element,exports.DataNode=node_1.DataNode,exports.NodeWithChildren=node_1.NodeWithChildren;var reWhitespace=/\s+/g,defaultOpts={normalizeWhitespace:!1,withStartIndices:!1,withEndIndices:!1},DomHandler=function(){function t(t,e,o){this.dom=[],this._done=!1,this._tagStack=[],this._lastNode=null,this._parser=null,"function"==typeof e&&(o=e,e=defaultOpts),"object"==typeof t&&(e=t,t=undefined),this._callback=t||null,this._options=e||defaultOpts,this._elementCB=o||null}return t.prototype.onparserinit=function(t){this._parser=t},t.prototype.onreset=function(){this.dom=[],this._done=!1,this._tagStack=[],this._lastNode=null,this._parser=this._parser||null},t.prototype.onend=function(){this._done||(this._done=!0,this._parser=null,this.handleCallback(null))},t.prototype.onerror=function(t){this.handleCallback(t)},t.prototype.onclosetag=function(){this._lastNode=null;var t=this._tagStack.pop();t&&this._parser&&(this._options.withEndIndices&&(t.endIndex=this._parser.endIndex),this._elementCB&&this._elementCB(t))},t.prototype.onopentag=function(t,e){var o=new node_1.Element(t,e);this.addNode(o),this._tagStack.push(o)},t.prototype.ontext=function(t){var e=this._options.normalizeWhitespace,o=this._lastNode;if(o&&"text"===o.type)e?o.data=(o.data+t).replace(reWhitespace," "):o.data+=t;else{e&&(t=t.replace(reWhitespace," "));var n=new node_1.DataNode("text",t);this.addNode(n),this._lastNode=n}},t.prototype.oncomment=function(t){if(this._lastNode&&"comment"===this._lastNode.type)return void(this._lastNode.data+=t);var e=new node_1.DataNode("comment",t);this.addNode(e),this._lastNode=e},t.prototype.oncommentend=function(){this._lastNode=null},t.prototype.oncdatastart=function(){var t=new node_1.DataNode("text",""),e=new node_1.NodeWithChildren("cdata",[t]);this.addNode(e),t.parent=e,this._lastNode=t},t.prototype.oncdataend=function(){this._lastNode=null},t.prototype.onprocessinginstruction=function(t,e){var o=new node_1.ProcessingInstruction(t,e);this.addNode(o)},t.prototype.handleCallback=function(t){if("function"==typeof this._callback)this._callback(t,this.dom);else if(t)throw t},t.prototype.addNode=function(t){var e=this._tagStack[this._tagStack.length-1],o=e?e.children:this.dom,n=o[o.length-1];this._parser&&(this._options.withStartIndices&&(t.startIndex=this._parser.startIndex),this._options.withEndIndices&&(t.endIndex=this._parser.endIndex)),o.push(t),n&&(t.prev=n,n.next=t),e&&(t.parent=e),this._lastNode=null},t.prototype.addDataNode=function(t){this.addNode(t),this._lastNode=t},t}();exports.DomHandler=DomHandler,exports["default"]=DomHandler;
var domelementtype_1 = require("../domelementtype/index.js");
var node_js_1 = require("./node.js");
var defaultOpts = {
withStartIndices: false,
withEndIndices: false,
xmlMode: false,
};
var DomHandler = /** @class */ (function () {
/**
* @param callback Called once parsing has completed.
* @param options Settings for the handler.
* @param elementCB Callback whenever a tag is closed.
*/
function DomHandler(callback, options, elementCB) {
/** The elements of the DOM */
this.dom = [];
/** The root element for the DOM */
this.root = new node_js_1.Document(this.dom);
/** Indicated whether parsing has been completed. */
this.done = false;
/** Stack of open tags. */
this.tagStack = [this.root];
/** A data node that is still being written to. */
this.lastNode = null;
/** Reference to the parser instance. Used for location information. */
this.parser = null;
// Make it possible to skip arguments, for backwards-compatibility
if (typeof options === "function") {
elementCB = options;
options = defaultOpts;
}
if (typeof callback === "object") {
options = callback;
callback = undefined;
}
this.callback = callback !== null && callback !== void 0 ? callback : null;
this.options = options !== null && options !== void 0 ? options : defaultOpts;
this.elementCB = elementCB !== null && elementCB !== void 0 ? elementCB : null;
}
DomHandler.prototype.onparserinit = function (parser) {
this.parser = parser;
};
// Resets the handler back to starting state
DomHandler.prototype.onreset = function () {
this.dom = [];
this.root = new node_js_1.Document(this.dom);
this.done = false;
this.tagStack = [this.root];
this.lastNode = null;
this.parser = null;
};
// Signals the handler that parsing is done
DomHandler.prototype.onend = function () {
if (this.done)
return;
this.done = true;
this.parser = null;
this.handleCallback(null);
};
DomHandler.prototype.onerror = function (error) {
this.handleCallback(error);
};
DomHandler.prototype.onclosetag = function () {
this.lastNode = null;
var elem = this.tagStack.pop();
if (this.options.withEndIndices) {
elem.endIndex = this.parser.endIndex;
}
if (this.elementCB)
this.elementCB(elem);
};
DomHandler.prototype.onopentag = function (name, attribs) {
var type = this.options.xmlMode ? domelementtype_1.ElementType.Tag : undefined;
var element = new node_js_1.Element(name, attribs, undefined, type);
this.addNode(element);
this.tagStack.push(element);
};
DomHandler.prototype.ontext = function (data) {
var lastNode = this.lastNode;
if (lastNode && lastNode.type === domelementtype_1.ElementType.Text) {
lastNode.data += data;
if (this.options.withEndIndices) {
lastNode.endIndex = this.parser.endIndex;
}
}
else {
var node = new node_js_1.Text(data);
this.addNode(node);
this.lastNode = node;
}
};
DomHandler.prototype.oncomment = function (data) {
if (this.lastNode && this.lastNode.type === domelementtype_1.ElementType.Comment) {
this.lastNode.data += data;
return;
}
var node = new node_js_1.Comment(data);
this.addNode(node);
this.lastNode = node;
};
DomHandler.prototype.oncommentend = function () {
this.lastNode = null;
};
DomHandler.prototype.oncdatastart = function () {
var text = new node_js_1.Text("");
var node = new node_js_1.CDATA([text]);
this.addNode(node);
text.parent = node;
this.lastNode = text;
};
DomHandler.prototype.oncdataend = function () {
this.lastNode = null;
};
DomHandler.prototype.onprocessinginstruction = function (name, data) {
var node = new node_js_1.ProcessingInstruction(name, data);
this.addNode(node);
};
DomHandler.prototype.handleCallback = function (error) {
if (typeof this.callback === "function") {
this.callback(error, this.dom);
}
else if (error) {
throw error;
}
};
DomHandler.prototype.addNode = function (node) {
var parent = this.tagStack[this.tagStack.length - 1];
var previousSibling = parent.children[parent.children.length - 1];
if (this.options.withStartIndices) {
node.startIndex = this.parser.startIndex;
}
if (this.options.withEndIndices) {
node.endIndex = this.parser.endIndex;
}
parent.children.push(node);
if (previousSibling) {
node.prev = previousSibling;
previousSibling.next = node;
}
node.parent = parent;
this.lastNode = null;
};
return DomHandler;
}());
module.exports = DomHandler;

0 comments on commit a964fda

Please sign in to comment.