Skip to content

Commit

Permalink
Added options parameter to DefaultHandler
Browse files Browse the repository at this point in the history
  • Loading branch information
tautologistics committed Apr 22, 2010
1 parent eb32a9e commit ee028ab
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 13 deletions.
23 changes: 20 additions & 3 deletions node-htmlparser.js
Expand Up @@ -418,8 +418,14 @@ function Parser (handler) {
}
Parser.prototype.HandleError = Parser.prototype.handleError; //TODO: remove next version

function DefaultHandler (callback) {
//TODO: add support for options: ignoreWhitespace, verbose (keep data for tags and raw for all)
/**
 * Handler that collects parsed elements; behaviour is tuned through an options object.
 *
 * @param {Function} [callback] Stored as this._callback when it is a function; ignored otherwise.
 * @param {Object} [options] Optional settings. Recognised keys:
 *   ignoreWhitespace - when true, whitespace-only text nodes are dropped (default: false)
 *   verbose          - when false, 'raw' is removed from all elements and 'data' from
 *                      tags, scripts and comments (default: true)
 * The options object passed in is never modified; the handler keeps its own copy.
 */
function DefaultHandler (callback, options) {
	this.reset();
	//Copy the caller's options so that filling in defaults below does not write into
	//an object the caller still owns (e.g. a fixture shared between several handlers)
	var settings = { };
	if (options) {
		for (var key in options) {
			if (Object.prototype.hasOwnProperty.call(options, key))
				settings[key] = options[key];
		}
	}
	//Loose comparison on purpose: both null and undefined mean "use the default"
	if (settings.ignoreWhitespace == undefined)
		settings.ignoreWhitespace = false; //Keep whitespace-only text nodes
	if (settings.verbose == undefined)
		settings.verbose = true; //Keep data property for tags and raw property for all
	this._options = settings;
	if ((typeof callback) == "function")
		this._callback = callback;
}
Expand All @@ -442,6 +448,8 @@ function DefaultHandler (callback) {
, param: 1
, embed: 1
}
//Regex to detect whitespace-only text nodes (also matches the empty string)
DefaultHandler.reWhitespace = /^\s*$/;

//**Public**//
//Properties//
Expand All @@ -465,6 +473,9 @@ function DefaultHandler (callback) {
this.handleElement(element);
}
//Passes a text element on to handleElement, unless the ignoreWhitespace option
//is enabled and the element's data consists solely of whitespace
DefaultHandler.prototype.writeText = function DefaultHandler$writeText (element) {
	var isIgnorable = this._options.ignoreWhitespace
		&& DefaultHandler.reWhitespace.test(element.data);
	if (!isIgnorable)
		this.handleElement(element);
};
DefaultHandler.prototype.writeComment = function DefaultHandler$writeComment (element) {
Expand All @@ -479,6 +490,7 @@ function DefaultHandler (callback) {

//**Private**//
//Properties//
DefaultHandler.prototype._options = null; //Handler options controlling behaviour (ignoreWhitespace, verbose); set by the constructor
DefaultHandler.prototype._callback = null; //Callback to respond to when parsing is done
DefaultHandler.prototype._done = false; //Flag indicating whether the handler has been notified that parsing completed
DefaultHandler.prototype._tagStack = null; //List of parents of the element currently being processed
Expand All @@ -494,8 +506,13 @@ function DefaultHandler (callback) {
DefaultHandler.prototype.handleElement = function DefaultHandler$handleElement (element) {
if (this._done)
this.handleCallback(new Error("Writing to the handler after done() called is not allowed without a reset()"));
// delete element.raw; //FIXME: Serious performance problem here
// element.raw = null; //FIXME: Not clean
if (!this._options.verbose) {
// element.raw = null; //FIXME: Not clean
//FIXME: Serious performance problem using delete
delete element.raw;
if (element.type == "tag" || element.type == "script" || element.type == "comment")
delete element.data;
}
if (!this._tagStack.last()) { //There are no parent elements
//If the element can be a container, add it to the tag stack and the top level list
if (element.type != ElementType.Text && element.type != ElementType.Comment && element.type != ElementType.Directive) {
Expand Down
12 changes: 7 additions & 5 deletions runtests.html
Expand Up @@ -27,23 +27,25 @@
<script language="JavaScript" src="tests/12-text_only.js"></script>
<script language="JavaScript" src="tests/13-comment_in_text.js"></script>
<script language="JavaScript" src="tests/14-comment_in_text_in_script.js"></script>
<script language="JavaScript" src="tests/15-non-verbose.js"></script>
<script language="JavaScript" src="tests/16-ignore_whitespace.js"></script>
<!-- //TODO: dynamic loading of test files -->
</head>
<body style="font-size: small; font-family:Arial, Helvetica, sans-serif;">

<script language="JavaScript">
var chunkSize = 5;
var handler = new Tautologistics.NodeHtmlParser.DefaultHandler(function (error) {
if (error)
document.write("<hr>Handler error: " + error + "<hr>");
});
var parser = new Tautologistics.NodeHtmlParser.Parser(handler);
var testCount = 0;
var failedCount = 0;
while (Tautologistics.NodeHtmlParser.Tests.length) {
testCount++;
var test = Tautologistics.NodeHtmlParser.Tests.shift();
try {
var handler = new Tautologistics.NodeHtmlParser.DefaultHandler(function (error) {
if (error)
document.write("<hr>Handler error: " + error + "<hr>");
}, test.options);
var parser = new Tautologistics.NodeHtmlParser.Parser(handler);
document.write("<b>" + test.name + "</b>: ");
parser.ParseComplete(test.html);
var resultComplete = handler.dom;
Expand Down
10 changes: 5 additions & 5 deletions runtests.js
Expand Up @@ -29,17 +29,17 @@ var chunkSize = 5;
var testFiles = fs.readdirSync(testFolder);
var testCount = 0;
var failedCount = 0;
var handler = new htmlparser.DefaultHandler(function (error) {
if (error)
sys.puts("Handler error: " + error);
});
var parser = new htmlparser.Parser(handler);
for (var i in testFiles) {
testCount++;
var fileParts = testFiles[i].split(".");
fileParts.pop();
var moduleName = fileParts.join(".");
var test = require(testFolder + "/" + moduleName);
var handler = new htmlparser.DefaultHandler(function (error) {
if (error)
sys.puts("Handler error: " + error);
}, test.options);
var parser = new htmlparser.Parser(handler);
parser.ParseComplete(test.html);
var resultComplete = handler.dom;
var chunkPos = 0;
Expand Down
43 changes: 43 additions & 0 deletions tests/15-non-verbose.js
@@ -0,0 +1,43 @@
//Test fixture: with the 'verbose' option switched off, the expected DOM carries
//no 'raw' property on any node and no 'data' property on the tag.
(function () {

	//True when the CommonJS module variables that Node provides are all present
	function RunningInNode () {
		return(
			(typeof require) == "function"
			&&
			(typeof exports) == "object"
			&&
			(typeof module) == "object"
			&&
			(typeof __filename) == "string"
			&&
			(typeof __dirname) == "string"
		);
	}

	//Resolve the global object explicitly: 'this' is undefined inside a plain
	//function call under strict mode / ES modules, so it cannot be relied upon
	var root = this
		|| ((typeof globalThis) == "object" && globalThis)
		|| ((typeof window) == "object" && window)
		|| ((typeof self) == "object" && self);

	//The fixture lives in a local variable; outside Node nothing is assigned to
	//an undeclared 'exports', so no implicit global is created
	var test;
	if (RunningInNode()) {
		test = exports;
	}
	else {
		if (!root.Tautologistics)
			root.Tautologistics = {};
		if (!root.Tautologistics.NodeHtmlParser)
			root.Tautologistics.NodeHtmlParser = {};
		if (!root.Tautologistics.NodeHtmlParser.Tests)
			root.Tautologistics.NodeHtmlParser.Tests = [];
		test = {};
		root.Tautologistics.NodeHtmlParser.Tests.push(test);
	}

	test.name = "Option 'verbose' set to 'false'";
	test.html = "<\n font \n size='14' \n>the text<\n / \nfont \n>";
	test.options = { verbose: false };
	test.expected =
		[ { type: 'tag'
		  , name: 'font'
		  , attribs: { size: '14' }
		  , children:
			[ { data: 'the text'
			  , type: 'text'
			  }
			]
		  }
		];

})();
68 changes: 68 additions & 0 deletions tests/16-ignore_whitespace.js
@@ -0,0 +1,68 @@
//Test fixture: with the 'ignoreWhitespace' option switched on, the expected DOM
//contains no text nodes made up solely of whitespace.
(function () {

	//True when the CommonJS module variables that Node provides are all present
	function RunningInNode () {
		return(
			(typeof require) == "function"
			&&
			(typeof exports) == "object"
			&&
			(typeof module) == "object"
			&&
			(typeof __filename) == "string"
			&&
			(typeof __dirname) == "string"
		);
	}

	//Resolve the global object explicitly: 'this' is undefined inside a plain
	//function call under strict mode / ES modules, so it cannot be relied upon
	var root = this
		|| ((typeof globalThis) == "object" && globalThis)
		|| ((typeof window) == "object" && window)
		|| ((typeof self) == "object" && self);

	//The fixture lives in a local variable; outside Node nothing is assigned to
	//an undeclared 'exports', so no implicit global is created
	var test;
	if (RunningInNode()) {
		test = exports;
	}
	else {
		if (!root.Tautologistics)
			root.Tautologistics = {};
		if (!root.Tautologistics.NodeHtmlParser)
			root.Tautologistics.NodeHtmlParser = {};
		if (!root.Tautologistics.NodeHtmlParser.Tests)
			root.Tautologistics.NodeHtmlParser.Tests = [];
		test = {};
		root.Tautologistics.NodeHtmlParser.Tests.push(test);
	}

	test.name = "Options 'ignoreWhitespace' set to 'true'";
	test.html = "Line one\n<br> \t\n<br>\nline two<font>\n <br> x </font>";
	test.options = { ignoreWhitespace: true };
	test.expected =
		[ { raw: 'Line one\n'
		  , data: 'Line one\n'
		  , type: 'text'
		  }
		, { raw: 'br'
		  , data: 'br'
		  , type: 'tag'
		  , name: 'br'
		  }
		, { raw: 'br'
		  , data: 'br'
		  , type: 'tag'
		  , name: 'br'
		  }
		, { raw: '\nline two'
		  , data: '\nline two'
		  , type: 'text'
		  }
		, { raw: 'font'
		  , data: 'font'
		  , type: 'tag'
		  , name: 'font'
		  , children:
			[ { raw: 'br'
			  , data: 'br'
			  , type: 'tag'
			  , name: 'br'
			  }
			, { raw: ' x '
			  , data: ' x '
			  , type: 'text'
			  }
			]
		  }
		];

})();

0 comments on commit ee028ab

Please sign in to comment.