Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fixed position calculation in chunked mode

  • Loading branch information...
commit 60f8d24fdff4a002e13323f61d21ff10739d363a 1 parent 57f382d
@fgnass fgnass authored
Showing with 102 additions and 100 deletions.
  1. +36 −31 lib/node-htmlparser.js
  2. +66 −69 tests/22-position_data.js
View
67 lib/node-htmlparser.js
@@ -128,11 +128,12 @@ function Parser (handler, options) {
this._elementsCurrent = 0;
this._current = 0;
this._next = 0;
-// this._lines = {
-// offset: 1
-// , charOffset: 0
-// , inBuffer: 0
-// };
+ this._location = {
+ row: 0
+ , col: 0
+ , charOffset: 0
+ , inBuffer: 0
+ };
this._parseState = ElementType.Text;
this._prevTagSep = '';
this._tagStack = [];
@@ -149,7 +150,7 @@ function Parser (handler, options) {
Parser.prototype._elementsCurrent = 0; //Pointer to last element in _elements that has been processed
Parser.prototype._current = 0; //Position in data that has already been parsed
Parser.prototype._next = 0; //Position in data of the next tag marker (<>)
-// Parser.prototype._lines = null; //Position tracking for elements in a stream
+ Parser.prototype._location = null; //Position tracking for elements in a stream
Parser.prototype._parseState = ElementType.Text; //Current type of element being parsed
Parser.prototype._prevTagSep = ''; //Previous tag marker found
//Stack of element types previously encountered; keeps track of when
@@ -226,10 +227,6 @@ function Parser (handler, options) {
, type: this._parseState
};
-// if (this._options.includeLocation) {
-// element.location = this.getLocation();
-// }
-
var elementName = this.parseTagName(element.data);
//This section inspects the current tag stack and modifies the current
@@ -347,6 +344,9 @@ function Parser (handler, options) {
//Add all tags and non-empty text elements to the element list
if (element.raw != "" || element.type != ElementType.Text) {
+ if (this._options.includeLocation && !element.location) {
+ element.location = this.getLocation(element.type == ElementType.Tag);
+ }
this.parseAttribs(element);
this._elements.push(element);
//If tag self-terminates, add an explicit, separate closing tag
@@ -370,34 +370,39 @@ function Parser (handler, options) {
this._current = this._next + 1;
this._prevTagSep = tagSep;
}
-
-// this._lines.offset += this._lines.inBuffer;
-// this._lines.inBuffer = 0;
-// this._lines.charOffset = 0;
+ if (this._options.includeLocation) {
+ this.getLocation();
+ this._location.row += this._location.inBuffer;
+ this._location.inBuffer = 0;
+ this._location.charOffset = 0;
+ }
this._buffer = (this._current <= bufferEnd) ? this._buffer.substring(this._current) : "";
this._current = 0;
this.writeHandler();
}
-// Parser.prototype.getLocation = function Parser$getLocation () {
-// var c;
-// var col = 0;
-// for (; this._lines.charOffset < this._current; this._lines.charOffset++) {
-// c = this._buffer.charAt(this._lines.charOffset);
-// if (c == '\n') {
-// this._lines.inBuffer++;
-// col = 0;
-// } else if (c != '\r') {
-// col++;
-// }
-// }
-// return {
-// line: this._lines.offset + this._lines.inBuffer
-// , col: col
-// };
-// }
+ Parser.prototype.getLocation = function Parser$getLocation (startTag) { //Advances the position tracker in this._location through this._buffer and returns a { line, col } object; callers pass a truthy startTag when locating a tag element
+ var c, //Character currently being inspected
+ l = this._location, //Shared tracker, mutated in place so each call resumes where the previous one stopped
+ end = this._current - (startTag ? 1 : 0), //Scan up to the current parse position, one character less for a tag (presumably to leave out its opening '<' -- confirm)
+ chunk = startTag && l.charOffset == 0 && this._current == 0; //Truthy when a tag is located before anything in this buffer was scanned or parsed; presumably its '<' was consumed with the previous chunk -- confirm
+
+ for (; l.charOffset < end; l.charOffset++) { //Walk only the characters that have not been counted yet
+ c = this._buffer.charAt(l.charOffset);
+ if (c == '\n') { //A line feed begins a new line
+ l.inBuffer++; //Number of line feeds counted in the current buffer
+ l.col = 0; //Column counting restarts after every line feed
+ } else if (c != '\r') { //Carriage returns never advance the column
+ l.col++;
+ }
+ }
+ return {
+ line: l.row + l.inBuffer + 1 //row + inBuffer is the zero-based count of line feeds seen so far; +1 reports it 1-based
+ , col: l.col + (chunk ? 0: 1) //Reported 1-based, except in the chunk case above where no offset is added
+ };
+ }
//Checks the handler to make sure it is an object with the right "interface"
Parser.prototype.validateHandler = function Parser$validateHandler (handler) {
View
135 tests/22-position_data.js
@@ -30,74 +30,71 @@ exports.options = {
handler: {}
, parser: { includeLocation: true }
};
-//TODO: re-instate test when chunked position tracking is fixed
-exports.html = "<FIXME>";
-exports.expected = [ { raw: 'FIXME', data: 'FIXME', type: 'tag', name: 'FIXME' } ];
-//exports.html = "<html>\r\n\n\t<title>The Title</title><body>\nHello world\r\n\n</body>\n\n</html>";
-//exports.expected = [
-// {
-// raw: 'html',
-// data: 'html',
-// type: 'tag',
-// location: {
-// line: 1,
-// col: 1
-// },
-// name: 'html',
-// children: [{
-// raw: '\n\n',
-// data: '\n\n',
-// type: 'text',
-// location: {
-// line: 1,
-// col: 5
-// }
-// }, {
-// raw: 'title',
-// data: 'title',
-// type: 'tag',
-// location: {
-// line: 3,
-// col: 2
-// },
-// name: 'title',
-// children: [{
-// raw: 'The Title',
-// data: 'The Title',
-// type: 'text',
-// location: {
-// line: 3,
-// col: 6
-// }
-// }]
-// }, {
-// raw: 'body',
-// data: 'body',
-// type: 'tag',
-// location: {
-// line: 3,
-// col: 1
-// },
-// name: 'body',
-// children: [{
-// raw: '\nHello world\n\n',
-// data: '\nHello world\n\n',
-// type: 'text',
-// location: {
-// line: 3,
-// col: 5
-// }
-// }]
-// }, {
-// raw: '\n\n',
-// data: '\n\n',
-// type: 'text',
-// location: {
-// line: 6,
-// col: 6
-// }
-// }]
-// }
-// ];
+exports.html = "<html>\r\n\n\t<title>The Title</title><body>\nHello world\r\n\n</body>\n\n</html>";
+exports.expected = [
+ {
+ raw: 'html',
+ data: 'html',
+ type: 'tag',
+ name: 'html',
+ location: {
+ line: 1,
+ col: 1
+ },
+ children: [{
+ raw: '\r\n\n\t',
+ data: '\r\n\n\t',
+ type: 'text',
+ location: {
+ line: 1,
+ col: 7
+ }
+ }, {
+ raw: 'title',
+ data: 'title',
+ type: 'tag',
+ name: 'title',
+ location: {
+ line: 3,
+ col: 2
+ },
+ children: [{
+ raw: 'The Title',
+ data: 'The Title',
+ type: 'text',
+ location: {
+ line: 3,
+ col: 9
+ }
+ }]
+ }, {
+ raw: 'body',
+ data: 'body',
+ type: 'tag',
+ name: 'body',
+ location: {
+ line: 3,
+ col: 26
+ },
+ children: [{
+ raw: '\nHello world\r\n\n',
+ data: '\nHello world\r\n\n',
+ type: 'text',
+ location: {
+ line: 3,
+ col: 32
+ }
+ }]
+ }, {
+ raw: '\n\n',
+ data: '\n\n',
+ type: 'text',
+ location: {
+ line: 6,
+ col: 8
+ }
+ }]
+ }
+ ];
})();
Please sign in to comment.
Something went wrong with that request. Please try again.