Permalink
Browse files

Add checks to prevent buffer overruns in a speed-conscious manner.

  • Loading branch information...
1 parent d4c3f03 commit f120def12420f16f129815f933666e6dd9948616 @isaacs isaacs committed Feb 21, 2010
Showing with 48 additions and 7 deletions.
  1. +48 −7 lib/sax.js
View
55 lib/sax.js
@@ -3,12 +3,27 @@ var sax = exports;
sax.parser = function (strict, opt) { return new SAXParser(strict, opt) };
sax.SAXParser = SAXParser;
+// Buffer-overrun checks begin once the parser position passes MAX_BUFFER_LENGTH.
+// Each check schedules the next one (MAX_BUFFER_LENGTH - max(buffer lengths)) past
+// the current parser position, since that is the earliest point at which a buffer
+// overrun could occur. This way, checks are as rare as possible, but as frequent as
+// necessary to ensure this bound is never crossed.
+// Furthermore, buffers are tested at most once per write(), so passing a very
+// large string into write() might have undesirable effects, but this is manageable by
+// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
+// edge case, result in creating at most one complete copy of the string passed in.
+// Set to Infinity to have unlimited buffers.
+sax.MAX_BUFFER_LENGTH = 64 * 1024;
+
+var buffers = [ // parser property names that checkBufferLength() measures and clearBuffers() resets to ""
+ "comment", "sgmlDecl", "textNode", "tagName", "doctype",
+ "procInstName", "procInstBody", "entity", "attribName",
+ "attribValue", "cdata"
+];
+
function SAXParser (strict, opt) {
- this.c = this.comment = this.sgmlDecl =
- this.textNode = this.tagName = this.doctype =
- this.procInstName = this.procInstBody = this.entity =
- this.attribName = this.attribValue = this.q =
- this.cdata = this.sgmlDecl = "";
+ clearBuffers(this);
+ this.q = this.c = "";
+ this.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;
this.opt = opt || {};
this.tagCase = this.opt.lowercasetags ? "toLowerCase" : "toUpperCase";
this.tags = [];
@@ -18,10 +33,35 @@ function SAXParser (strict, opt) {
this.state = S.BEGIN;
this.ENTITIES = Object.create(sax.ENTITIES);
- // just for error reporting
+ // mostly just for error reporting
this.position = this.line = this.column = 0;
emit(this, "onready");
}
+function checkBufferLength (parser) { // compare every buffer named in `buffers` against sax.MAX_BUFFER_LENGTH, then schedule the next check
+ var maxAllowed = sax.MAX_BUFFER_LENGTH,
+ maxActual = 0; // largest buffer length seen during this scan
+ for (var i = 0, l = buffers.length; i < l; i ++) {
+ var len = parser[buffers[i]].length;
+ if (len > maxAllowed) { // strictly greater: a buffer exactly at the limit passes
+ if (buffers[i] === "textNode") {
+ // Text nodes can get big, and since they're buffered, we can get here
+ // under normal conditions. Avoid issues by emitting the text node now,
+ // so at least it won't get any bigger.
+ closeText(parser); // NOTE(review): closeText is defined outside this view; presumably it also empties parser.textNode - confirm
+ } else {
+ error(parser, "Max buffer length exceeded: "+buffers[i]); // any other oversized buffer is reported through error(), naming the buffer
+ }
+ }
+ maxActual = Math.max(maxActual, len); // uses the length measured before any closeText() call above
+ }
+ // schedule the next check for the earliest possible buffer overrun.
+ parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual) + parser.position; // headroom left in the fullest buffer, added to the current position
+}
+function clearBuffers (parser) { // set every property named in `buffers` to the empty string on `parser`
+ for (var i = 0, l = buffers.length; i < l; i ++) {
+ parser[buffers[i]] = "";
+ }
+}
SAXParser.prototype = {
write : write,
resume : function () { this.error = null; return this },
@@ -535,10 +575,11 @@ function write (chunk) {
break;
}
} // while
- // cdata blocks can get big. emit and move on.
+ // cdata blocks can get very big under normal conditions. emit and move on.
if (parser.state === S.CDATA && parser.cdata) {
emitNode(parser, "oncdata", parser.cdata);
parser.cdata = "";
}
+ if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser);
return parser;
}

0 comments on commit f120def

Please sign in to comment.