Permalink
Browse files

modern isaacs-style, and make Stream interface more normal

  • Loading branch information...
1 parent bd8fe5f commit 1e79e87e20205cd95ffd723a3cb31f6e236b9d51 @isaacs isaacs committed Jul 20, 2011
Showing with 401 additions and 378 deletions.
  1. +401 −378 lib/sax.js
View
779 lib/sax.js
@@ -1,8 +1,8 @@
// wrapper for non-node envs
;(function (sax) {
-sax.parser = function (strict, opt) { return new SAXParser(strict, opt) };
-sax.SAXParser = SAXParser;
+sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
+sax.SAXParser = SAXParser
// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
@@ -13,36 +13,37 @@ sax.SAXParser = SAXParser;
// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
// edge case, result in creating at most one complete copy of the string passed in.
// Set to Infinity to have unlimited buffers.
-sax.MAX_BUFFER_LENGTH = 64 * 1024;
+sax.MAX_BUFFER_LENGTH = 64 * 1024
var buffers = [
"comment", "sgmlDecl", "textNode", "tagName", "doctype",
"procInstName", "procInstBody", "entity", "attribName",
"attribValue", "cdata"
-];
+]
function SAXParser (strict, opt) {
- clearBuffers(this);
- this.q = this.c = "";
- this.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;
- this.opt = opt || {};
- this.tagCase = this.opt.lowercasetags ? "toLowerCase" : "toUpperCase";
- this.tags = [];
- this.closed = this.closedRoot = this.sawRoot = false;
- this.tag = this.error = null;
- this.strict = !!strict;
- this.state = S.BEGIN;
- this.ENTITIES = Object.create(sax.ENTITIES);
+ clearBuffers(this)
+ this.q = this.c = ""
+ this.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
+ this.opt = opt || {}
+ this.tagCase = this.opt.lowercasetags ? "toLowerCase" : "toUpperCase"
+ this.tags = []
+ this.closed = this.closedRoot = this.sawRoot = false
+ this.tag = this.error = null
+ this.strict = !!strict
+ this.state = S.BEGIN
+ this.ENTITIES = Object.create(sax.ENTITIES)
// mostly just for error reporting
- this.position = this.line = this.column = 0;
- emit(this, "onready");
+ this.position = this.line = this.column = 0
+ emit(this, "onready")
}
+
function checkBufferLength (parser) {
- var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10),
- maxActual = 0;
+ var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
+ , maxActual = 0
for (var i = 0, l = buffers.length; i < l; i ++) {
- var len = parser[buffers[i]].length;
+ var len = parser[buffers[i]].length
if (len > maxAllowed) {
// Text/cdata nodes can get big, and since they're buffered,
// we can get here under normal conditions.
@@ -58,100 +59,110 @@ function checkBufferLength (parser) {
error(parser, "Max buffer length exceeded: "+buffers[i])
}
}
- maxActual = Math.max(maxActual, len);
+ maxActual = Math.max(maxActual, len)
}
// schedule the next check for the earliest possible buffer overrun.
- parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual) + parser.position;
+ parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual)
+ + parser.position
}
+
function clearBuffers (parser) {
for (var i = 0, l = buffers.length; i < l; i ++) {
- parser[buffers[i]] = "";
+ parser[buffers[i]] = ""
}
}
+
SAXParser.prototype = {
write : write,
resume : function () { this.error = null; return this },
close : function () { return this.write(null) },
}
-sax.createStream = function () {
- var stream = require('stream')
- , parser = sax.parser.apply(sax, arguments)
- , stream = new stream.Stream()
- ;
-
- stream.writable = true
- stream.write = function (data) {
- parser.write(data.toString())
- stream.emit(data)
- }
- stream.end = function (chunk) {
- if (chunk) parser.write(chunk.toString())
- emit('end')
+try {
+ var Stream = require("stream").Stream
+} catch (ex) {
+ var Stream = function () {}
+}
+
+// Factory for a streaming parser; takes the same (strict, opt) arguments
+// as sax.parser() and returns a SAXStream.
+function createStream (strict, opt) {
+ return new SAXStream(strict, opt)
+}
+
+// The previous `sax.createStream = function ...` assignment is removed by
+// this change, so the factory (and the class) must be re-exported here.
+sax.createStream = createStream
+sax.SAXStream = SAXStream
+
+// Stream wrapper around a SAXParser. The parser's "end" callback is
+// forwarded as an "end" event; other parser callbacks are hooked up
+// lazily by SAXStream.prototype.on.
+function SAXStream (strict, opt) {
+ // `me` is only assigned further down and is still undefined here, so the
+ // Stream constructor has to be applied to `this`.
+ Stream.apply(this)
+
+ this._parser = new SAXParser(strict, opt)
+ this.writable = true
+ this.readable = true
+
+ var me = this
+ this._parser.onend = function () {
+ me.emit("end")
}
-
- parser.onend = function () {
- stream.end()
- };
-
- stream.__defineSetter__('onend', function (val) {
- parser.onend = function () {
- stream.end()
- if (val) val()
- }
- })
-
- var wraps =
- [ 'opentag'
- , 'closetag'
- , 'text'
- , 'attribute'
- , 'error'
- , 'doctype'
- , 'processinginstruction'
- , 'sgmldeclaration'
- , 'comment'
- , 'opencdata'
- , 'cdata'
- , 'closecdata'
- , 'ready'
- ]
-
- wraps.forEach(function (name) {
- var onFunc = function () {
- var args = Array.prototype.slice.apply(arguments)
- args.splice(0,0,name)
- stream.emit.apply(stream, args)
+}
+
+SAXStream.prototype = Object.create(Stream.prototype,
+ { constructor: { value: SAXStream } })
+
+// Feeds a chunk to the underlying parser, then re-emits the chunk as a
+// "data" event so the stream can be piped onward.
+SAXStream.prototype.write = function (data) {
+ this._parser.write(data.toString())
+ // emit() takes the event name first; emit(data) used the chunk itself as
+ // the event name, so "data" listeners were never called.
+ this.emit("data", data)
+ // Writable-stream convention: a non-false return means the caller does
+ // not need to wait for "drain".
+ return true
+}
+
+// Writes an optional final chunk, then closes the parser. Closing runs the
+// parser's onend callback, which the constructor forwards as "end".
+SAXStream.prototype.end = function (chunk) {
+ if (chunk && chunk.length) this._parser.write(chunk.toString())
+ // SAXParser has no emit() method (its prototype is write/resume/close);
+ // close() is what finishes the parse and fires onend.
+ this._parser.close()
+}
+
+var streamWraps =
+ [ "opentag"
+ , "closetag"
+ , "text"
+ , "attribute"
+ , "error"
+ , "doctype"
+ , "processinginstruction"
+ , "sgmldeclaration"
+ , "comment"
+ , "opencdata"
+ , "cdata"
+ , "closecdata"
+ , "ready"
+ ]
+
+SAXStream.prototype.on = function (ev, handler) {
+ var me = this
+ if (!me._parser["on"+ev] && streamWraps.indexOf(ev) !== -1) {
+ me._parser["on"+ev] = function () {
+ var args = arguments.length === 1 ? [arguments[0]]
+ : Array.apply(null, arguments)
+ args.splice(0, 0, ev)
+ me.emit.apply(me, args)
}
- parser['on'+name] = onFunc;
- stream.__defineSetter__('on'+name, function (val) {
- parser['on'+name] = function () {
- onFunc.apply(parser, arguments)
- if (val) val.apply(stream, arguments)
- }
- })
- })
-
- return stream
+ }
+ return Stream.prototype.on.call(me, ev, handler)
}
+
+
// character classes and tokens
-var whitespace = "\r\n\t ",
+var whitespace = "\r\n\t "
// this really needs to be replaced with character classes.
// XML allows all manner of ridiculous numbers and digits.
- number = "0124356789",
- letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
- // (Letter | '_' | ':')
- nameStart = letter+"_:",
- nameBody = nameStart+number+"-.",
- quote = "'\"",
- entity = number+letter+"#",
- CDATA = "[CDATA[",
- DOCTYPE = "DOCTYPE";
+ , number = "0124356789"
+ , letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ // (Letter | "_" | ":")
+ , nameStart = letter+"_:"
+ , nameBody = nameStart+number+"-."
+ , quote = "'\""
+ , entity = number+letter+"#"
+ , CDATA = "[CDATA["
+ , DOCTYPE = "DOCTYPE"
+
function is (charclass, c) { return charclass.indexOf(c) !== -1 }
function not (charclass, c) { return !is(charclass, c) }
-var S = 0;
+var S = 0
sax.STATE =
{ BEGIN : S++
, TEXT : S++ // general stuff
@@ -196,486 +207,498 @@ sax.ENTITIES =
, "lt" : "<"
}
-for (var S in sax.STATE) sax.STATE[sax.STATE[S]] = S;
+for (var S in sax.STATE) sax.STATE[sax.STATE[S]] = S
// shorthand
-S = sax.STATE;
+S = sax.STATE
sax.EVENTS = [ // for discoverability.
"text", "processinginstruction", "sgmldeclaration",
"doctype", "comment", "attribute", "opentag", "closetag",
- "opencdata", "cdata", "closecdata", "error", "end", "ready" ];
+ "opencdata", "cdata", "closecdata", "error", "end", "ready" ]
function emit (parser, event, data) {
- parser[event] && parser[event](data);
+ parser[event] && parser[event](data)
}
+
function emitNode (parser, nodeType, data) {
- if (parser.textNode) closeText(parser);
- emit(parser, nodeType, data);
+ if (parser.textNode) closeText(parser)
+ emit(parser, nodeType, data)
}
+
function closeText (parser) {
- parser.textNode = textopts(parser.opt, parser.textNode);
- if (parser.textNode) emit(parser, "ontext", parser.textNode);
- parser.textNode = "";
+ parser.textNode = textopts(parser.opt, parser.textNode)
+ if (parser.textNode) emit(parser, "ontext", parser.textNode)
+ parser.textNode = ""
}
+
function textopts (opt, text) {
- if (opt.trim) text = text.trim();
- if (opt.normalize) text = text.replace(/\s+/g, " ");
- return text;
+ if (opt.trim) text = text.trim()
+ if (opt.normalize) text = text.replace(/\s+/g, " ")
+ return text
}
+
function error (parser, er) {
- closeText(parser);
+ closeText(parser)
er += "\nLine: "+parser.line+
"\nColumn: "+parser.column+
- "\nChar: "+parser.c;
- er = new Error(er);
- parser.error = er;
- emit(parser, "onerror", er);
- return parser;
+ "\nChar: "+parser.c
+ er = new Error(er)
+ parser.error = er
+ emit(parser, "onerror", er)
+ return parser
}
+
function end (parser) {
- if (parser.state !== S.TEXT) error(parser, "Unexpected end");
- closeText(parser);
- parser.c = "";
- parser.closed = true;
- emit(parser, "onend");
- SAXParser.call(parser, parser.strict, parser.opt);
- return parser;
+ if (parser.state !== S.TEXT) error(parser, "Unexpected end")
+ closeText(parser)
+ parser.c = ""
+ parser.closed = true
+ emit(parser, "onend")
+ SAXParser.call(parser, parser.strict, parser.opt)
+ return parser
}
+
function strictFail (parser, message) {
- if (parser.strict) error(parser, message);
+ if (parser.strict) error(parser, message)
}
+
function newTag (parser) {
- if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]();
- parser.tag = { name : parser.tagName, attributes : {} };
+ if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]()
+ parser.tag = { name : parser.tagName, attributes : {} }
}
+
function openTag (parser, selfClosing) {
- parser.sawRoot = true;
- parser.tags.push(parser.tag);
- emitNode(parser, "onopentag", parser.tag);
+ parser.sawRoot = true
+ parser.tags.push(parser.tag)
+ emitNode(parser, "onopentag", parser.tag)
if (!selfClosing) {
- parser.tag = null;
- parser.tagName = "";
- parser.state = S.TEXT;
+ parser.tag = null
+ parser.tagName = ""
+ parser.state = S.TEXT
}
- parser.attribName = parser.attribValue = "";
+ parser.attribName = parser.attribValue = ""
}
+
function closeTag (parser) {
if (!parser.tagName) {
- strictFail(parser, "Weird empty close tag.");
- parser.textNode += "</>";
- parser.state = S.TEXT;
- return;
+ strictFail(parser, "Weird empty close tag.")
+ parser.textNode += "</>"
+ parser.state = S.TEXT
+ return
}
// first make sure that the closing tag actually exists.
// <a><b></c></b></a> will close everything, otherwise.
- var t = parser.tags.length;
- if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]();
- var closeTo = parser.tagName;
+ var t = parser.tags.length
+ if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]()
+ var closeTo = parser.tagName
while (t --) {
- var close = parser.tags[t];
+ var close = parser.tags[t]
if (close.name !== closeTo) {
// fail the first time in strict mode
- strictFail(parser, "Unexpected close tag");
- } else break;
+ strictFail(parser, "Unexpected close tag")
+ } else break
}
// didn't find it. we already failed for strict, so just abort.
- if (t < 0) return;
- var s = parser.tags.length;
+ if (t < 0) {
+ // With an empty tag stack the loop above never ran, so strict mode has
+ // not been told about this stray close tag yet.
+ if (!parser.tags.length) strictFail(parser, "Unexpected close tag")
+ // Keep the unmatched tag as literal text and go back to TEXT. Returning
+ // while still in CLOSE_TAG would glue the following characters onto the
+ // stale tagName.
+ parser.textNode += "</" + parser.tagName + ">"
+ parser.tagName = ""
+ parser.state = S.TEXT
+ return
+ }
+ var s = parser.tags.length
while (s --> t) {
- parser.tag = parser.tags.pop();
- parser.tagName = parser.tag.name;
- emitNode(parser, "onclosetag", parser.tagName);
+ parser.tag = parser.tags.pop()
+ parser.tagName = parser.tag.name
+ emitNode(parser, "onclosetag", parser.tagName)
}
- if (t === 0) parser.closedRoot = true;
- parser.tagName = parser.attribValue = parser.attribName = "";
- parser.tag = null;
- parser.state = S.TEXT;
+ if (t === 0) parser.closedRoot = true
+ parser.tagName = parser.attribValue = parser.attribName = ""
+ parser.tag = null
+ parser.state = S.TEXT
}
+
function parseEntity (parser) {
- var entity = parser.entity.toLowerCase(), num, numStr = "";
- if (parser.ENTITIES[entity]) return parser.ENTITIES[entity];
+ // Resolves the buffered entity name to its replacement text. Named
+ // entities come from parser.ENTITIES; "#NNN" and "#xHHH" are numeric
+ // character references. Anything else is returned verbatim as "&name;".
+ var entity = parser.entity.toLowerCase(), num, numStr = ""
+ if (parser.ENTITIES[entity]) return parser.ENTITIES[entity]
if (entity.charAt(0) === "#") {
if (entity.charAt(1) === "x") {
- entity = entity.slice(2);
- num = parseInt(entity, 16), numStr = num.toString(16);
+ entity = entity.slice(2)
+ num = parseInt(entity, 16), numStr = num.toString(16)
} else {
- entity = entity.slice(1);
- num = parseInt(entity, 10), numStr = num.toString(10);
+ entity = entity.slice(1)
+ num = parseInt(entity, 10), numStr = num.toString(10)
}
}
- if (numStr.toLowerCase() !== entity) {
- strictFail(parser, "Invalid character entity");
- return "&"+parser.entity + ";";
+ // Zero-padded references such as "&#038;" are legal; drop the padding so
+ // the round-trip comparison below does not reject them.
+ entity = entity.replace(/^0+/, "")
+ // The isNaN guard rejects "&;" (empty name, num undefined) and "&#nan;"
+ // ("NaN" round-trips to itself), which otherwise reached fromCharCode.
+ if (isNaN(num) || numStr.toLowerCase() !== entity) {
+ strictFail(parser, "Invalid character entity")
+ return "&"+parser.entity + ";"
}
- return String.fromCharCode(num);
+ return String.fromCharCode(num)
}
function write (chunk) {
- var parser = this;
- if (this.error) throw this.error;
+ var parser = this
+ if (this.error) throw this.error
if (parser.closed) return error(parser,
- "Cannot write after close. Assign an onready handler.");
- if (chunk === null) return end(parser);
+ "Cannot write after close. Assign an onready handler.")
+ if (chunk === null) return end(parser)
var i = 0, c = ""
while (parser.c = c = chunk.charAt(i++)) {
- parser.position ++;
+ parser.position ++
if (c === "\n") {
- parser.line ++;
- parser.column = 0;
- } else parser.column ++;
+ parser.line ++
+ parser.column = 0
+ } else parser.column ++
switch (parser.state) {
case S.BEGIN:
- if (c === "<") parser.state = S.OPEN_WAKA;
+ if (c === "<") parser.state = S.OPEN_WAKA
else if (not(whitespace,c)) {
// have to process this as a text node.
// weird, but happens.
- strictFail(parser, "Non-whitespace before first tag.");
- parser.textNode = c;
- state = S.TEXT;
+ strictFail(parser, "Non-whitespace before first tag.")
+ parser.textNode = c
+ // must update the parser's state; a bare `state` creates an implicit
+ // global and leaves the parser stuck in BEGIN.
+ parser.state = S.TEXT
}
- continue;
+ continue
case S.TEXT:
if (parser.sawRoot && !parser.closedRoot) {
- var starti = i-1;
+ var starti = i-1
while (c && c!=="<" && c!=="&") {
- c = chunk.charAt(i++);
+ c = chunk.charAt(i++)
if (c) {
- parser.position ++;
+ parser.position ++
if (c === "\n") {
- parser.line ++;
- parser.column = 0;
- } else parser.column ++;
+ parser.line ++
+ parser.column = 0
+ } else parser.column ++
}
}
- parser.textNode += chunk.substring(starti, i-1);
+ parser.textNode += chunk.substring(starti, i-1)
}
- if (c === "<") parser.state = S.OPEN_WAKA;
+ if (c === "<") parser.state = S.OPEN_WAKA
else {
if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
- strictFail("Text data outside of root node.");
- if (c === "&") parser.state = S.TEXT_ENTITY;
- else parser.textNode += c;
+ // strictFail takes the parser first; without it the message string
+ // is treated as the parser and the failure is never reported.
+ strictFail(parser, "Text data outside of root node.")
+ if (c === "&") parser.state = S.TEXT_ENTITY
+ else parser.textNode += c
}
- continue;
+ continue
case S.OPEN_WAKA:
// either a /, ?, !, or text is coming next.
if (c === "!") {
- parser.state = S.SGML_DECL;
- parser.sgmlDecl = "";
+ parser.state = S.SGML_DECL
+ parser.sgmlDecl = ""
} else if (is(whitespace, c)) {
// wait for it...
} else if (is(nameStart,c)) {
- parser.state = S.OPEN_TAG;
- parser.tagName = c;
+ parser.state = S.OPEN_TAG
+ parser.tagName = c
} else if (c === "/") {
- parser.state = S.CLOSE_TAG;
- parser.tagName = "";
+ parser.state = S.CLOSE_TAG
+ parser.tagName = ""
} else if (c === "?") {
- parser.state = S.PROC_INST;
- parser.procInstName = parser.procInstBody = "";
+ parser.state = S.PROC_INST
+ parser.procInstName = parser.procInstBody = ""
} else {
- strictFail(parser, "Unencoded <");
- parser.textNode += "<" + c;
- parser.state = S.TEXT;
+ strictFail(parser, "Unencoded <")
+ parser.textNode += "<" + c
+ parser.state = S.TEXT
}
- continue;
+ continue
case S.SGML_DECL:
if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
- emitNode(parser, "onopencdata");
- parser.state = S.CDATA;
- parser.sgmlDecl = "";
- parser.cdata = "";
+ emitNode(parser, "onopencdata")
+ parser.state = S.CDATA
+ parser.sgmlDecl = ""
+ parser.cdata = ""
} else if (parser.sgmlDecl+c === "--") {
- parser.state = S.COMMENT;
- parser.comment = "";
- parser.sgmlDecl = "";
+ parser.state = S.COMMENT
+ parser.comment = ""
+ parser.sgmlDecl = ""
} else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
- parser.state = S.DOCTYPE;
+ parser.state = S.DOCTYPE
if (parser.doctype || parser.sawRoot) strictFail(parser,
- "Inappropriately located doctype declaration");
- parser.doctype = "";
- parser.sgmlDecl = "";
+ "Inappropriately located doctype declaration")
+ parser.doctype = ""
+ parser.sgmlDecl = ""
} else if (c === ">") {
- emitNode(parser, "onsgmldeclaration", parser.sgmlDecl);
- parser.sgmlDecl = "";
- parser.state = S.TEXT;
+ emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
+ parser.sgmlDecl = ""
+ parser.state = S.TEXT
} else if (is(quote, c)) {
- parser.state = S.SGML_DECL_QUOTED;
- parser.sgmlDecl += c;
- } else parser.sgmlDecl += c;
- continue;
+ // remember which quote opened the string; SGML_DECL_QUOTED compares
+ // against parser.q and could never leave that state while q was "".
+ parser.q = c
+ parser.state = S.SGML_DECL_QUOTED
+ parser.sgmlDecl += c
+ } else parser.sgmlDecl += c
+ continue
case S.SGML_DECL_QUOTED:
if (c === parser.q) {
- parser.state = S.SGML_DECL;
- parser.q = "";
+ parser.state = S.SGML_DECL
+ parser.q = ""
}
- parser.sgmlDecl += c;
- continue;
+ parser.sgmlDecl += c
+ continue
case S.DOCTYPE:
if (c === ">") {
- parser.state = S.TEXT;
- emitNode(parser, "ondoctype", parser.doctype);
- parser.doctype = true; // just remember that we saw it.
+ parser.state = S.TEXT
+ emitNode(parser, "ondoctype", parser.doctype)
+ parser.doctype = true // just remember that we saw it.
} else {
- parser.doctype += c;
- if (c === "[") parser.state = S.DOCTYPE_DTD;
+ parser.doctype += c
+ if (c === "[") parser.state = S.DOCTYPE_DTD
else if (is(quote, c)) {
- parser.state = S.DOCTYPE_QUOTED;
- parser.q = c;
+ parser.state = S.DOCTYPE_QUOTED
+ parser.q = c
}
}
- continue;
+ continue
case S.DOCTYPE_QUOTED:
- parser.doctype += c;
+ parser.doctype += c
if (c === parser.q) {
- parser.q = "";
- parser.state = S.DOCTYPE;
+ parser.q = ""
+ parser.state = S.DOCTYPE
}
- continue;
+ continue
case S.DOCTYPE_DTD:
- parser.doctype += c;
- if (c === "]") parser.state = S.DOCTYPE;
+ parser.doctype += c
+ if (c === "]") parser.state = S.DOCTYPE
else if (is(quote,c)) {
- parser.state = S.DOCTYPE_DTD_QUOTED;
- parser.q = c;
+ parser.state = S.DOCTYPE_DTD_QUOTED
+ parser.q = c
}
- continue;
+ continue
case S.DOCTYPE_DTD_QUOTED:
- parser.doctype += c;
+ parser.doctype += c
if (c === parser.q) {
- parser.state = S.DOCTYPE_DTD;
- parser.q = "";
+ parser.state = S.DOCTYPE_DTD
+ parser.q = ""
}
- continue;
+ continue
case S.COMMENT:
- if (c === "-") parser.state = S.COMMENT_ENDING;
- else parser.comment += c;
- continue;
+ if (c === "-") parser.state = S.COMMENT_ENDING
+ else parser.comment += c
+ continue
case S.COMMENT_ENDING:
if (c === "-") {
- parser.state = S.COMMENT_ENDED;
- parser.comment = textopts(parser.opt, parser.comment);
- if (parser.comment) emitNode(parser, "oncomment", parser.comment);
- parser.comment = "";
+ parser.state = S.COMMENT_ENDED
+ parser.comment = textopts(parser.opt, parser.comment)
+ if (parser.comment) emitNode(parser, "oncomment", parser.comment)
+ parser.comment = ""
} else {
- strictFail(parser, "Invalid comment");
- parser.comment += "-" + c;
+ strictFail(parser, "Invalid comment")
+ parser.comment += "-" + c
}
- continue;
+ continue
case S.COMMENT_ENDED:
- if (c !== ">") strictFail(parser, "Malformed comment");
- else parser.state = S.TEXT;
- continue;
+ if (c !== ">") strictFail(parser, "Malformed comment")
+ else parser.state = S.TEXT
+ continue
case S.CDATA:
- if (c === "]") parser.state = S.CDATA_ENDING;
- else parser.cdata += c;
- continue;
+ if (c === "]") parser.state = S.CDATA_ENDING
+ else parser.cdata += c
+ continue
case S.CDATA_ENDING:
- if (c === "]") parser.state = S.CDATA_ENDING_2;
+ if (c === "]") parser.state = S.CDATA_ENDING_2
else {
- parser.cdata += "]" + c;
- parser.state = S.CDATA;
+ parser.cdata += "]" + c
+ parser.state = S.CDATA
}
- continue;
+ continue
case S.CDATA_ENDING_2:
if (c === ">") {
- if (parser.cdata) emitNode(parser, "oncdata", parser.cdata);
- emitNode(parser, "onclosecdata");
- parser.cdata = "";
- parser.state = S.TEXT;
+ if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
+ emitNode(parser, "onclosecdata")
+ parser.cdata = ""
+ parser.state = S.TEXT
} else if (c === "]") {
parser.cdata += "]"
} else {
- parser.cdata += "]]" + c;
- parser.state = S.CDATA;
+ parser.cdata += "]]" + c
+ parser.state = S.CDATA
}
- continue;
+ continue
case S.PROC_INST:
- if (c === "?") parser.state = S.PROC_INST_ENDING;
- else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY;
- else parser.procInstName += c;
- continue;
+ if (c === "?") parser.state = S.PROC_INST_ENDING
+ else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
+ else parser.procInstName += c
+ continue
case S.PROC_INST_BODY:
- if (!parser.procInstBody && is(whitespace, c)) continue;
- else if (c === "?") parser.state = S.PROC_INST_ENDING;
+ if (!parser.procInstBody && is(whitespace, c)) continue
+ else if (c === "?") parser.state = S.PROC_INST_ENDING
else if (is(quote, c)) {
- parser.state = S.PROC_INST_QUOTED;
- parser.q = c;
- parser.procInstBody += c;
- } else parser.procInstBody += c;
- continue;
+ parser.state = S.PROC_INST_QUOTED
+ parser.q = c
+ parser.procInstBody += c
+ } else parser.procInstBody += c
+ continue
case S.PROC_INST_ENDING:
if (c === ">") {
emitNode(parser, "onprocessinginstruction", {
name : parser.procInstName,
body : parser.procInstBody
- });
- parser.procInstName = parser.procInstBody = "";
- parser.state = S.TEXT;
+ })
+ parser.procInstName = parser.procInstBody = ""
+ parser.state = S.TEXT
} else {
- parser.procInstBody += "?" + c;
- parser.state = S.PROC_INST_BODY;
+ parser.procInstBody += "?" + c
+ parser.state = S.PROC_INST_BODY
}
- continue;
+ continue
case S.PROC_INST_QUOTED:
- parser.procInstBody += c;
+ parser.procInstBody += c
if (c === parser.q) {
- parser.state = S.PROC_INST_BODY;
- parser.q = "";
+ parser.state = S.PROC_INST_BODY
+ parser.q = ""
}
- continue;
+ continue
case S.OPEN_TAG:
- if (is(nameBody, c)) parser.tagName += c;
+ if (is(nameBody, c)) parser.tagName += c
else {
- newTag(parser);
- if (c === ">") openTag(parser);
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH;
+ newTag(parser)
+ if (c === ">") openTag(parser)
+ else if (c === "/") parser.state = S.OPEN_TAG_SLASH
else {
if (not(whitespace, c)) strictFail(
- parser, "Invalid character in tag name");
- parser.state = S.ATTRIB;
+ parser, "Invalid character in tag name")
+ parser.state = S.ATTRIB
}
}
- continue;
+ continue
case S.OPEN_TAG_SLASH:
if (c === ">") {
- openTag(parser, true);
- closeTag(parser);
+ openTag(parser, true)
+ closeTag(parser)
} else {
- strictFail(parser, "Forward-slash in opening tag not followed by >");
- parser.state = S.ATTRIB;
+ strictFail(parser, "Forward-slash in opening tag not followed by >")
+ parser.state = S.ATTRIB
}
- continue;
+ continue
case S.ATTRIB:
// haven't read the attribute name yet.
- if (is(whitespace, c)) continue;
- else if (c === ">") openTag(parser);
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH;
+ if (is(whitespace, c)) continue
+ else if (c === ">") openTag(parser)
+ else if (c === "/") parser.state = S.OPEN_TAG_SLASH
else if (is(nameStart, c)) {
- parser.attribName = c;
- parser.attribValue = "";
- parser.state = S.ATTRIB_NAME;
- } else strictFail(parser, "Invalid attribute name");
- continue;
+ parser.attribName = c
+ parser.attribValue = ""
+ parser.state = S.ATTRIB_NAME
+ } else strictFail(parser, "Invalid attribute name")
+ continue
case S.ATTRIB_NAME:
- if (c === "=") parser.state = S.ATTRIB_VALUE;
- else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE;
- else if (is(nameBody, c)) parser.attribName += c;
- else strictFail(parser, "Invalid attribute name");
- continue;
+ if (c === "=") parser.state = S.ATTRIB_VALUE
+ else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
+ else if (is(nameBody, c)) parser.attribName += c
+ else strictFail(parser, "Invalid attribute name")
+ continue
case S.ATTRIB_NAME_SAW_WHITE:
- if (c === "=") parser.state = S.ATTRIB_VALUE;
- else if (is(whitespace, c)) continue;
+ if (c === "=") parser.state = S.ATTRIB_VALUE
+ else if (is(whitespace, c)) continue
else {
- strictFail(parser, "Attribute without value");
- parser.tag.attributes[parser.attribName] = "";
- parser.attribValue = "";
- emitNode(parser, "onattribute", { name : parser.attribName, value : "" });
- parser.attribName = "";
- if (c === ">") openTag(parser);
+ strictFail(parser, "Attribute without value")
+ parser.tag.attributes[parser.attribName] = ""
+ parser.attribValue = ""
+ emitNode(parser, "onattribute",
+ { name : parser.attribName, value : "" })
+ parser.attribName = ""
+ if (c === ">") openTag(parser)
else if (is(nameStart, c)) {
- parser.attribName = c;
- parser.state = S.ATTRIB_NAME;
+ parser.attribName = c
+ parser.state = S.ATTRIB_NAME
} else {
- strictFail(parser, "Invalid attribute name");
- parser.state = S.ATTRIB;
+ strictFail(parser, "Invalid attribute name")
+ parser.state = S.ATTRIB
}
}
- continue;
+ continue
case S.ATTRIB_VALUE:
- if (is(whitespace, c)) continue;
+ if (is(whitespace, c)) continue
else if (is(quote, c)) {
- parser.q = c;
- parser.state = S.ATTRIB_VALUE_QUOTED;
+ parser.q = c
+ parser.state = S.ATTRIB_VALUE_QUOTED
} else {
- strictFail(parser, "Unquoted attribute value");
- parser.state = S.ATTRIB_VALUE_UNQUOTED;
- parser.attribValue = c;
+ strictFail(parser, "Unquoted attribute value")
+ parser.state = S.ATTRIB_VALUE_UNQUOTED
+ parser.attribValue = c
}
- continue;
+ continue
case S.ATTRIB_VALUE_QUOTED:
if (c !== parser.q) {
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q;
- else parser.attribValue += c;
- continue;
+ if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
+ else parser.attribValue += c
+ continue
}
- parser.tag.attributes[parser.attribName] = parser.attribValue;
+ parser.tag.attributes[parser.attribName] = parser.attribValue
emitNode(parser, "onattribute", {
- name:parser.attribName, value:parser.attribValue});
- parser.attribName = parser.attribValue = "";
- parser.q = "";
- parser.state = S.ATTRIB;
- continue;
+ name:parser.attribName, value:parser.attribValue})
+ parser.attribName = parser.attribValue = ""
+ parser.q = ""
+ parser.state = S.ATTRIB
+ continue
case S.ATTRIB_VALUE_UNQUOTED:
if (not(whitespace+">",c)) {
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U;
- else parser.attribValue += c;
- continue;
+ if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
+ else parser.attribValue += c
+ continue
}
- emitNode(parser, "onattribute", {
- name:parser.attribName, value:parser.attribValue});
- parser.attribName = parser.attribValue = "";
- if (c === ">") openTag(parser);
- else parser.state = S.ATTRIB;
- continue;
+ // record the value on the tag as the quoted path does, so the
+ // attributes object passed to onopentag includes unquoted values too.
+ parser.tag.attributes[parser.attribName] = parser.attribValue
+ emitNode(parser, "onattribute",
+ { name: parser.attribName, value: parser.attribValue})
+ parser.attribName = parser.attribValue = ""
+ if (c === ">") openTag(parser)
+ else parser.state = S.ATTRIB
+ continue
case S.CLOSE_TAG:
if (!parser.tagName) {
- if (is(whitespace, c)) continue;
+ if (is(whitespace, c)) continue
else if (not(nameStart, c)) strictFail(parser,
- "Invalid tagname in closing tag.");
- else parser.tagName = c;
+ "Invalid tagname in closing tag.")
+ else parser.tagName = c
}
- else if (c === ">") closeTag(parser);
- else if (is(nameBody, c)) parser.tagName += c;
+ else if (c === ">") closeTag(parser)
+ else if (is(nameBody, c)) parser.tagName += c
else {
if (not(whitespace, c)) strictFail(parser,
- "Invalid tagname in closing tag");
- parser.state = S.CLOSE_TAG_SAW_WHITE;
+ "Invalid tagname in closing tag")
+ parser.state = S.CLOSE_TAG_SAW_WHITE
}
- continue;
+ continue
case S.CLOSE_TAG_SAW_WHITE:
- if (is(whitespace, c)) continue;
- if (c === ">") closeTag(parser);
- else strictFail("Invalid characters in closing tag");
- continue;
+ if (is(whitespace, c)) continue
+ if (c === ">") closeTag(parser)
+ // strictFail needs the parser as its first argument to report anything.
+ else strictFail(parser, "Invalid characters in closing tag")
+ continue
case S.TEXT_ENTITY:
case S.ATTRIB_VALUE_ENTITY_Q:
case S.ATTRIB_VALUE_ENTITY_U:
switch(parser.state) {
case S.TEXT_ENTITY:
- var returnState = S.TEXT, buffer = "textNode";
- break;
+ var returnState = S.TEXT, buffer = "textNode"
+ break
case S.ATTRIB_VALUE_ENTITY_Q:
- var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue";
- break;
+ var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue"
+ break
case S.ATTRIB_VALUE_ENTITY_U:
- var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue";
- break;
+ var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue"
+ break
}
if (c === ";") {
- parser[buffer] += parseEntity(parser);
- parser.entity = "";
- parser.state = returnState;
+ parser[buffer] += parseEntity(parser)
+ parser.entity = ""
+ parser.state = returnState
}
- else if (is(entity, c)) parser.entity += c;
+ else if (is(entity, c)) parser.entity += c
else {
- strictFail("Invalid character entity");
- parser[buffer] += "&" + parser.entity;
- parser.entity = "";
- parser.state = returnState;
+ // strictFail needs the parser as its first argument to report anything.
+ strictFail(parser, "Invalid character entity")
+ parser[buffer] += "&" + parser.entity
+ parser.entity = ""
+ parser.state = returnState
}
- continue;
+ continue
default:
- throw new Error(parser, "Unknown state: " + parser.state);
- break;
+ // Error's first argument is the message; passing the parser there
+ // stringified it and lost the state number.
+ throw new Error("Unknown state: " + parser.state)
+ break
}
} // while
// cdata blocks can get very big under normal conditions. emit and move on.
// if (parser.state === S.CDATA && parser.cdata) {
- // emitNode(parser, "oncdata", parser.cdata);
- // parser.cdata = "";
+ // emitNode(parser, "oncdata", parser.cdata)
+ // parser.cdata = ""
// }
- if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser);
- return parser;
+ if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
+ return parser
}
+
})(typeof exports === "undefined" ? sax = {} : exports)

0 comments on commit 1e79e87

Please sign in to comment.