Skip to content

Commit

Permalink
configurable sax parsers (expat & saxjs backends)
Browse files Browse the repository at this point in the history
  • Loading branch information
astro committed Mar 15, 2012
1 parent bf56718 commit b6f9e1b
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 45 deletions.
2 changes: 2 additions & 0 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ exports.escapeXml = element.escapeXml;

exports.Parser = parse.Parser;
exports.parse = parse.parse;
exports.availableSaxParsers = parse.availableSaxParsers;
exports.bestSaxParser = parse.bestSaxParser;
57 changes: 34 additions & 23 deletions lib/parse.js
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
var events = require('events');
var util;
try {
util = require('util');
} catch(e) {
util = require('sys');
}
var expat = require('node-expat');
var util = require('util');

// Probe the bundled SAX backends in order of preference. Every backend
// that loads is collected in `availableSaxParsers`; the first one that
// loads becomes `bestSaxParser`, the default used by `Parser`.
exports.availableSaxParsers = [];
exports.bestSaxParser = null;
['./sax_expat', './sax_saxjs'].forEach(function(backendPath) {
    var backend = null;
    try {
        backend = require(backendPath);
    } catch (loadError) {
        // A backend that fails to load is reported and then skipped.
        console.error(loadError);
    }
    if (!backend)
        return;

    exports.availableSaxParsers.push(backend);
    exports.bestSaxParser = exports.bestSaxParser || backend;
});
var element = require('./element');

exports.Parser = function() {
exports.Parser = function(saxParser) {
events.EventEmitter.call(this);
var that = this;

this.parser = new expat.Parser('UTF-8');
var parserMod = saxParser || exports.bestSaxParser;
if (!parserMod)
throw new Error("No SAX parser available");
this.parser = new parserMod();

var el;
this.parser.addListener('startElement', function(name, attrs) {
Expand All @@ -38,33 +52,30 @@ exports.Parser = function() {
if (el)
el.t(str);
});
this.parser.addListener('error', function(e) {
that.error = e;
that.emit('error', e);
});
};
util.inherits(exports.Parser, events.EventEmitter);

exports.Parser.prototype.write = function(data) {
if (!this.parser.parse(data, false)) {
this.emit('error', new Error(this.parser.getError()));

// Premature error thrown,
// disable all functionality:
this.write = function() { };
this.end = function() { };
}
this.parser.write(data);
};

exports.Parser.prototype.end = function() {
if (!this.parser.parse('', true))
this.emit('error', new Error(this.parser.getError()));
else {
exports.Parser.prototype.end = function(data) {
this.parser.end(data);

if (!this.error) {
if (this.tree)
this.emit('tree', this.tree);
else
this.emit('error', new Error('Incomplete document'));
}
};

exports.parse = function(data) {
var p = new exports.Parser();
exports.parse = function(data, saxParser) {
var p = new exports.Parser(saxParser);
var result = null, error = null;

p.on('tree', function(tree) {
Expand Down
39 changes: 39 additions & 0 deletions lib/sax_expat.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
var util = require('util');
var events = require('events');
var expat = require('node-expat');

/**
 * SAX backend built on node-expat.
 *
 * Wraps an `expat.Parser` created for 'UTF-8' and re-emits its
 * 'startElement', 'endElement' and 'text' events on this object.
 */
var SaxExpat = module.exports = function SaxExpat() {
    var self = this;
    events.EventEmitter.call(self);

    var backend = new expat.Parser('UTF-8');
    self.parser = backend;

    function forwardStartElement(name, attrs) {
        self.emit('startElement', name, attrs);
    }
    function forwardEndElement(name) {
        self.emit('endElement', name);
    }
    function forwardText(str) {
        self.emit('text', str);
    }

    backend.on('startElement', forwardStartElement);
    backend.on('endElement', forwardEndElement);
    backend.on('text', forwardText);
    // TODO: other events, esp. entityDecl (billion laughs!)
};
util.inherits(SaxExpat, events.EventEmitter);

/**
 * Feed one chunk of the document to expat (non-final parse).
 *
 * When expat rejects the chunk, `write` and `end` on this instance are
 * replaced by no-ops and an 'error' event carrying expat's message is
 * emitted.
 *
 * @param {string|Buffer} data - chunk handed to `expat.Parser#parse`
 */
SaxExpat.prototype.write = function(data) {
    if (this.parser.parse(data, false))
        return;

    var parseError = new Error(this.parser.getError());

    // Premature error: disable all functionality *before* emitting.
    // emit('error') throws when no listener is attached (or when a
    // listener throws), which would otherwise leave the broken parser
    // enabled.
    this.write = function() { };
    this.end = function() { };

    this.emit('error', parseError);
};

/**
 * Finish the document (final parse).
 *
 * An optional last chunk may be supplied; it is parsed as part of the
 * final call instead of being discarded, matching the other SAX backend.
 * Emits 'end' on success, or 'error' carrying expat's message on failure.
 *
 * @param {string|Buffer} [data] - optional final chunk
 */
SaxExpat.prototype.end = function(data) {
    // Fall back to an empty string so that calling end() with no
    // argument behaves exactly as before.
    var finalChunk = data || '';

    if (!this.parser.parse(finalChunk, true))
        this.emit('error', new Error(this.parser.getError()));
    else
        this.emit('end');
};
37 changes: 37 additions & 0 deletions lib/sax_saxjs.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
var util = require('util');
var events = require('events');
var sax = require('sax');

/**
 * SAX backend built on sax-js.
 *
 * Creates a parser via `sax.parser(true)` and translates its callbacks
 * into 'startElement', 'endElement', 'text', 'end' and 'error' events
 * emitted on this object.
 */
var SaxSaxjs = module.exports = function SaxSaxjs() {
    var self = this;
    events.EventEmitter.call(self);

    var backend = sax.parser(true);
    self.parser = backend;

    backend.onopentag = function(tag) {
        self.emit('startElement', tag.name, tag.attributes);
    };
    backend.onclosetag = function(name) {
        self.emit('endElement', name);
    };
    backend.ontext = function(str) {
        self.emit('text', str);
    };
    backend.onend = function() {
        self.emit('end');
    };
    backend.onerror = function(e) {
        self.emit('error', e);
    };
    // TODO: other events, esp. entityDecl (billion laughs!)
};
util.inherits(SaxSaxjs, events.EventEmitter);

/**
 * Feed one chunk of the document to the underlying sax-js parser.
 *
 * @param {string} data - chunk passed straight through to `parser.write`
 */
SaxSaxjs.prototype.write = function(data) {
    var backend = this.parser;
    backend.write(data);
};

/**
 * Finish the document: write the optional final chunk, then close the
 * underlying sax-js parser.
 *
 * @param {string} [data] - optional final chunk; skipped when falsy
 */
SaxSaxjs.prototype.end = function(data) {
    var backend = this.parser;
    if (data) {
        backend.write(data);
    }
    backend.close();
};
61 changes: 39 additions & 22 deletions test/parse-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,43 @@ var vows = require('vows'),
assert = require('assert'),
ltx = require('./../lib/index');

vows.describe('ltx').addBatch({
'parsing': {
'simple document': function() {
var el = ltx.parse('<root/>');
assert.equal(el.name, 'root');
assert.equal(0, el.children.length);
},
'text with commas': function() {
var el = ltx.parse("<body>sa'sa'1'sasa</body>");
assert.equal("sa'sa'1'sasa", el.getText());
},
'erroneous document raises error': function() {
assert.throws(function() {
ltx.parse('<root></toor>');
});
},
'incomplete document raises error': function() {
assert.throws(function() {
ltx.parse('<root>');
});
ltx.availableSaxParsers.forEach(function(saxParser) {
var parse = function(s) {
return ltx.parse(s, saxParser);
};
vows.describe('ltx with ' + saxParser.name).addBatch({
'parsing': {
'simple document': function() {
var el = parse('<root/>');
assert.equal(el.name, 'root');
assert.equal(0, el.children.length);
},
'text with commas': function() {
var el = parse("<body>sa'sa'1'sasa</body>");
assert.equal("sa'sa'1'sasa", el.getText());
},
'erroneous document raises error': function() {
assert.throws(function() {
parse('<root></toor>');
});
},
'incomplete document raises error': function() {
assert.throws(function() {
parse('<root>');
});
},
'namespace declaration': function() {
var el = parse("<root xmlns='https://github.com/astro/ltx'/>");
assert.equal(el.name, 'root');
assert.equal(el.attrs.xmlns, 'https://github.com/astro/ltx');
assert.ok(el.is('root', 'https://github.com/astro/ltx'));
},
'namespace declaration with prefix': function() {
var el = parse("<x:root xmlns:x='https://github.com/astro/ltx'/>");
assert.equal(el.name, 'x:root');
assert.equal(el.getName(), 'root');
assert.ok(el.is('root', 'https://github.com/astro/ltx'));
}
}
}
}).export(module);
}).export(module);
});

0 comments on commit b6f9e1b

Please sign in to comment.