Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Support for Sax's createStream + some extra fields in the data structure for easier querying #1

Merged
merged 1 commit into from

3 participants

Mattias Bengtsson Jann Horn Alexandru Topliceanu
Mattias Bengtsson
  • Added support for sax's new createStream method
  • Added some extra fields for querying the data. Not much, but was handy for me. Might be a bit slower.
  • Changed the test accordingly.

Was able to parse 64M of xml to JSON in constant space. (Need to test more).

This was done in haste late one night. I understand if you don't want to apply all of this. If so, tell me and I'll split up the patch!

Mattias Bengtsson moonlite * Added support for sax.js's createStream
* Added some extra code for querying the data. Not much, but was handy for me.
* Changed the test accordingly.

Was able to parse 64M of xml to JSON in constant space.
b7b6715
Jann Horn
Owner

Looks good to me. I'll probably merge and publish it later today.

Mattias Bengtsson

Sweet!
I would also like to abstract away the implementations of createParser and createStream, since they are identical, but I'm not sure how to do that in a good way. For now, this works. :)

Jann Horn
Owner

Maybe you could add a single function for both with an if/else, and make two aliases to it? Then you don't need redundant listeners.

Mattias Bengtsson

Hm, yeah, something like that. I'll see if I can get this done after work tonight. :)

Jann Horn
Owner

@moonlite Hmm, no update? Guess I'll just merge this for now.

Jann Horn thejh merged commit b43d8ba into from
Mattias Bengtsson
Alexandru Topliceanu

parseStream still isn't published on npm :(

Jann Horn
Owner

@topliceanu Sorry about that, published.

Alexandru Topliceanu

Great, thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jan 18, 2012
  1. Mattias Bengtsson

    * Added support for sax.js's createStream

    moonlite authored
    * Added some extra code for querying the data. Not much, but was handy for me.
    * Changed the test accordingly.
    
    Was able to parse 64M of xml to JSON in constant space.
This page is out of date. Refresh to see the latest.
Showing with 149 additions and 21 deletions.
  1. +35 −3 index.coffee
  2. +63 −6 index.js
  3. +1 −1  package.json
  4. +20 −5 test.coffee
  5. +30 −6 test.js
38 index.coffee
View
@@ -11,8 +11,12 @@ exports.createParser = (cbError, cbFinished, wantedNodes, strict) ->
parser.onopentag = ({name, attributes}) ->
if wantedNodes[name]? or element?
parent = element
- element = {parent, name, attributes, children: []}
- parent?.children.push element
+ element = {parent, name, attributes, children: {all: [], tags: [], text: []}}
+ if parent?
+ parent.children.all.push element
+ if not parent.children.tags[name]?
+ parent.children.tags[name] = new Array
+ parent.children.tags[name].push element
if wantedNodes[name]?
interestingStack.push name
parser.onclosetag = (name) ->
@@ -21,5 +25,33 @@ exports.createParser = (cbError, cbFinished, wantedNodes, strict) ->
wantedNodes[name] element
element = element?.parent
parser.ontext = (text) ->
- element?.children.push text
+ element?.children.all.push text
+ element?.children.text.push text
parser
+
+exports.createStream = (cbError, cbFinished, wantedNodes, strict) ->
+ interestingStack = []
+ element = null
+ stream = sax.createStream strict
+ stream.on "error" , cbError
+ stream.on "end" , cbFinished
+ stream.on "opentag", ({name, attributes}) ->
+ if wantedNodes[name]? or element?
+ parent = element
+ element = {parent, name, attributes, children: {all: [], tags: [], text: []}}
+ if parent?
+ parent.children.all.push element
+ if not parent.children.tags[name]?
+ parent.children.tags[name] = new Array
+ parent.children.tags[name].push element
+ if wantedNodes[name]?
+ interestingStack.push name
+ stream.on "closetag", (name) ->
+ if name is last interestingStack
+ interestingStack.pop()
+ wantedNodes[name] element
+ element = element?.parent
+ stream.on "text", (text) ->
+ element?.children.all.push text
+ element?.children.text.push text
+ stream
69 index.js
View
@@ -1,9 +1,12 @@
(function() {
var last, sax;
+
sax = require('sax');
+
last = function(array) {
return array[array.length - 1];
};
+
exports.createParser = function(cbError, cbFinished, wantedNodes, strict) {
var element, interestingStack, parser;
interestingStack = [];
@@ -20,15 +23,21 @@
parent: parent,
name: name,
attributes: attributes,
- children: []
+ children: {
+ all: [],
+ tags: [],
+ text: []
+ }
};
if (parent != null) {
- parent.children.push(element);
+ parent.children.all.push(element);
+ if (!(parent.children.tags[name] != null)) {
+ parent.children.tags[name] = new Array;
+ }
+ parent.children.tags[name].push(element);
}
}
- if (wantedNodes[name] != null) {
- return interestingStack.push(name);
- }
+ if (wantedNodes[name] != null) return interestingStack.push(name);
};
parser.onclosetag = function(name) {
if (name === last(interestingStack)) {
@@ -38,8 +47,56 @@
return element = element != null ? element.parent : void 0;
};
parser.ontext = function(text) {
- return element != null ? element.children.push(text) : void 0;
+ if (element != null) element.children.all.push(text);
+ return element != null ? element.children.text.push(text) : void 0;
};
return parser;
};
+
+ exports.createStream = function(cbError, cbFinished, wantedNodes, strict) {
+ var element, interestingStack, stream;
+ interestingStack = [];
+ element = null;
+ stream = sax.createStream(strict);
+ stream.on("error", cbError);
+ stream.on("end", cbFinished);
+ stream.on("opentag", function(_arg) {
+ var attributes, name, parent;
+ name = _arg.name, attributes = _arg.attributes;
+ if ((wantedNodes[name] != null) || (element != null)) {
+ parent = element;
+ element = {
+ parent: parent,
+ name: name,
+ attributes: attributes,
+ children: {
+ all: [],
+ tags: [],
+ text: []
+ }
+ };
+ if (parent != null) {
+ parent.children.all.push(element);
+ if (!(parent.children.tags[name] != null)) {
+ parent.children.tags[name] = new Array;
+ }
+ parent.children.tags[name].push(element);
+ }
+ }
+ if (wantedNodes[name] != null) return interestingStack.push(name);
+ });
+ stream.on("closetag", function(name) {
+ if (name === last(interestingStack)) {
+ interestingStack.pop();
+ wantedNodes[name](element);
+ }
+ return element = element != null ? element.parent : void 0;
+ });
+ stream.on("text", function(text) {
+ if (element != null) element.children.all.push(text);
+ return element != null ? element.children.text.push(text) : void 0;
+ });
+ return stream;
+ };
+
}).call(this);
2  package.json
View
@@ -14,7 +14,7 @@
, "url": "http://github.com/thejh/node-halfstreamxml.git"
}
, "dependencies":
- { "sax": "0.1.2"
+ { "sax": "0.3.5"
}
, "files":
[ "package.json"
25 test.coffee
View
@@ -1,10 +1,25 @@
halfstreamxml = require './index'
+
onerror = (e) ->
console.error "ERROR: #{e}"
-onfinish = ->
- console.log "DONE"
+onfinishparser = ->
+ console.log "PARSER TEST DONE\n"
+onfinishstream = ->
+ console.log "STREAM TEST DONE\n"
+
wantednodes =
PERSON: (person) ->
- console.log "received person: #{JSON.stringify person, ['name', 'attributes', 'age', 'children']}"
-parser = halfstreamxml.createParser onerror, onfinish, wantednodes, false
-parser.write('<xml><person age="15"><name>Barfoo</name></person><person age="17"><name>Foo Bar</name></person></xml>').close()
+ str = JSON.stringify {
+ name: person.children.tags.NAME[0].children.text.join()
+ age: person.attributes.age
+ }
+ console.log "received person: #{str}"
+
+xml = '<xml><person age="15"><name>Barfoo</name></person><person age="17"><name>Foo Bar</name></person></xml>'
+
+parser = halfstreamxml.createParser onerror, onfinishparser, wantednodes, false
+stream = halfstreamxml.createStream onerror, onfinishstream, wantednodes, false
+
+parser.write(xml).close()
+stream.write(xml)
+stream.end()
36 test.js
View
@@ -1,17 +1,41 @@
(function() {
- var halfstreamxml, onerror, onfinish, parser, wantednodes;
+ var halfstreamxml, onerror, onfinishparser, onfinishstream, parser, stream, wantednodes, xml;
+
halfstreamxml = require('./index');
+
onerror = function(e) {
return console.error("ERROR: " + e);
};
- onfinish = function() {
- return console.log("DONE");
+
+ onfinishparser = function() {
+ return console.log("PARSER TEST DONE\n");
};
+
+ onfinishstream = function() {
+ return console.log("STREAM TEST DONE\n");
+ };
+
wantednodes = {
PERSON: function(person) {
- return console.log("received person: " + (JSON.stringify(person, ['name', 'attributes', 'age', 'children'])));
+ var str;
+ str = JSON.stringify({
+ name: person.children.tags.NAME[0].children.text.join(),
+ age: person.attributes.age
+ });
+ return console.log("received person: " + str);
}
};
- parser = halfstreamxml.createParser(onerror, onfinish, wantednodes, false);
- parser.write('<xml><person age="15"><name>Barfoo</name></person><person age="17"><name>Foo Bar</name></person></xml>').close();
+
+ xml = '<xml><person age="15"><name>Barfoo</name></person><person age="17"><name>Foo Bar</name></person></xml>';
+
+ parser = halfstreamxml.createParser(onerror, onfinishparser, wantednodes, false);
+
+ stream = halfstreamxml.createStream(onerror, onfinishstream, wantednodes, false);
+
+ parser.write(xml).close();
+
+ stream.write(xml);
+
+ stream.end();
+
}).call(this);
Something went wrong with that request. Please try again.