Skip to content

Commit

Permalink
Update to pure-js sax-stream XML parser
Browse files Browse the repository at this point in the history
  • Loading branch information
rubensworks committed Aug 23, 2018
1 parent fdb12fe commit c231ff4
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 98 deletions.
62 changes: 32 additions & 30 deletions lib/SparqlXmlParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import * as RDF from "rdf-js";
import {Readable} from "stream";
import {SparqlXmlBindingsTransformer} from "./SparqlXmlBindingsTransformer";
// tslint:disable-next-line:no-var-requires
const XmlStream = require('xml-stream');
const XmlNode = require('sax-stream');

/**
* Parser for the SPARQL Query Results XML format.
Expand All @@ -30,23 +30,23 @@ export class SparqlXmlParser {
* @return {NodeJS.ReadableStream} A stream of bindings.
*/
public parseXmlResultsStream(sparqlResponseStream: NodeJS.ReadableStream): NodeJS.ReadableStream {
// Collect variables
const variables: RDF.Variable[] = [];
sparqlResponseStream
.pipe(XmlNode({ strict: true, tag: 'variable' }))
.on('data', (node: any) => variables.push(this.dataFactory.variable(node.attribs.name)))
.on('error', () => { return; }) // Ignore errors, they will be emitted in the results
.on('finish', () => resultStream.emit('variables', variables));

const rawResultStream = new Readable({ objectMode: true });
rawResultStream._read = () => { return; };
// Collect results
const resultStream = sparqlResponseStream
.pipe(XmlNode({ strict: true, tag: 'result' }))
.on('error', (error: Error) => resultStream.emit('error', error))
.pipe(new SparqlXmlBindingsTransformer(this));

const xmlParser = new XmlStream(sparqlResponseStream);
xmlParser.collect('binding', true);
xmlParser.on('error', (error: Error) => resultStream.emit('error', error));
xmlParser.on('endElement: head > variable', (node: any) => variables.push(this.dataFactory.variable(node.$.name)));
xmlParser.on('endElement: results result', (bindings: any) => rawResultStream.push(bindings));
xmlParser.on('end', () => {
resultStream.emit('variables', variables);
rawResultStream.push(null);
});

const resultStream = rawResultStream.pipe(new SparqlXmlBindingsTransformer(this));
// Propagate errors
sparqlResponseStream.on('error', (error) => resultStream.emit('error', error));

return resultStream;
}

Expand All @@ -57,22 +57,23 @@ export class SparqlXmlParser {
*/
public parseXmlBindings(rawBindings: any): IBindings {
const bindings: IBindings = {};
for (const binding of rawBindings.binding) {
const key = binding.$.name;
for (const binding of rawBindings.children.binding) {
const key = binding.attribs.name;
let value: RDF.Term = null;
if (binding.bnode) {
value = this.dataFactory.blankNode(binding.bnode);
} else if (binding.literal) {
if (binding.literal.$ && binding.literal.$['xml:lang']) {
value = this.dataFactory.literal(binding.literal.$text, binding.literal.$['xml:lang']);
} else if (binding.literal.$ && binding.literal.$.datatype) {
value = this.dataFactory.literal(binding.literal.$text,
this.dataFactory.namedNode(binding.literal.$.datatype));
if (binding.children.bnode) {
value = this.dataFactory.blankNode(binding.children.bnode.value);
} else if (binding.children.literal) {
if (binding.children.literal.attribs && binding.children.literal.attribs['xml:lang']) {
value = this.dataFactory.literal(binding.children.literal.value,
binding.children.literal.attribs['xml:lang']);
} else if (binding.children.literal.attribs && binding.children.literal.attribs.datatype) {
value = this.dataFactory.literal(binding.children.literal.value,
this.dataFactory.namedNode(binding.children.literal.attribs.datatype));
} else {
value = this.dataFactory.literal(binding.literal);
value = this.dataFactory.literal(binding.children.literal.value);
}
} else {
value = this.dataFactory.namedNode(binding.uri);
value = this.dataFactory.namedNode(binding.children.uri.value);
}
bindings[this.prefixVariableQuestionMark ? ('?' + key) : key] = value;
}
Expand All @@ -88,10 +89,11 @@ export class SparqlXmlParser {
public parseXmlBooleanStream(sparqlResponseStream: NodeJS.ReadableStream): Promise<boolean> {
return new Promise((resolve, reject) => {
sparqlResponseStream.on('error', reject);
const xmlParser = new XmlStream(sparqlResponseStream);
xmlParser.on('error', reject);
xmlParser.on('endElement: boolean', (node: any) => resolve(node.$text === 'true'));
xmlParser.on('end', () => reject(new Error('No valid ASK response was found.')));
sparqlResponseStream
.pipe(XmlNode({ strict: true, tag: 'boolean' }))
.on('error', reject)
.on('data', (node: any) => resolve(node.value === 'true'))
.on('end', () => reject(new Error('No valid ASK response was found.')));
});
}

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,6 @@
},
"dependencies": {
"@rdfjs/data-model": "^1.1.0",
"xml-stream": "^0.4.5"
"sax-stream": "^1.2.3"
}
}
119 changes: 94 additions & 25 deletions test/SparqlXmlParser-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -241,69 +241,138 @@ describe('SparqlXmlParser', () => {
describe('#parseXmlBindings', () => {
it('should convert bindings with named nodes', () => {
const binding = {
binding: [
{ uri: 'http://example.org/book/book6', $: { name: 'book' } },
],
children: {
binding: [
{
attribs: { name: 'book' },
children: {
uri: { value: 'http://example.org/book/book6' },
},
},
],
},
};
return expect(parser.parseXmlBindings(binding))
.toEqual({ '?book': namedNode('http://example.org/book/book6') });
});

it('should convert bindings with named nodes without variable prefixing', () => {
const binding = {
binding: [
{ uri: 'http://example.org/book/book6', $: { name: 'book' } },
],
children: {
binding: [
{
attribs: { name: 'book' },
children: {
uri: { value: 'http://example.org/book/book6' },
},
},
],
},
};
return expect(new SparqlXmlParser().parseXmlBindings(binding))
.toEqual({ book: namedNode('http://example.org/book/book6') });
});

it('should convert bindings with blank nodes', () => {
const binding = {
binding: [
{ bnode: 'abc', $: { name: 'book' } },
],
children: {
binding: [
{
attribs: { name: 'book' },
children: {
bnode: { value: 'abc' },
},
},
],
},
};
return expect(parser.parseXmlBindings(binding)).toEqual({ '?book': blankNode('abc') });
});

it('should convert bindings with literals', () => {
const binding = {
binding: [
{ literal: 'abc', $: { name: 'book' } },
],
children: {
binding: [
{
attribs: { name: 'book' },
children: {
literal: { value: 'abc' },
},
},
],
},
};
return expect(parser.parseXmlBindings(binding)).toEqual({ '?book': literal('abc') });
});

it('should convert bindings with languaged literals', () => {
const binding = {
binding: [
{ literal: { $text: 'abc', $: { 'xml:lang': 'en-us' } }, $: { name: 'book' } },
],
children: {
binding: [
{
attribs: { name: 'book' },
children: {
literal: { value: 'abc', attribs: { 'xml:lang': 'en-us' } },
},
},
],
},
};
return expect(parser.parseXmlBindings(binding)).toEqual({ '?book': literal('abc', 'en-us') });
});

it('should convert bindings with datatyped literals', () => {
const binding = {
binding: [
{ literal: { $text: 'abc', $: { datatype: 'http://ex' } }, $: { name: 'book' } },
],
children: {
binding: [
{
attribs: { name: 'book' },
children: {
literal: { value: 'abc', attribs: { datatype: 'http://ex' } },
},
},
],
},
};
return expect(parser.parseXmlBindings(binding)).toEqual({ '?book': literal('abc', namedNode('http://ex')) });
});

it('should convert mixed bindings', () => {
const binding = {
binding: [
{ uri: 'http://example.org/book/book6', $: { name: 'book1' } },
{ bnode: 'abc', $: { name: 'book2' } },
{ literal: 'abc', $: { name: 'book3' } },
{ literal: { $text: 'abc', $: { 'xml:lang': 'en-us' } }, $: { name: 'book4' } },
{ literal: { $text: 'abc', $: { datatype: 'http://ex' } }, $: { name: 'book5' } },
],
children: {
binding: [
{
attribs: { name: 'book1' },
children: {
uri: { value: 'http://example.org/book/book6' },
},
},
{
attribs: { name: 'book2' },
children: {
bnode: { value: 'abc' },
},
},
{
attribs: { name: 'book3' },
children: {
literal: { value: 'abc' },
},
},
{
attribs: { name: 'book4' },
children: {
literal: { value: 'abc', attribs: { 'xml:lang': 'en-us' } },
},
},
{
attribs: { name: 'book5' },
children: {
literal: { value: 'abc', attribs: { datatype: 'http://ex' } },
},
},
],
},
};
return expect(parser.parseXmlBindings(binding)).toEqual({
'?book1': namedNode('http://example.org/book/book6'),
Expand Down
53 changes: 11 additions & 42 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,6 @@ bcrypt-pbkdf@^1.0.0:
dependencies:
tweetnacl "^0.14.3"

bindings@^1.2.1:
version "1.3.0"
resolved "https://registry.yarnpkg.com/bindings/-/bindings-1.3.0.tgz#b346f6ecf6a95f5a815c5839fc7cdb22502f1ed7"

brace-expansion@^1.1.7:
version "1.1.11"
resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
Expand Down Expand Up @@ -657,7 +653,7 @@ data-urls@^1.0.0:
whatwg-mimetype "^2.1.0"
whatwg-url "^7.0.0"

debug@^2.1.2, debug@^2.2.0, debug@^2.3.3, debug@^2.6.8, debug@^2.6.9:
debug@^2.1.2, debug@^2.2.0, debug@^2.3.3, debug@^2.6.8, debug@^2.6.9, debug@~2:
version "2.6.9"
resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f"
dependencies:
Expand Down Expand Up @@ -1214,12 +1210,6 @@ iconv-lite@0.4.23, iconv-lite@^0.4.4:
dependencies:
safer-buffer ">= 2.1.2 < 3"

iconv@^2.1.4:
version "2.3.0"
resolved "https://registry.yarnpkg.com/iconv/-/iconv-2.3.0.tgz#9739887c2bd492d9a5e236dd3667c5358601201b"
dependencies:
nan "^2.3.5"

ignore-walk@^3.0.1:
version "3.0.1"
resolved "https://registry.yarnpkg.com/ignore-walk/-/ignore-walk-3.0.1.tgz#a83e62e7d272ac0e3b551aaa82831a19b69f82f8"
Expand All @@ -1244,7 +1234,7 @@ inflight@^1.0.4:
once "^1.3.0"
wrappy "1"

inherits@2, inherits@^2.0.3, inherits@~2.0.1, inherits@~2.0.3:
inherits@2, inherits@^2.0.3, inherits@~2.0.3:
version "2.0.3"
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de"

Expand Down Expand Up @@ -2140,7 +2130,7 @@ ms@2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8"

nan@^2.10.0, nan@^2.3.5, nan@^2.9.2:
nan@^2.9.2:
version "2.10.0"
resolved "https://registry.yarnpkg.com/nan/-/nan-2.10.0.tgz#96d0cd610ebd58d4b4de9cc0c6828cda99c7548f"

Expand Down Expand Up @@ -2172,13 +2162,6 @@ needle@^2.2.1:
iconv-lite "^0.4.4"
sax "^1.2.4"

node-expat@^2.3.1:
version "2.3.17"
resolved "https://registry.yarnpkg.com/node-expat/-/node-expat-2.3.17.tgz#5fab92c16737ec5b9beafdeba99ecdaba1ebc466"
dependencies:
bindings "^1.2.1"
nan "^2.10.0"

node-int64@^0.4.0:
version "0.4.0"
resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
Expand Down Expand Up @@ -2561,15 +2544,6 @@ read-pkg@^1.0.0:
normalize-package-data "^2.3.2"
path-type "^1.0.0"

readable-stream@^1.0.31:
version "1.1.14"
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-1.1.14.tgz#7cf4c54ef648e3813084c636dd2079e166c081d9"
dependencies:
core-util-is "~1.0.0"
inherits "~2.0.1"
isarray "0.0.1"
string_decoder "~0.10.x"

readable-stream@^2.0.1, readable-stream@^2.0.6, readable-stream@^2.2.2:
version "2.3.6"
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-2.3.6.tgz#b11c27d88b8ff1fbe070643cf94b0c79ae1b0aaf"
Expand Down Expand Up @@ -2743,7 +2717,14 @@ sane@^2.0.0:
optionalDependencies:
fsevents "^1.2.3"

sax@^1.2.4:
sax-stream@^1.2.3:
version "1.2.3"
resolved "https://registry.yarnpkg.com/sax-stream/-/sax-stream-1.2.3.tgz#5f6b08f26a5a1bb8afa86c1723545fd36831beae"
dependencies:
debug "~2"
sax "~1"

sax@^1.2.4, sax@~1:
version "1.2.4"
resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9"

Expand Down Expand Up @@ -2962,10 +2943,6 @@ string-width@^1.0.1:
is-fullwidth-code-point "^2.0.0"
strip-ansi "^4.0.0"

string_decoder@~0.10.x:
version "0.10.31"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-0.10.31.tgz#62e203bc41766c6c28c9fc84301dab1c5310fa94"

string_decoder@~1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8"
Expand Down Expand Up @@ -3360,14 +3337,6 @@ xml-name-validator@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a"

xml-stream@^0.4.5:
version "0.4.5"
resolved "https://registry.yarnpkg.com/xml-stream/-/xml-stream-0.4.5.tgz#7452d85b37f9b881a70eff0cf74a0df02088edeb"
dependencies:
iconv "^2.1.4"
node-expat "^2.3.1"
readable-stream "^1.0.31"

y18n@^3.2.1:
version "3.2.1"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-3.2.1.tgz#6d15fba884c08679c0d77e88e7759e811e07fa41"
Expand Down

0 comments on commit c231ff4

Please sign in to comment.