Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Please note that TextDecoderStream needs to be used on binary streams. This version uses web streams instead of Node streams. Using TransformStream allows direct usage in a web browser; using it in Node is also possible, since Node's pipe accepts web streams.
- Loading branch information
Showing
4 changed files
with
281 additions
and
320 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,194 +1,159 @@ | ||
const { Transform } = require('stream'); | ||
const { StringDecoder } = require('string_decoder'); | ||
const sax = require('sax'); | ||
const Saxophone = require('@pirxpilot/saxophone'); | ||
const stack = require('./stack'); | ||
|
||
const debug = require('debug')('sax-super-stream'); | ||
|
||
const stack = require('./stack'); | ||
/* global TransformStream, EventTarget */ | ||
|
||
|
||
module.exports = initParser; | ||
|
||
// used to mark all tags that we want to skip | ||
const IGNORE = Object.create(null); | ||
const IGNORE = Symbol('ignore'); | ||
|
||
function stripPrefix(name) { | ||
const index = name.indexOf(':'); | ||
return index < 0 ? name : name.slice(index + 1); | ||
const prefixed = name.split(':', 2); | ||
return prefixed[1] ? prefixed : [undefined, prefixed[0]]; | ||
} | ||
|
||
function handlers(parserConfig, fn) { | ||
function makeHandlers(parserConfig, fn) { | ||
const ns = Object.create(null); | ||
let defaultNS; | ||
const items = stack(); | ||
const parsers = stack(parserConfig); | ||
const context = {}; | ||
let cdata; | ||
|
||
function onopentag(node) { | ||
const tag = node.local; | ||
const target = new EventTarget(); | ||
target.addEventListener('tagopen', ontagopen); | ||
target.addEventListener('tagclose', ontagclose); | ||
target.addEventListener('text', ontext); | ||
target.addEventListener('cdata', ontext); | ||
return target; | ||
|
||
function ontagopen({ detail: { name, attrs, isSelfClosing } }) { | ||
const [prefix, tag] = stripPrefix(name); | ||
const attrsObj = Saxophone.parseAttrs(attrs); | ||
updateNamespaces(attrsObj); | ||
const uri = (prefix && ns[prefix]) ?? defaultNS; | ||
|
||
const tagParser = verifyNS(parsers.top()[tag]); | ||
let elem; | ||
|
||
debug('tagopen', tag, attrs); | ||
if (!tagParser) { | ||
parsers.push(IGNORE, tag); | ||
return; | ||
} | ||
if (typeof tagParser === 'function') { | ||
parseWith(tagParser); | ||
} else { | ||
if (typeof tagParser.$ === 'function') { | ||
parseWith(tagParser.$); | ||
} | ||
parsers.push(tagParser, tag); | ||
} | ||
if (isSelfClosing) { | ||
ontagclose({ detail: { name } }); | ||
} | ||
|
||
function parseWith(tp) { | ||
elem = tp.call(items, node, items.top(), context); | ||
const attributes = Object.fromEntries( | ||
Object.entries(attrsObj).map( | ||
([name, value]) => [ | ||
name, | ||
{ | ||
name, | ||
value: Saxophone.parseEntities(value) | ||
} | ||
] | ||
) | ||
); | ||
const node = { prefix, tag, attributes }; | ||
const elem = tp.call(items, node, items.top(), context); | ||
if (elem) { | ||
items.push(elem, tag); | ||
} | ||
} | ||
|
||
// if parser specifies namespace, it has to much naode namespace | ||
// if parser specifies namespace, it has to match node namespace | ||
function verifyNS(tp) { | ||
if (!tp) { | ||
return tp; | ||
} | ||
if (!tp.$uri) { | ||
return tp; | ||
} | ||
if (tp.$uri === node.uri) { | ||
if (tp.$uri === uri) { | ||
return tp; | ||
} | ||
} | ||
|
||
debug('onopentag', tag); | ||
if (!tagParser) { | ||
parsers.push(IGNORE, tag); | ||
return; | ||
} | ||
if (typeof tagParser === 'function') { | ||
parseWith(tagParser); | ||
} else { | ||
if (typeof tagParser.$ === 'function') { | ||
parseWith(tagParser.$); | ||
} | ||
parsers.push(tagParser, tag); | ||
} | ||
} | ||
|
||
function onclosetag(tag) { | ||
let parser; | ||
let top; | ||
|
||
tag = stripPrefix(tag); | ||
function ontagclose({ detail: { name } }) { | ||
const [, tag] = stripPrefix(name); | ||
|
||
debug('closetag', tag); | ||
debug('tagclose', tag); | ||
|
||
parser = parsers.pop(tag); | ||
if (parser && typeof parser.$after === 'function') { | ||
const parser = parsers.pop(tag); | ||
if (typeof parser?.$after === 'function') { | ||
debug('$after', tag); | ||
parser.$after.call(items, items.top(), context); | ||
} | ||
if (parser !== IGNORE) { | ||
top = items.pop(tag); | ||
const top = items.pop(tag); | ||
// if nothing on the stack emit result | ||
if (top !== undefined && items.empty()) { | ||
fn(null, top); | ||
} | ||
} | ||
} | ||
|
||
function ontext(value) { | ||
const textParser = parsers.top().$text; | ||
if (textParser) { | ||
textParser.call(items, value, items.top(), context); | ||
} | ||
} | ||
function ontext({ detail: { contents } }) { | ||
debug('text', contents); | ||
|
||
function onopencdata() { | ||
const textParser = parsers.top().$text; | ||
if (textParser) { | ||
cdata = []; | ||
} | ||
textParser?.call(items, Saxophone.parseEntities(contents), items.top(), context); | ||
} | ||
|
||
function oncdata(value) { | ||
if (cdata) { | ||
cdata.push(value); | ||
} | ||
} | ||
function updateNamespaces(attrsObj) { | ||
Object.entries(attrsObj).forEach(xmlns); | ||
|
||
function onclosecdata() { | ||
if (!cdata) { | ||
return; | ||
function xmlns([name, value]) { | ||
if (name === 'xmlns') { | ||
defaultNS = value; | ||
} else if (name.startsWith('xmlns:')) { | ||
const prefix = name.slice(6); // 'xmlns:'.length === 6 | ||
ns[prefix] = value; | ||
} | ||
} | ||
const textParser = parsers.top().$text; | ||
textParser.call(items, cdata.join(''), items.top(), context); | ||
cdata = undefined; | ||
} | ||
|
||
function onerror(err) { | ||
debug('Detected error', err); | ||
// mark error as handled | ||
this.error = null; | ||
fn(err); | ||
} | ||
|
||
return { | ||
onopentag, | ||
onclosetag, | ||
ontext, | ||
onopencdata, | ||
oncdata, | ||
onclosecdata, | ||
onerror | ||
}; | ||
} | ||
|
||
|
||
function initParser(parserConfig, saxOptions) { | ||
function initParser(config) { | ||
|
||
saxOptions = Object.assign({ | ||
trim: true, | ||
normalize: true, | ||
lowercase: false, | ||
xmlns: true, | ||
position: false, | ||
strictEntities: true, | ||
noscript: true | ||
}, saxOptions); | ||
const results = []; | ||
|
||
const parser = sax.parser(true, saxOptions); | ||
const decoder = new StringDecoder('utf8'); | ||
let results = []; | ||
let parserError; | ||
const target = makeHandlers(config, (err, obj) => results.push(obj)); | ||
const sax = new Saxophone(target); | ||
const writer = sax.getWriter(); | ||
|
||
Object.assign(parser, handlers(parserConfig, (err, obj) => { | ||
if (!err) { | ||
results.push(obj); | ||
} else { | ||
// only report the first error | ||
parserError = parserError || err; | ||
} | ||
})); | ||
|
||
const ts = new Transform({ | ||
readableObjectMode: true, | ||
flush(next) { | ||
parser.close(); | ||
if (parserError) { | ||
return next(parserError); | ||
} | ||
flush(this); | ||
next(); | ||
return new TransformStream({ | ||
async flush(controller) { | ||
await writer.ready; | ||
await writer.close(); | ||
flush(controller); | ||
controller.terminate(); | ||
}, | ||
transform(chunk, encoding, next) { | ||
if (parserError) { | ||
return next(parserError); | ||
} | ||
write(chunk); | ||
flush(this); | ||
next(); | ||
async transform(chunk, controller) { | ||
debug('writing', chunk); | ||
await writer.ready; | ||
await writer.write(chunk); | ||
flush(controller); | ||
} | ||
}); | ||
return ts; | ||
|
||
function write(chunk) { | ||
const str = decoder.write(chunk); | ||
parser.write(str); | ||
} | ||
|
||
function flush(stream) { | ||
if (!results.length) { | ||
return; | ||
} | ||
results.forEach(r => { | ||
stream.push(r); | ||
}); | ||
results = []; | ||
function flush(controller) { | ||
results.forEach(r => controller.enqueue(r)); | ||
results.length = 0; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.