
Fix ogg parsing performance.

leetreveil committed Jul 9, 2012
1 parent 235fffb commit c5ff893d0785fe132999f6acbb4b003345be4928
Showing with 94 additions and 79 deletions.
  1. +94 −78 lib/ogg.js
  2. +0 −1 test/test-ogg-multipagemetadatabug.js
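Note: the rewrite below replaces the two chained `binary` stream parsers with a single `strtok` state machine. For readers new to `strtok`, its `parse` callback receives the previously read value (`v`) plus the callback itself (`cb`, used here as a state stash) and returns the next token to read; `strtok.DONE` ends the parse. A minimal sketch of that pattern, with illustrative function and state names rather than code from this commit:

    var strtok = require('strtok');

    // Minimal sketch of the strtok pull-parser pattern used in lib/ogg.js.
    // State lives on the callback itself; returning a token type asks
    // strtok to read it, and strtok.DONE stops the parse.
    function sketchParse(stream) {
      strtok.parse(stream, function (v, cb) {
        if (!v) {                            // first call: nothing read yet
          cb.position = 'header';
          return new strtok.BufferType(27);  // request the 27-byte page header
        }
        if (cb.position === 'header') {
          var segments = v[26];              // last header byte: segment count
          cb.position = 'segments';
          return new strtok.BufferType(segments);
        }
        return strtok.DONE;                  // stop after the segment table
      });
    }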
lib/ogg.js
@@ -1,9 +1,7 @@
-var binary = require('binary'),
-    fs = require('fs'),
-    common = require('./common');
-    events = require('events');
-    util = require('util');
-
+var fs = require('fs');
+var events = require('events');
+var strtok = require('strtok');
+var common = require('./common');
 
 var Ogg = module.exports = function(stream) {
   this.stream = stream;
@@ -15,90 +13,108 @@ Ogg.prototype = new process.EventEmitter();
 Ogg.prototype.parse = function() {
   var self = this;
-  var innerStream = new events.EventEmitter();
-
-  var found_metadata_page = false;
-
-  // top level parser that handles the parsing of pages
-  binary.stream(self.stream)
-    .loop(function(end, vars) {
-      this.buffer('capture_pattern', 4)
-        .word8('stream_structire_version')
-        .word8('header_type_flag')
-        .word64lu('absolute_granule_position')
-        .word32lu('stream_serial_number')
-        .word32lu('page_sequence_no')
-        .word32lu('page_checksum')
-        .word8('page_segments')
-        .buffer('segments', 'page_segments')
-        .tap(function (vars) {
-
-          var page_len = 0;
-          for (var i = 0; i < vars['segments'].length; i++) {
-            page_len += vars['segments'][i];
+  // 3mb buffer for metadata
+  var bigBuf = new Buffer(3145728);
+  var copy_offset = 0;
+
+  try {
+    // top level parser that handles the parsing of pages
+    strtok.parse(self.stream, function(v, cb) {
+      if (!v) {
+        cb.commentsRead = 0;
+        cb.position = 'header'; // read first OggS header
+        return new strtok.BufferType(27);
+      }
+
+      if (cb.position === 'header') {
+        cb.header = {
+          type: v.toString('utf-8', 0, 4),
+          version: v[4],
+          packet_flag: v[5],
+          pcm_sample_pos: 'not_implemented',
+          stream_serial_num: strtok.UINT32_LE.get(v, 14),
+          page_number: strtok.UINT32_LE.get(v, 18),
+          check_sum: strtok.UINT32_LE.get(v, 22),
+          segments: v[26]
+        };
+
+        // read segment table
+        cb.position = 'segments';
+        return new strtok.BufferType(cb.header.segments);
+      }
+
+      if (cb.position === 'segments') {
+        var pageLen = 0;
+        for (var i = 0; i < v.length; i++) {
+          pageLen += v[i];
         }
-          // now we have the page length we can now read
-          // all the page data
-          this.buffer('page_data', page_len)
-            .tap(function(vars) {
+        cb.position = 'page_data';
+        return new strtok.BufferType(pageLen);
+      }
-              //the metadata always starts on the second page
-              //so start emitting data from then
-              if (vars['page_sequence_no'] === 1) {
-                found_metadata_page = true;
-              }
+      // TODO: fix this crappy hack, we should be emitting
+      //       data and parsing it with another parser
+      //       but that isn't working atm. What we are doing
+      //       here is attempting to read all the metadata
+      //       every time we read an ogg page.
+      //
+      if (cb.position === 'page_data') {
+        if (cb.header.page_number >= 1) {
+          v.copy(bigBuf, copy_offset);
+          copy_offset += v.length;
+          try {
+            parseMetadata();
+            self.emit('done');
+            return strtok.DONE;
+          } catch (ex) {}
+        }
+
+        cb.position = 'header';
+        return new strtok.BufferType(27);
+      }
+    })
-              if (found_metadata_page) {
-                innerStream.emit('data', vars['page_data']);
-              };
-            });
-        });
-    });
-
-  var comments_read = 0;
-
-  // Second level parser that handles the parsing of metadata.
-  // The top level parser emits data that this parser should
-  // handle.
-  binary.stream(innerStream)
-    .buffer('type', 7)
-    .word32lu('vendor_length')
-    .buffer('vendor_string', 'vendor_length')
-    .word32lu('user_comment_list_length')
-    .loop(function(end, vars) {
-      this.word32lu('comment_length')
-        .buffer('comment', 'comment_length')
-        .tap(function(vars) {
+    function parseMetadata () {
+      var offset = 0;
+      var header = bigBuf.slice(offset, 7);
+      var vendor_len = bigBuf.readUInt32LE(offset += 7);
+      var vendor_string = bigBuf.slice(offset += 4, offset + vendor_len).toString();
+      var comments_length = bigBuf.readUInt32LE(offset += vendor_len);
+
+      offset += 4;
+
+      var comments = [];
+      var comments_read = 0;
+      for (var i = 0; i < comments_length; i++) {
         comments_read++;
-
-          var comm = vars['comment'].toString();
-          // make sure we only split the string on the first
-          // occurrence of = otherwise we may split in the
-          // middle of the data!
-          var i = comm.indexOf('=');
-          var split = [comm.slice(0, i), comm.slice(i+1)];
+        var comment_length = bigBuf.readUInt32LE(offset);
+        var comment = bigBuf.slice(offset += 4, offset + comment_length).toString();
+
+        var sp = comment.indexOf('=');
+        var split = [comment.slice(0, sp), comment.slice(sp+1)];
         if (split[0] === 'METADATA_BLOCK_PICTURE') {
           var decoded = new Buffer(split[1], 'base64');
           var picture = common.readVorbisPicture(decoded);
           split[1] = picture;
         }
-
-          self.emit(split[0].toUpperCase(), split[1]);
-          if (comments_read === vars['user_comment_list_length']) {
-            end();
-          };
-        })
-    })
-    .word8('framing_bit')
-    .tap(function(vars) {
-      if (vars['framing_bit'] === 1) {
-        self.emit('done');
-      } else {
-        self.emit('done', new Error('Expected to find framing bit at end of metadata'));
+        comments.push({ key : split[0].toUpperCase(), value : split[1] });
+
+        if (comments_read === comments_length) {
+          for (var i = 0; i < comments.length; i++) {
+            self.emit(comments[i].key, comments[i].value);
+          }
+          return;
+        }
+
+        offset += comment_length;
       }
-    })
+    }
+  } catch (exception) {
+    self.emit('done', exception);
+    return strtok.DONE;
+  }
 }
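The new parseMetadata walks the standard Vorbis comment block: a 32-bit little-endian vendor-string length and vendor string, a 32-bit comment count, then one length-prefixed KEY=value string per comment; the surrounding try/catch simply retries on every page until enough of the stream has been copied into bigBuf for a full parse to succeed. A standalone sketch of that layout (hypothetical helper, not part of this commit; `buf` is assumed to start at the vendor length field):

    // Layout: [vendor_len u32le][vendor][count u32le]
    //         then count * ([len u32le][KEY=value])
    function readVorbisComments(buf) {
      var offset = 0;
      var vendorLen = buf.readUInt32LE(offset); offset += 4;
      offset += vendorLen;                          // skip vendor string
      var count = buf.readUInt32LE(offset); offset += 4;
      var comments = [];
      for (var i = 0; i < count; i++) {
        var len = buf.readUInt32LE(offset); offset += 4;
        var comm = buf.toString('utf8', offset, offset + len);
        offset += len;
        var eq = comm.indexOf('=');                 // split on first '=' only
        comments.push({ key: comm.slice(0, eq).toUpperCase(),
                        value: comm.slice(eq + 1) });
      }
      return comments;
    }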
test/test-ogg-multipagemetadatabug.js
@@ -26,6 +26,5 @@ parser.on('metadata', function(result) {
 parser.on('done', function(err) {
   if (err) throw err;
-  stream.destroy();
   assert.ok(true);
 });
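For reference, the parser is consumed as in the test above; a minimal hedged usage sketch, assuming a readable stream and the events lib/ogg.js emits (the file path and comment key are illustrative):

    var fs = require('fs');
    var Ogg = require('./lib/ogg');

    var stream = fs.createReadStream('sample.ogg'); // illustrative path
    var parser = new Ogg(stream);
    parser.parse();

    parser.on('TITLE', function (title) {           // comment keys arrive upper-cased
      console.log('title:', title);
    });
    parser.on('done', function (err) {
      if (err) throw err;
    });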
