Skip to content

Commit

Permalink
Fix ogg parsing performance.
Browse files Browse the repository at this point in the history
:
  • Loading branch information
leetreveil committed Jul 9, 2012
1 parent 235fffb commit c5ff893
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 79 deletions.
172 changes: 94 additions & 78 deletions lib/ogg.js
@@ -1,9 +1,7 @@
var binary = require('binary'),
fs = require('fs'),
common = require('./common');
events = require('events');
util = require('util');

var fs = require('fs');
var events = require('events');
var strtok = require('strtok');
var common = require('./common');

var Ogg = module.exports = function(stream) {
this.stream = stream;
Expand All @@ -15,90 +13,108 @@ Ogg.prototype = new process.EventEmitter();
Ogg.prototype.parse = function() {
var self = this;

var innerStream = new events.EventEmitter();

var found_metadata_page = false;

// top level parser that handles the parsing of pages
binary.stream(self.stream)
.loop(function(end, vars) {
this.buffer('capture_pattern', 4)
.word8('stream_structire_version')
.word8('header_type_flag')
.word64lu('absolute_granule_position')
.word32lu('stream_serial_number')
.word32lu('page_sequence_no')
.word32lu('page_checksum')
.word8('page_segments')
.buffer('segments', 'page_segments')
.tap(function (vars) {

var page_len = 0;
for (var i=0; i < vars['segments'].length; i++) {
page_len += vars['segments'][i];
// 3mb buffer for metadata
var bigBuf = new Buffer(3145728);
var copy_offset = 0;

try {
// top level parser that handles the parsing of pages
strtok.parse(self.stream, function(v, cb) {
if (!v) {
cb.commentsRead = 0;
cb.position = 'header'; //read first OggS header
return new strtok.BufferType(27);
}

if (cb.position === 'header') {
cb.header = {
type: v.toString('utf-8', 0, 4),
version: v[4],
packet_flag: v[5],
pcm_sample_pos: 'not_implemented',
stream_serial_num: strtok.UINT32_LE.get(v, 14),
page_number: strtok.UINT32_LE.get(v, 18),
check_sum: strtok.UINT32_LE.get(v, 22),
segments: v[26]
};

//read segment table
cb.position = 'segments';
return new strtok.BufferType(cb.header.segments);
}

if (cb.position === 'segments') {
var pageLen = 0;
for (var i=0; i < v.length; i++) {
pageLen += v[i];
}

// now that we have the page length we can read
// all the page data
this.buffer('page_data', page_len)
.tap(function(vars) {
cb.position = 'page_data';
return new strtok.BufferType(pageLen);
}

//the metadata always starts on the second page
//so start emitting data from then
if (vars['page_sequence_no'] === 1) {
found_metadata_page = true;
}
//TODO: fix this crappy hack. We should be emitting
// data and parsing it with another parser,
// but that isn't working at the moment. What we are
// doing here is attempting to read all the metadata
// every time we read an Ogg page.
//
if (cb.position === 'page_data') {
if (cb.header.page_number >= 1) {
v.copy(bigBuf, copy_offset);
copy_offset += v.length;
try {
parseMetadata();
self.emit('done');
return strtok.DONE;
} catch (ex) {}
}

cb.position = 'header';
return new strtok.BufferType(27);
}
})

if (found_metadata_page) {
innerStream.emit('data', vars['page_data']);
};
});
});
});

var comments_read = 0;

// Second level parser that handles the parsing of metadata.
// The top level parser emits data that this parser should
// handle.
binary.stream(innerStream)
.buffer('type', 7)
.word32lu('vendor_length')
.buffer('vendor_string', 'vendor_length')
.word32lu('user_comment_list_length')
.loop(function(end, vars) {
this.word32lu('comment_length')
.buffer('comment', 'comment_length')
.tap(function(vars) {
function parseMetadata () {
var offset = 0;
var header = bigBuf.slice(offset, 7);
var vendor_len = bigBuf.readUInt32LE(offset += 7);
var vendor_string = bigBuf.slice(offset += 4, offset + vendor_len).toString();
var comments_length = bigBuf.readUInt32LE(offset += vendor_len);

offset += 4;

var comments = [];
var comments_read = 0;
for (var i = 0; i < comments_length; i++) {
comments_read++;

var comm = vars['comment'].toString();

// make sure we only split the string on the first
// occurrence of = otherwise we may split in the
// middle of the data!
var i = comm.indexOf('=');
var split = [comm.slice(0, i), comm.slice(i+1)];
var comment_length = bigBuf.readUInt32LE(offset);
var comment = bigBuf.slice(offset += 4, offset + comment_length).toString();

var sp = comment.indexOf('=');
var split = [comment.slice(0, sp), comment.slice(sp+1)];

if (split[0] === 'METADATA_BLOCK_PICTURE') {
var decoded = new Buffer(split[1], 'base64');
var picture = common.readVorbisPicture(decoded);
split[1] = picture;
}

self.emit(split[0].toUpperCase(), split[1]);

if (comments_read === vars['user_comment_list_length']) {
end();
};
})
})
.word8('framing_bit')
.tap(function(vars) {
if (vars['framing_bit'] === 1) {
self.emit('done');
} else {
self.emit('done', new Error('Expected to find framing bit at end of metadata'));
comments.push({ key : split[0].toUpperCase(), value : split[1] });

if (comments_read === comments_length) {
for (var i = 0; i < comments.length; i++) {
self.emit(comments[i].key, comments[i].value);
}
return;
}

offset += comment_length;
}
})
}
} catch (exception) {
self.emit('done', exception);
return strtok.DONE;
}
}
1 change: 0 additions & 1 deletion test/test-ogg-multipagemetadatabug.js
Expand Up @@ -26,6 +26,5 @@ parser.on('metadata', function(result) {

parser.on('done', function(err) {
if (err) throw err;
stream.destroy();
assert.ok(true);
});

0 comments on commit c5ff893

Please sign in to comment.