
Fix ogg parsing performance.

leetreveil committed Jul 9, 2012
1 parent 235fffb commit c5ff893d0785fe132999f6acbb4b003345be4928
Showing with 94 additions and 79 deletions.
  1. +94 −78 lib/ogg.js
  2. +0 −1 test/test-ogg-multipagemetadatabug.js
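Note: the rewrite below replaces the two chained `binary` stream parsers with a single `strtok` state machine. For readers new to `strtok`, its `parse` callback receives the previously read value (`v`) plus the callback itself (`cb`, used here as a state stash) and returns the next token to read; `strtok.DONE` ends the parse. A minimal sketch of that pattern, with illustrative function and state names rather than code from this commit:

    var strtok = require('strtok');

    // Minimal sketch of the strtok pull-parser pattern used in lib/ogg.js.
    // State lives on the callback itself; returning a token type asks
    // strtok to read it, and strtok.DONE stops the parse.
    function sketchParse(stream) {
      strtok.parse(stream, function (v, cb) {
        if (!v) {                            // first call: nothing read yet
          cb.position = 'header';
          return new strtok.BufferType(27);  // request the 27-byte page header
        }
        if (cb.position === 'header') {
          var segments = v[26];              // last header byte: segment count
          cb.position = 'segments';
          return new strtok.BufferType(segments);
        }
        return strtok.DONE;                  // stop after the segment table
      });
    }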
lib/ogg.js
@@ -1,9 +1,7 @@
-var binary = require('binary'),
-    fs = require('fs'),
-    common = require('./common');
-    events = require('events');
-    util = require('util');
-
+var fs = require('fs');
+var events = require('events');
+var strtok = require('strtok');
+var common = require('./common');
 
 var Ogg = module.exports = function(stream) {
   this.stream = stream;
@@ -15,90 +13,108 @@ Ogg.prototype = new process.EventEmitter();
 Ogg.prototype.parse = function() {
   var self = this;
-  var innerStream = new events.EventEmitter();
-
-  var found_metadata_page = false;
-
-  // top level parser that handles the parsing of pages
-  binary.stream(self.stream)
-    .loop(function(end, vars) {
-      this.buffer('capture_pattern', 4)
-        .word8('stream_structire_version')
-        .word8('header_type_flag')
-        .word64lu('absolute_granule_position')
-        .word32lu('stream_serial_number')
-        .word32lu('page_sequence_no')
-        .word32lu('page_checksum')
-        .word8('page_segments')
-        .buffer('segments', 'page_segments')
-        .tap(function (vars) {
-
-          var page_len = 0;
-          for (var i = 0; i < vars['segments'].length; i++) {
-            page_len += vars['segments'][i];
+  // 3mb buffer for metadata
+  var bigBuf = new Buffer(3145728);
+  var copy_offset = 0;
+
+  try {
+    // top level parser that handles the parsing of pages
+    strtok.parse(self.stream, function(v, cb) {
+      if (!v) {
+        cb.commentsRead = 0;
+        cb.position = 'header'; // read first OggS header
+        return new strtok.BufferType(27);
+      }
+
+      if (cb.position === 'header') {
+        cb.header = {
+          type: v.toString('utf-8', 0, 4),
+          version: v[4],
+          packet_flag: v[5],
+          pcm_sample_pos: 'not_implemented',
+          stream_serial_num: strtok.UINT32_LE.get(v, 14),
+          page_number: strtok.UINT32_LE.get(v, 18),
+          check_sum: strtok.UINT32_LE.get(v, 22),
+          segments: v[26]
+        };
+
+        // read segment table
+        cb.position = 'segments';
+        return new strtok.BufferType(cb.header.segments);
+      }
+
+      if (cb.position === 'segments') {
+        var pageLen = 0;
+        for (var i = 0; i < v.length; i++) {
+          pageLen += v[i];
         }
-          // now we have the page length we can now read
-          // all the page data
-          this.buffer('page_data', page_len)
-            .tap(function(vars) {
+        cb.position = 'page_data';
+        return new strtok.BufferType(pageLen);
+      }
-              //the metadata always starts on the second page
-              //so start emitting data from then
-              if (vars['page_sequence_no'] === 1) {
-                found_metadata_page = true;
-              }
+      // TODO: fix this crappy hack, we should be emitting
+      //       data and parsing it with another parser
+      //       but that isn't working atm. What we are doing
+      //       here is attempting to read all the metadata
+      //       every time we read an ogg page.
+      //
+      if (cb.position === 'page_data') {
+        if (cb.header.page_number >= 1) {
+          v.copy(bigBuf, copy_offset);
+          copy_offset += v.length;
+          try {
+            parseMetadata();
+            self.emit('done');
+            return strtok.DONE;
+          } catch (ex) {}
+        }
+
+        cb.position = 'header';
+        return new strtok.BufferType(27);
+      }
+    })
-              if (found_metadata_page) {
-                innerStream.emit('data', vars['page_data']);
-              };
-            });
-        });
-    });
-
-  var comments_read = 0;
-
-  // Second level parser that handles the parsing of metadata.
-  // The top level parser emits data that this parser should
-  // handle.
-  binary.stream(innerStream)
-    .buffer('type', 7)
-    .word32lu('vendor_length')
-    .buffer('vendor_string', 'vendor_length')
-    .word32lu('user_comment_list_length')
-    .loop(function(end, vars) {
-      this.word32lu('comment_length')
-        .buffer('comment', 'comment_length')
-        .tap(function(vars) {
+    function parseMetadata () {
+      var offset = 0;
+      var header = bigBuf.slice(offset, 7);
+      var vendor_len = bigBuf.readUInt32LE(offset += 7);
+      var vendor_string = bigBuf.slice(offset += 4, offset + vendor_len).toString();
+      var comments_length = bigBuf.readUInt32LE(offset += vendor_len);
+
+      offset += 4;
+
+      var comments = [];
+      var comments_read = 0;
+      for (var i = 0; i < comments_length; i++) {
         comments_read++;
-
-          var comm = vars['comment'].toString();
-          // make sure we only split the string on the first
-          // occurrence of = otherwise we may split in the
-          // middle of the data!
-          var i = comm.indexOf('=');
-          var split = [comm.slice(0, i), comm.slice(i+1)];
+        var comment_length = bigBuf.readUInt32LE(offset);
+        var comment = bigBuf.slice(offset += 4, offset + comment_length).toString();
+
+        var sp = comment.indexOf('=');
+        var split = [comment.slice(0, sp), comment.slice(sp+1)];
         if (split[0] === 'METADATA_BLOCK_PICTURE') {
           var decoded = new Buffer(split[1], 'base64');
           var picture = common.readVorbisPicture(decoded);
           split[1] = picture;
         }
-
-          self.emit(split[0].toUpperCase(), split[1]);
-          if (comments_read === vars['user_comment_list_length']) {
-            end();
-          };
-        })
-    })
-    .word8('framing_bit')
-    .tap(function(vars) {
-      if (vars['framing_bit'] === 1) {
-        self.emit('done');
-      } else {
-        self.emit('done', new Error('Expected to find framing bit at end of metadata'));
+        comments.push({ key : split[0].toUpperCase(), value : split[1] });
+
+        if (comments_read === comments_length) {
+          for (var i = 0; i < comments.length; i++) {
+            self.emit(comments[i].key, comments[i].value);
+          }
+          return;
+        }
+
+        offset += comment_length;
       }
-    })
+    }
+  } catch (exception) {
+    self.emit('done', exception);
+    return strtok.DONE;
+  }
 }
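The new parseMetadata walks the standard Vorbis comment block: a 32-bit little-endian vendor-string length and vendor string, a 32-bit comment count, then one length-prefixed KEY=value string per comment; the surrounding try/catch simply retries on every page until enough of the stream has been copied into bigBuf for a full parse to succeed. A standalone sketch of that layout (hypothetical helper, not part of this commit; `buf` is assumed to start at the vendor length field):

    // Layout: [vendor_len u32le][vendor][count u32le]
    //         then count * ([len u32le][KEY=value])
    function readVorbisComments(buf) {
      var offset = 0;
      var vendorLen = buf.readUInt32LE(offset); offset += 4;
      offset += vendorLen;                          // skip vendor string
      var count = buf.readUInt32LE(offset); offset += 4;
      var comments = [];
      for (var i = 0; i < count; i++) {
        var len = buf.readUInt32LE(offset); offset += 4;
        var comm = buf.toString('utf8', offset, offset + len);
        offset += len;
        var eq = comm.indexOf('=');                 // split on first '=' only
        comments.push({ key: comm.slice(0, eq).toUpperCase(),
                        value: comm.slice(eq + 1) });
      }
      return comments;
    }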
test/test-ogg-multipagemetadatabug.js
@@ -26,6 +26,5 @@ parser.on('metadata', function(result) {
 parser.on('done', function(err) {
   if (err) throw err;
-  stream.destroy();
   assert.ok(true);
 });
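For reference, the parser is consumed as in the test above; a minimal hedged usage sketch, assuming a readable stream and the events lib/ogg.js emits (the file path and comment key are illustrative):

    var fs = require('fs');
    var Ogg = require('./lib/ogg');

    var stream = fs.createReadStream('sample.ogg'); // illustrative path
    var parser = new Ogg(stream);
    parser.parse();

    parser.on('TITLE', function (title) {           // comment keys arrive upper-cased
      console.log('title:', title);
    });
    parser.on('done', function (err) {
      if (err) throw err;
    });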
