Permalink
Browse files

Can now de-unsynchronize frames in id3v2.4 tags, bumped to 0.0.5

  • Loading branch information...
1 parent 7c78c48 commit bdad0ce200f0e4e183a814b0be62ed171ee449ca @leetreveil committed Mar 3, 2011
Showing with 166 additions and 143 deletions.
  1. +21 −0 lib/common.js
  2. +73 −121 lib/id3v2.js
  3. +48 −19 lib/id3v2_frames.js
  4. +1 −1 package.json
  5. +4 −1 test/alltests.js
  6. BIN test/samples/id3v2.4.mp3
  7. +17 −0 test/test-deunsync.js
  8. +2 −1 test/test-id3v2.4.js
View
@@ -1,3 +1,24 @@
+exports.removeUnsyncBytes = function(bytes) {
+ var output = [],
+ safe = true;
+
+ for (var i = 0; i < bytes.length ; i++) {
+ var val = bytes[i];
+ if(safe){
+ output.push(val);
+ safe = (val !== 0xFF);
+ }else{
+ if(val !== 0x00){
+ output.push(val);
+ }else{
+ //console.log('found unsync byte at: ' + i);
+ }
+ safe = true;
+ }
+ }
+ return new Buffer(output);
+};
+
exports.findZero = function (buffer, start, end) {
var i = start;
while (buffer[i] !== 0) {
View
@@ -16,154 +16,106 @@ id3v2.prototype.parse = function(){
var self = this;
strtok.parse(self.stream, function(v,cb){
if (v === undefined){
- return new strtok.StringType(3, 'ascii');
+ cb.position = 'header';
+ return new strtok.BufferType(10);
}
- if (v === 'ID3'){
- cb.header = true;
- return new strtok.BufferType(7);
- }
-
- if(cb.header){
- cb.header = false;
- //root header
- if (v[0] > 4){
+ if(cb.position === 'header'){
+ if(v.toString('ascii',0, 3) !== 'ID3'){
self.emit('done');
return strtok.DONE;
}
-
- var id3 = {
- version: '2.' + v[0] + '.' + v[1],
- major: v[0],
- unsync: strtok.BITSET.get(v, 2, 7),
- xheader: strtok.BITSET.get(v, 2, 6),
- xindicator: strtok.BITSET.get(v, 2, 5),
- footer: strtok.BITSET.get(v, 2, 4),
- size: strtok.INT32SYNCSAFE.get(v, 3)
+
+ cb.header = {
+ version: '2.' + v[3] + '.' + v[4],
+ major: v[3],
+ unsync: strtok.BITSET.get(v, 5, 7),
+ xheader: strtok.BITSET.get(v, 5, 6),
+ xindicator: strtok.BITSET.get(v, 5, 5),
+ footer: strtok.BITSET.get(v, 5, 4),
+ size: strtok.INT32SYNCSAFE.get(v, 6)
};
-
- cb.major = id3.major;
-
- if (id3.xheader){
- cb.xheader = true;
+
+ if (cb.header.xheader){
+ cb.position = 'xheader';
return strtok.UINT32_BE;
}
- //expect the first frames name next
- switch (id3.major){
+ //expect the first frames header next
+ cb.position = 'frameheader';
+ switch (cb.header.major){
case 2 :
- return new strtok.StringType(3, 'ascii');
- case 3 :
- return new strtok.StringType(4, 'ascii');
- case 4 :
- return new strtok.StringType(4, 'ascii');
+ return new strtok.BufferType(6);
+ case 3 : case 4 :
+ return new strtok.BufferType(10);
}
self.emit('done');
return strtok.DONE;
}
- if(cb.xheader){
- cb.xheader = false;
- //skip xheader
- return new strtok.BufferType(v);
- }
-
- //frame buffer
- if (typeof v === 'object' && v.length === cb.frameLength){
- var frame, encoding;
- switch (cb.major){
- case 2 :
- encoding = getTextEncoding(v[0]);
- frame = parser.readData(v.slice(1,v.length), cb.frameId, encoding , null, cb.major);
- self.emit(cb.frameId, frame);
- return new strtok.StringType(3, 'ascii');
- case 3 : case 4:
- var frameFlags = readFrameFlags(v.slice(0,2));
- encoding = getTextEncoding(v[2]);
- var frameData = v.slice(3,v.length);
-
- if(frameFlags.format.unsync){
- frameData = removeUnsyncBytes(v.slice(3,v.length));
- }
- frame = parser.readData(frameData, cb.frameId, encoding, frameFlags, cb.major);
- self.emit(cb.frameId, frame);
- return new strtok.StringType(4, 'ascii');
- }
- }
-
- if (typeof v === 'number'){
- //read frame including header next
- if(v > 0){
- cb.frameLength = (cb.major > 2) ? v + 2 : v;
- return new strtok.BufferType(cb.frameLength);
- }else{
- self.emit('done');
- return strtok.DONE;
- }
+ if(cb.position === 'xheader'){
+ cb.position = 'frameheader';
+ //TODO: this will not work because we do not detect raw objects
+ return new strtok.BufferType(v); //skip xheader
}
- if (typeof v === 'string'){
- cb.frameId = v;
-
- // Last frame
- if (v === '' || v === '\u0000\u0000\u0000\u0000'){
- self.emit('done');
- return strtok.DONE;
- }
+ if(cb.position === 'frameheader'){
+ cb.position = 'framedata';
+ var header = cb.frameHeader = {};
- //check first char is a letter, bit of defensive programming
- if('ABCDEFGHIJKLMNOPQRSTUVWXYZ'.search(v[0]) === -1){
- self.emit('done');
- return strtok.DONE;
- }
-
- switch (cb.major){
+ switch (cb.header.major){
case 2 :
- return strtok.UINT24_BE;
+ header.id = v.toString('ascii', 0, 3);
+ header.length = strtok.UINT24_BE.get(v, 3, 6);
+ break;
case 3 :
- return strtok.UINT32_BE;
+ header.id = v.toString('ascii', 0, 4);
+ header.length = strtok.UINT32_BE.get(v, 4, 8);
+ header.flags = readFrameFlags(v.slice(8, 10));
+ break;
case 4 :
- return strtok.INT32SYNCSAFE;
+ header.id = v.toString('ascii', 0, 4);
+ header.length = strtok.INT32SYNCSAFE.get(v, 4, 8);
+ header.flags = readFrameFlags(v.slice(8, 10));
+ break;
}
- }
-
- });
-};
-var removeUnsyncBytes = function(bytes) {
- var filteredBytes = [],
- previousByte = 0x00;
-
- for(var i in bytes){
- var value = bytes[i];
-
- if(value === 0x00 && previousByte === 0xFF){
- //console.log('found unsync byte');
- }else{
- filteredBytes.push(value);
+ // Last frame. Check first char is a letter, bit of defensive programming
+ if (header.id === '' || header.id === '\u0000\u0000\u0000\u0000'
+ || 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'.search(header.id[0]) === -1 ){
+
+ self.emit('done');
+ return strtok.DONE;
+ }
+
+ return new strtok.BufferType(header.length);
}
- previousByte = value;
- };
-
- console.log(new Buffer(filteredBytes));
-
- return new Buffer(filteredBytes);
-};
+ if (cb.position === 'framedata'){
+ cb.position = 'frameheader';
+
+ var frame, encoding;
+ switch (cb.header.major){
+ case 2 :
+ frame = parser.readData(v, cb.frameHeader.id , null, cb.header.major);
+ self.emit(cb.frameHeader.id, frame);
+ return new strtok.BufferType(6);
+ case 3 : case 4:
+ if(cb.frameHeader.flags.format.unsync){
+ v = common.removeUnsyncBytes(v);
+ }
-var getTextEncoding = function(byte) {
- switch (byte) {
- case 0x00:
- // ISO-8859-1
- return 'ascii';
- case 0x01: case 0x02:
- return 'utf16';
- case 0x03:
- return 'utf8';
- default:
- return 'utf8';
- }
+ if(cb.frameHeader.flags.format.data_length_indicator){
+ v = v.slice(4, v.length); //TODO: do we need to do something with this?
+ }
+
+ frame = parser.readData(v, cb.frameHeader.id, cb.frameHeader.flags, cb.header.major);
+ self.emit(cb.frameHeader.id, frame);
+ return new strtok.BufferType(10);
+ }
+ }
+ });
};
var readFrameFlags = function(b) {
View
@@ -4,53 +4,63 @@ var Buffer = require('buffer').Buffer,
findZero = common.findZero,
decodeString = common.decodeString;
-exports.readData = function readData (b, type, encoding, flags, major) {
- var length = b.length;
-
- var orig_type = type;
+exports.readData = function readData(b, type, flags, major) {
+ var encoding,
+ length = b.length,
+ orig_type = type,
+ offset = 0;
+
if (type[0] === 'T'){
type = 'T*';
+ encoding = getTextEncoding(b[0]);
}
- var offset = 0;
-
switch (type) {
case 'PIC':
case 'APIC':
var pic = {};
-
+
switch (major) {
case 2:
- pic.format = b.toString('ascii', offset, offset + 3);
+ encoding = getTextEncoding(b[0]);
+ offset += 1;
+ pic.format = b.toString(encoding, offset, offset + 3);
offset += 3;
break;
case 3:
+ encoding = getTextEncoding(b[0]);
+ offset += 1;
pic.format = decodeString(b, encoding, offset, findZero(b, offset, length));
- offset += 1 + pic.format.length;
pic.format = pic.format.text;
+ offset += 1 + pic.format.length;
break;
case 4:
- offset += 4;
+ encoding = getTextEncoding(b[0]);
+ offset += 1;
pic.format = decodeString(b, encoding, offset, findZero(b, offset, length));
- offset += 1 + pic.format.length;
pic.format = pic.format.text;
+ offset += 1 + pic.format.length;
break;
}
-
+
pic.type = common.PICTURE_TYPE[b[offset]];
offset += 1;
-
+
pic.description = decodeString(b, encoding, offset, findZero(b, offset, length));
- offset += 1 + pic.description.length;
pic.description = pic.description.text;
- pic.data = data = b.slice(offset, length);
+ offset += 1 + pic.description.length;
+
+ pic.data = b.slice(offset, length);
return pic;
case 'COM':
case 'COMM':
var comment = {};
- comment.language = b.toString('ascii', offset, offset + 3);
+ encoding = getTextEncoding(b[0]);
+ offset +=1;
+
+ comment.language = b.toString(encoding, offset, offset + 3);
offset += 3;
comment.short_description = decodeString(b, encoding, offset, findZero(b, offset, length));
@@ -64,7 +74,7 @@ exports.readData = function readData (b, type, encoding, flags, major) {
return strtok.UINT32_BE.get(b, 0);
case 'T*':
- var decoded = decodeString(b, encoding, 0, length).text;
+ var decoded = decodeString(b, encoding, 1, length).text;
//trim any whitespace and any leading or trailing null characters
decoded = decoded.trim().replace(/^\x00+/,'').replace(/\x00+$/,'');
//convert to an array split by null characters
@@ -105,14 +115,33 @@ exports.readData = function readData (b, type, encoding, flags, major) {
case 'ULT':
case 'USLT':
var lyrics = {};
+
+ encoding = getTextEncoding(b[0]);
+ offset += 1;
- lyrics.language = b.toString('ascii', offset, offset + 3);
+ lyrics.language = b.toString(encoding, offset, offset + 3);
offset += 3;
+
lyrics.descriptor = decodeString(b, encoding, offset, findZero(b, offset, length));
- offset += 1 + lyrics.descriptor.length;
+ offset += 1 + lyrics.descriptor.length;
lyrics.descriptor = lyrics.descriptor.text;
+
lyrics.text = decodeString(b, encoding, offset, length);
lyrics.text = lyrics.text.text;
+
return lyrics;
}
+};
+
+var getTextEncoding = function(byte) { // maps a frame's leading encoding byte to the encoding name passed to the string decoders
+ switch (byte) {
+ case 0x00:
+ return 'ascii'; // ISO-8859-1 in the tag; decoded here as 'ascii'
+ case 0x01: case 0x02: // both values are decoded as 'utf16'
+ return 'utf16';
+ case 0x03:
+ return 'utf8';
+ default: // any unrecognised byte falls back to 'utf8'
+ return 'utf8';
+ }
};
View
@@ -1,7 +1,7 @@
{
"name": "musicmetadata",
"description": "Music metadata library for node, using pure Javascript.",
- "version": "0.0.4",
+ "version": "0.0.5",
"author": "Lee Treveil",
"engine": [ "node >=0.3.1" ],
"dependencies": {
View
@@ -1,3 +1,6 @@
if (module == require.main) {
- require('async_testing').run(__dirname, process.ARGV);
+ var tests = ['test-deunsync.js', 'test-id3v1.1.js', 'test-id3v2.2.js',
+ 'test-id3v2.3.js', 'test-id3v2.4.js', 'test-id4.js'];
+
+ require('async_testing').run(tests, process.ARGV);
}
View
Binary file not shown.
Oops, something went wrong.

0 comments on commit bdad0ce

Please sign in to comment.