Permalink
Browse files

string_decoder: support UCS2 and Base64 encoding

  • Loading branch information...
1 parent 26b6da1 commit 90ad7cdd863481e60d56499386d2e91145c4ecef @koichik committed Mar 29, 2012
Showing 2 changed files with 73 additions and 22 deletions.
  1. +58 −22 lib/string_decoder.js
  2. +15 −0 test/simple/test-string-decoder.js
View
@@ -21,17 +21,28 @@
var StringDecoder = exports.StringDecoder = function(encoding) {
this.encoding = (encoding || 'utf8').toLowerCase().replace(/[-_]/, '');
- if (this.encoding === 'utf8') {
- this.charBuffer = new Buffer(4);
- this.charReceived = 0;
- this.charLength = 0;
+ switch (this.encoding) {
+ case 'utf8':
+ this.detectIncompleteChar = detectUtf8IncompleteChar;
+ break;
+ case 'ucs2':
+ this.detectIncompleteChar = detectUcs2IncompleteChar;
+ break;
+ case 'base64':
+ this.detectIncompleteChar = detectBase64IncompleteChar;
+ this.end = endBase64;
+ break;
+ default:
+ return;
}
+ this.charBuffer = new Buffer(4);
+ this.charReceived = 0;
+ this.charLength = 0;
};
-
StringDecoder.prototype.write = function(buffer) {
- // If not utf8...
- if (this.encoding !== 'utf8') {
+ // If not multibytes character encoding...
+ if (!this.charBuffer) {
return buffer.toString(this.encoding);
}
@@ -53,7 +64,7 @@ StringDecoder.prototype.write = function(buffer) {
}
// get the character that was split
- charStr = this.charBuffer.slice(0, this.charLength).toString();
+ charStr = this.charBuffer.slice(0, this.charLength).toString(this.encoding);
this.charReceived = this.charLength = 0;
// if there are no more bytes in this buffer, just emit our char
@@ -63,7 +74,30 @@ StringDecoder.prototype.write = function(buffer) {
buffer = buffer.slice(i, buffer.length);
}
+ this.detectIncompleteChar(buffer);
+
+ if (!this.charLength) {
+ // no incomplete char at the end of this buffer, emit the whole thing
+ return charStr + buffer.toString(this.encoding);
+ }
+
+ // buffer the incomplete character bytes we got
+ var completeLength = buffer.length - this.charReceived;
+ buffer.copy(this.charBuffer, 0, completeLength, buffer.length);
+
+ if (buffer.length - completeLength > 0) {
+ // buffer had more bytes before the incomplete char
+ charStr += buffer.toString(this.encoding, 0, completeLength);
+ }
+
+ return charStr;
+};
+
+StringDecoder.prototype.end = function() {
+ return '';
+};
+function detectUtf8IncompleteChar(buffer) {
// determine how many bytes we have to check at the end of this buffer
var i = (buffer.length >= 3) ? 3 : buffer.length;
@@ -93,20 +127,22 @@ StringDecoder.prototype.write = function(buffer) {
}
}
- if (!this.charLength) {
- // no incomplete char at the end of this buffer, emit the whole thing
- return charStr + buffer.toString();
- }
-
- // buffer the incomplete character bytes we got
- buffer.copy(this.charBuffer, 0, buffer.length - i, buffer.length);
this.charReceived = i;
+}
- if (buffer.length - i > 0) {
- // buffer had more bytes before the incomplete char, emit them
- return charStr + buffer.toString('utf8', 0, buffer.length - i);
- }
+function detectUcs2IncompleteChar(buffer) {
+ this.charReceived = buffer.length % 2;
+ this.charLength = this.charReceived ? 2 : 0;
+}
- // or just emit the charStr
- return charStr;
-};
+function detectBase64IncompleteChar(buffer) {
+ this.charReceived = buffer.length % 3;
+ this.charLength = this.charReceived ? 3 : 0;
+}
+
+function endBase64() {
+ if (!this.charLength) {
+ return '';
+ }
+ return this.charBuffer.toString('base64', 0, this.charReceived);
+}
@@ -82,3 +82,18 @@ for (var j = 2; j < buffer.length; j++) {
}
console.log(' crayon!');
+// UCS2
+decoder = new StringDecoder('ucs2');
+buffer = new Buffer('ab', 'ucs2');
+assert.equal(decoder.write(buffer), 'ab'); // 2 complete chars
+buffer = new Buffer('abc', 'ucs2');
+assert.equal(decoder.write(buffer.slice(0, 3)), 'a'); // 'a' and first of 'b'
+assert.equal(decoder.write(buffer.slice(3, 6)), 'bc'); // second of 'b' and 'c'
+
+// Base64
+decoder = new StringDecoder('base64');
+buffer = new Buffer('41424344454647', 'hex');
+assert.equal(decoder.write(buffer.slice(0, 1)), '');
+assert.equal(decoder.write(buffer.slice(1, 3)), 'QUJD');
+assert.equal(decoder.write(buffer.slice(3, 7)), 'REVG');
+assert.equal(decoder.end(), 'Rw==');

0 comments on commit 90ad7cd

Please sign in to comment.