Permalink
Browse files

Refactor: Utf8Decoder -> StringDecoder

Instead of only decoding UTF-8, StringDecoder delegates to buffer.toString()
for other encodings. This makes for a simpler interface.
  • Loading branch information...
1 parent 4b48042 commit 6bed15e0748590e2589e64b151c3f4b8aab7ae78 @ry ry committed Jun 16, 2010
View
@@ -637,17 +637,8 @@ sys.inherits(ReadStream, events.EventEmitter);
fs.FileReadStream = fs.ReadStream; // support the legacy name
ReadStream.prototype.setEncoding = function (encoding) {
- var Utf8Decoder = require("utf8decoder").Utf8Decoder; // lazy load
- var self = this;
- this._encoding = encoding.toLowerCase();
- if (this._encoding == 'utf-8' || this._encoding == 'utf8') {
- this._decoder = new Utf8Decoder();
- this._decoder.onString = function(str) {
- self.emit('data', str);
- };
- } else if (this._decoder) {
- delete this._decoder;
- }
+ var StringDecoder = require("string_decoder").StringDecoder; // lazy load
+ this._decoder = new StringDecoder(encoding);
};
@@ -707,13 +698,11 @@ ReadStream.prototype._read = function () {
ReadStream.prototype._emitData = function (d) {
- if (!this._encoding) {
- this.emit('data', d);
- } else if (this._decoder) {
- this._decoder.write(d);
+ if (this._decoder) {
+ var string = this._decoder.write(d);
+ if (string.length) this.emit('data', string);
} else {
- var string = d.toString(this._encoding, 0, d.length);
- this.emit('data', string);
+ this.emit('data', d);
}
};
View
@@ -9,7 +9,6 @@ if (debugLevel & 0x4) {
}
var net = require('net');
-var Utf8Decoder = require('utf8decoder').Utf8Decoder;
var events = require('events');
var Buffer = require('buffer').Buffer;
@@ -93,14 +92,12 @@ var parsers = new FreeList('parsers', 1000, function () {
parser.onBody = function (b, start, len) {
// TODO body encoding?
- var enc = parser.incoming._encoding;
- if (!enc) {
- parser.incoming.emit('data', b.slice(start, start+len));
- } else if (this._decoder) {
- this._decoder.write(pool.slice(start, end));
+ var slice = b.slice(start, start+len);
+ if (parser.incoming._decoder) {
+ var string = parser.incoming._decoder.write(slice);
+ if (string.length) parser.incoming.emit('data', string);
} else {
- var string = b.toString(enc, start, start+len);
- parser.incoming.emit('data', string);
+ parser.incoming.emit('data', slice);
}
};
@@ -217,18 +214,9 @@ IncomingMessage.prototype.setBodyEncoding = function (enc) {
this.setEncoding(enc);
};
-IncomingMessage.prototype.setEncoding = function (enc) {
- // TODO check values, error out on bad, and deprecation message?
- this._encoding = enc.toLowerCase();
- if (this._encoding == 'utf-8' || this._encoding == 'utf8') {
- this._decoder = new Utf8Decoder();
- this._decoder.onString = function(str) {
- this.emit('data', str);
- };
- } else if (this._decoder) {
- delete this._decoder;
- }
-
+IncomingMessage.prototype.setEncoding = function (encoding) {
+ var StringDecoder = require("string_decoder").StringDecoder; // lazy load
+ this._decoder = new StringDecoder(encoding);
};
IncomingMessage.prototype.pause = function () {
View
@@ -1,6 +1,5 @@
var sys = require("sys");
var fs = require("fs");
-var Utf8Decoder = require("utf8decoder").Utf8Decoder;
var events = require("events");
var dns = require('dns');
@@ -500,20 +499,20 @@ function initStream (self) {
var end = pool.used + bytesRead;
pool.used += bytesRead;
- if (!self._encoding) {
+ if (self._decoder) {
+ // emit String
+ var string = self._decoder.write(pool.slice(start, end));
+ if (string.length) self.emit('data', string);
+ } else {
+ // emit buffer
if (self._events && self._events['data']) {
// emit a slice
self.emit('data', pool.slice(start, end));
}
-
- // Optimization: emit the original buffer with end points
- if (self.ondata) self.ondata(pool, start, end);
- } else if (this._decoder) {
- this._decoder.write(pool.slice(start, end));
- } else {
- var string = pool.toString(self._encoding, start, end);
- self.emit('data', string);
}
+
+ // Optimization: emit the original buffer with end points
+ if (self.ondata) self.ondata(pool, start, end);
}
};
self.readable = false;
@@ -828,18 +827,9 @@ Stream.prototype._writeQueueLast = function () {
};
-Stream.prototype.setEncoding = function (enc) {
- var self = this;
- // TODO check values, error out on bad, and deprecation message?
- this._encoding = enc.toLowerCase();
- if (this._encoding == 'utf-8' || this._encoding == 'utf8') {
- this._decoder = new Utf8Decoder();
- this._decoder.onString = function(str) {
- self.emit('data', str);
- };
- } else if (this._decoder) {
- delete this._decoder;
- }
+Stream.prototype.setEncoding = function (encoding) {
+ var StringDecoder = require("string_decoder").StringDecoder; // lazy load
+ this._decoder = new StringDecoder(encoding);
};
@@ -1,12 +1,21 @@
var Buffer = require('buffer').Buffer;
-var Utf8Decoder = exports.Utf8Decoder = function() {
- this.charBuffer = new Buffer(4);
- this.charReceived = 0;
- this.charLength = 0;
+var StringDecoder = exports.StringDecoder = function (encoding) {
+ this.encoding = (encoding || 'utf8').toLowerCase().replace(/[-_]/,'');
+ if (this.encoding === 'utf8') {
+ this.charBuffer = new Buffer(4);
+ this.charReceived = 0;
+ this.charLength = 0;
+ }
};
-Utf8Decoder.prototype.write = function(buffer) {
+
+StringDecoder.prototype.write = function (buffer) {
+ // If not utf8...
+ if (this.encoding !== 'utf8') {
+ return buffer.toString(this.encoding);
+ }
+
var charStr = '';
// if our last write ended with an incomplete multibyte character
if (this.charLength) {
@@ -21,28 +30,23 @@ Utf8Decoder.prototype.write = function(buffer) {
if (this.charReceived < this.charLength) {
// still not enough chars in this buffer? wait for more ...
- return;
+ return '';
}
// get the character that was split
charStr = this.charBuffer.slice(0, this.charLength).toString();
this.charReceived = this.charLength = 0;
- if (i == buffer.length) {
- // if there are no more bytes in this buffer, just emit our char
- this.onString(charStr)
- return;
- }
+ // if there are no more bytes in this buffer, just emit our char
+ if (i == buffer.length) return charStr;
- // otherwise cut of the characters end from the beginning of this buffer
+ // otherwise cut off the characters end from the beginning of this buffer
buffer = buffer.slice(i, buffer.length);
}
// determine how many bytes we have to check at the end of this buffer
- var i = (buffer.length >= 3)
- ? 3
- : buffer.length;
+ var i = (buffer.length >= 3) ? 3 : buffer.length;
// figure out if one of the last i bytes of our buffer announces an incomplete char
for (; i > 0; i--) {
@@ -71,8 +75,7 @@ Utf8Decoder.prototype.write = function(buffer) {
if (!this.charLength) {
// no incomplete char at the end of this buffer, emit the whole thing
- this.onString(charStr+buffer.toString());
- return;
+ return charStr + buffer.toString();
}
// buffer the incomplete character bytes we got
@@ -81,9 +84,9 @@ Utf8Decoder.prototype.write = function(buffer) {
if (buffer.length - i > 0) {
// buffer had more bytes before the incomplete char, emit them
- this.onString(charStr+buffer.slice(0, buffer.length - i).toString());
- } else if (charStr) {
- // or just emit the charStr if any
- this.onString(charStr);
+ return charStr + buffer.toString('utf8', 0, buffer.length - i);
}
+
+ // or just emit the charStr
+ return charStr;
};
View
@@ -1866,7 +1866,7 @@ static Handle<Value> Binding(const Arguments& args) {
exports->Set(String::New("utils"), String::New(native_utils));
exports->Set(String::New("path"), String::New(native_path));
exports->Set(String::New("module"), String::New(native_module));
- exports->Set(String::New("utf8decoder"), String::New(native_utf8decoder));
+ exports->Set(String::New("string_decoder"), String::New(native_string_decoder));
binding_cache->Set(module, exports);
}
@@ -50,12 +50,6 @@ function testServer(){
sys.inherits(testServer, http.Server);
-function testClient(){
- var conn = net.createConnection(PORT);
- conn.setEncoding("utf8");
- return conn;
-}
-
function writeReq(socket, data, encoding){
requests_sent++;
socket.write(data);
@@ -66,7 +60,8 @@ function writeReq(socket, data, encoding){
connection: Upgrade with listener
-----------------------------------------------*/
function test_upgrade_with_listener(_server){
- var conn = new testClient();
+ var conn = net.createConnection(PORT);
+ conn.setEncoding("utf8");
var state = 0;
conn.addListener("connect", function () {
@@ -79,10 +74,12 @@ function test_upgrade_with_listener(_server){
);
});
- conn.addListener("data", function(data){
+ conn.addListener("data", function (data) {
state++;
- if(state == 1){
+ assert.equal('string', typeof data);
+
+ if(state == 1) {
assert.equal("HTTP/1.1 101", data.substr(0, 12));
assert.equal("WjN}|M(6", request_upgradeHead.toString("utf8"));
conn.write("test", "utf8");
@@ -106,7 +103,8 @@ function test_upgrade_with_listener(_server){
var test_upgrade_no_listener_ended = false;
function test_upgrade_no_listener(){
- var conn = new testClient();
+ var conn = net.createConnection(PORT);
+ conn.setEncoding("utf8");
conn.addListener("connect", function () {
writeReq(conn, "GET / HTTP/1.1\r\nUpgrade: WebSocket\r\nConnection: Upgrade\r\n\r\n");
@@ -126,12 +124,15 @@ function test_upgrade_no_listener(){
connection: normal
-----------------------------------------------*/
function test_standard_http(){
- var conn = new testClient();
+ var conn = net.createConnection(PORT);
+ conn.setEncoding("utf8");
+
conn.addListener("connect", function () {
writeReq(conn, "GET / HTTP/1.1\r\n\r\n");
});
conn.addListener("data", function(data){
+ assert.equal("string", typeof data);
assert.equal("HTTP/1.1 200", data.substr(0, 12));
conn.end();
});
@@ -144,7 +145,7 @@ function test_standard_http(){
var server = createTestServer();
-server.listen(PORT, function(){
+server.listen(PORT, function () {
// All tests get chained after this:
test_upgrade_with_listener(server);
});
@@ -1,36 +1,30 @@
require('../common');
-var Utf8Decoder = require('utf8decoder').Utf8Decoder,
- Buffer = require('buffer').Buffer,
- decoder = new Utf8Decoder(),
- buffer,
- onStringCalled = 0;
-decoder.onString = function(str) {
- onStringCalled++;
- assert.deepEqual(str, buffer.toString());
-};
+Buffer = require('buffer').Buffer;
+StringDecoder = require('string_decoder').StringDecoder;
+decoder = new StringDecoder('utf8');
+
+
buffer = new Buffer('$');
-decoder.write(buffer);
-assert.equal(onStringCalled, 1);
+assert.deepEqual('$', decoder.write(buffer));
buffer = new Buffer('¢');
-decoder.write(buffer.slice(0, 1));
-decoder.write(buffer.slice(1, 2));
-assert.equal(onStringCalled, 2);
+assert.deepEqual('', decoder.write(buffer.slice(0, 1)));
+assert.deepEqual('¢', decoder.write(buffer.slice(1, 2)));
buffer = new Buffer('€');
-decoder.write(buffer.slice(0, 1));
-decoder.write(buffer.slice(1, 2));
-decoder.write(buffer.slice(2, 3));
-assert.equal(onStringCalled, 3);
+assert.deepEqual('', decoder.write(buffer.slice(0, 1)));
+assert.deepEqual('', decoder.write(buffer.slice(1, 2)));
+assert.deepEqual('€', decoder.write(buffer.slice(2, 3)));
buffer = new Buffer([0xF0, 0xA4, 0xAD, 0xA2]);
-decoder.write(buffer.slice(0, 1));
-decoder.write(buffer.slice(1, 2));
-decoder.write(buffer.slice(2, 3));
-decoder.write(buffer.slice(3, 4));
-assert.equal(onStringCalled, 4);
+s = '';
+s += decoder.write(buffer.slice(0, 1));
+s += decoder.write(buffer.slice(1, 2));
+s += decoder.write(buffer.slice(2, 3));
+s += decoder.write(buffer.slice(3, 4));
+assert.ok(s.length > 0);
// A mixed ascii and non-ascii string
// Test stolen from deps/v8/test/cctest/test-strings.cc
@@ -51,18 +45,16 @@ charLengths = [0, 0, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5];
print('scanning ');
for (var j = 2; j < buffer.length; j++) {
for (var i = 1; i < j; i++) {
- var decoder = new Utf8Decoder();
- var sum = "";
- decoder.onString = function (s) { sum += s; };
+ var decoder = new StringDecoder('utf8');
- decoder.write(buffer.slice(0, i));
+ var sum = decoder.write(buffer.slice(0, i));
// just check that we've received the right amount
// after the first write
assert.equal(charLengths[i], sum.length);
- decoder.write(buffer.slice(i, j));
- decoder.write(buffer.slice(j, buffer.length));
+ sum += decoder.write(buffer.slice(i, j));
+ sum += decoder.write(buffer.slice(j, buffer.length));
assert.equal(expected, sum);
print(".");
}

1 comment on commit 6bed15e

Ah, that's very nice! : )

Should the StringDecoder now be documented?

Please sign in to comment.