Browse files

loop unrolling and various other buffer optimizations

  • Loading branch information...
1 parent cd89376 commit a14dcfc42382e0b3412075c568bd8152e0987820 @einaros einaros committed Dec 13, 2011
Showing with 59 additions and 14 deletions.
  1. +30 −3 lib/Receiver.js
  2. +18 −6 lib/Sender.js
  3. +9 −3 src/bufferutil.cc
  4. +2 −2 wscript
View
33 lib/Receiver.js
@@ -313,7 +313,21 @@ Receiver.prototype.add = function(data) {
return;
}
var toRead = Math.min(data.length, this.expectBuffer.length - this.expectOffset);
- data.copy(this.expectBuffer, this.expectOffset, 0, toRead);
+ // although ugly, this is a much faster approach for small buffers,
+ // than calling copy()
+ var dest = this.expectBuffer;
+ var offset = this.expectOffset;
+ switch (toRead) {
+ default: data.copy(dest, offset, 0, toRead); break;
+ case 8: dest[offset+7] = data[7];
+ case 7: dest[offset+6] = data[6];
+ case 6: dest[offset+5] = data[5];
+ case 5: dest[offset+4] = data[4];
+ case 4: dest[offset+3] = data[3];
+ case 3: dest[offset+2] = data[2];
+ case 2: dest[offset+1] = data[1];
+ case 1: dest[offset] = data[0];
+ }
this.expectOffset += toRead;
if (toRead < data.length) {
this.addToOverflow(data.slice(toRead, data.length));
@@ -336,9 +350,22 @@ Receiver.prototype.addToOverflow = function(data) {
if (this.overflow == null) this.overflow = data;
else {
var prevOverflow = this.overflow;
- this.overflow = this.allocateFromPool(this.overflow.length + data.length);
+ var dataLength = data.length;
+ this.overflow = this.allocateFromPool(this.overflow.length + dataLength);
prevOverflow.copy(this.overflow, 0);
- data.copy(this.overflow, prevOverflow.length);
+ var dest = this.overflow;
+ var offset = prevOverflow.length;
+ switch (dataLength) {
+ default: data.copy(dest, offset, 0, dataLength); break;
+ case 8: dest[offset+7] = data[7];
+ case 7: dest[offset+6] = data[6];
+ case 6: dest[offset+5] = data[5];
+ case 5: dest[offset+4] = data[4];
+ case 4: dest[offset+3] = data[3];
+ case 3: dest[offset+2] = data[2];
+ case 2: dest[offset+1] = data[1];
+ case 1: dest[offset] = data[0];
+ }
}
}
View
24 lib/Sender.js
@@ -33,7 +33,8 @@ var writeUInt32BE = !isNodeV4
* HyBi Sender implementation
*/
-function Sender (socket) {
+function Sender (socket, config) {
+ this._sendCacheSize = (config && config.SendBufferCacheSize) ? config.SendBufferCacheSize : 65536;
this._socket = socket;
this.firstFragment = true;
}
@@ -124,14 +125,18 @@ Sender.prototype.send = function(data, options, cb) {
if (typeof cb == 'function') cb(e);
else this.emit('error', e);
}
+ finally {
+ if (this._sendCache && this._sendCache.length > this._sendCacheSize) {
+ this._sendCache = null;
+ }
+ }
}
/**
* Frames a piece of data according to the HyBi WebSocket protocol.
*
* @api private
*/
-
Sender.prototype.frameData = function(opcode, data, finalFragment, maskData) {
if (!data) return new Buffer([opcode | (finalFragment ? 0x80 : 0), 0]);
else if (!(data instanceof Buffer)) {
@@ -149,7 +154,8 @@ Sender.prototype.frameData = function(opcode, data, finalFragment, maskData) {
dataOffset += 2;
secondByte = 126;
}
- var outputBuffer = new Buffer(dataLength + dataOffset);
+ var outputBuffer = (this._sendCache && this._sendCache.length >= dataLength + dataOffset)
+ ? this._sendCache : (this._sendCache = new Buffer(dataLength + dataOffset));
if (finalFragment) opcode = opcode | 0x80;
outputBuffer[0] = opcode;
switch (secondByte) {
@@ -162,13 +168,19 @@ Sender.prototype.frameData = function(opcode, data, finalFragment, maskData) {
}
if (maskData) {
var mask = this._randomMask || (this._randomMask = getRandomMask());
- mask.copy(outputBuffer, dataOffset - 4);
- bufferUtil.mask(data, mask, outputBuffer, dataOffset);
+ //faster:
+ outputBuffer[dataOffset - 4] = mask[0];
+ outputBuffer[dataOffset - 3] = mask[1];
+ outputBuffer[dataOffset - 2] = mask[2];
+ outputBuffer[dataOffset - 1] = mask[3];
+ //slower:
+ //mask.copy(outputBuffer, dataOffset - 4);
+ bufferUtil.mask(data, mask, outputBuffer, dataOffset, dataLength);
secondByte = secondByte | 0x80;
}
else data.copy(outputBuffer, dataOffset);
outputBuffer[1] = secondByte;
- return outputBuffer;
+ return outputBuffer.slice(0, dataOffset + dataLength);
}
module.exports = Sender;
View
12 src/bufferutil.cc
@@ -79,14 +79,20 @@ class BufferUtil : public ObjectWrap
HandleScope scope;
Local<Object> buffer_obj = args[0]->ToObject();
unsigned char* buffer = (unsigned char*)Buffer::Data(buffer_obj);
- size_t length = Buffer::Length(buffer_obj);
Local<Object> mask_obj = args[1]->ToObject();
unsigned char *mask = (unsigned char*)Buffer::Data(mask_obj);
Local<Object> output_obj = args[2]->ToObject();
unsigned char* output = (unsigned char*)Buffer::Data(output_obj);
- int dataOffset = args[3]->Int32Value();
+ size_t dataOffset = args[3]->Int32Value();
+ size_t length = args[4]->Int32Value();
int i;
- for (i = 0; i < length; ++i) {
+ for (i = 0; i + 3 < length; i += 4) {
+ output[dataOffset + i] = buffer[i] ^ mask[0];
+ output[dataOffset + i + 1] = buffer[i + 1] ^ mask[1];
+ output[dataOffset + i + 2] = buffer[i + 2] ^ mask[2];
+ output[dataOffset + i + 3] = buffer[i + 3] ^ mask[3];
+ }
+ for (; i < length; ++i) {
output[dataOffset + i] = buffer[i] ^ mask[i % 4];
}
return True();
View
4 wscript
@@ -8,8 +8,8 @@ def set_options(opt):
def configure(conf):
conf.check_tool('compiler_cxx')
conf.check_tool('node_addon')
- conf.env.append_value('CCFLAGS', ['-O3'])
- conf.env.append_value('CXXFLAGS', ['-O3'])
+ conf.env.append_value('CCFLAGS', ['-Ofast'])
+ conf.env.append_value('CXXFLAGS', ['-Ofast'])
def build(bld):
obj = bld.new_task_gen('cxx', 'shlib', 'node_addon')

0 comments on commit a14dcfc

Please sign in to comment.