Skip to content

Commit

Permalink
refactor: improve CESU-8 encoding coding style
Browse files Browse the repository at this point in the history
BTW: js CESU-8 encoding is faster than Buffer UTF-8 encoding

```
putRawStringSmallLessThan0x80*10000: 672.642ms
putRawStringSmallLessThan0x800*10000: 592.960ms
putRawStringSmallBiggerThan0x800*10000: 861.010ms

putUTF8RawStringSmallLessThan0x80*10000: 841.638ms
putUTF8RawStringSmallLessThan0x800*10000: 958.383ms
putUTF8RawStringSmallBiggerThan0x800*10000: 1793.470ms
```
  • Loading branch information
fengmk2 committed Aug 7, 2017
1 parent 7dd0a0c commit 7fa2f4c
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 47 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -14,3 +14,4 @@ results
node_modules
npm-debug.log
coverage/
profile-*
8 changes: 4 additions & 4 deletions .travis.yml
@@ -1,9 +1,9 @@
sudo: false
language: node_js
node_js:
- '0.12'
- '1'
- '2'
- '3'
- '4'
script: "npm run test-travis"
- '6'
- '8'
script: "npm run test-travis && npm run benchmark"
after_script: "npm install coveralls@2 && cat ./coverage/lcov.info | coveralls"
148 changes: 145 additions & 3 deletions benchmark/putRawString.js
Expand Up @@ -10,23 +10,165 @@ largeStr += largeStr
var bb = ByteBuffer.allocate(1024);
var max = 10;

// putRawString

bb.putRawString(makeStr('a', 200));
console.log('small bytes %s', bb.array().length);
console.log('putRawString: small < 0x80 bytes %s, one char length: %d',
bb.array().length, 'a'.length);
bb.reset();

bb.putRawString(makeStr('ȅ', 200));
console.log('putRawString: small < 0x800 bytes %s, one char length: %d',
bb.array().length, 'ȅ'.length);
bb.reset();

bb.putRawString(makeStr('𐐀', 200));
console.log('putRawString: small >= 0x800 bytes %s, one char length: %d, maxIncreaseSize: %d, bb.size: %d',
bb.array().length, '𐐀'.length, makeStr('𐐀', 200).length * 3, bb._size);
bb.reset();

bb.putRawString(makeStr(String.fromCharCode(0x801), 200));
console.log('putRawString: small = 0x801 bytes %s, one char length: %d',
bb.array().length, String.fromCharCode(0x801).length);
bb.reset();

bb.putRawString(makeStr('中文', 200));
console.log('putRawString: small 中文 bytes %s, one char length: %d, maxIncreaseSize: %d, bb.size: %d',
bb.array().length, '中文'.length, makeStr('中文', 200).length * 3, bb._size);
bb.reset();

bb.putRawString(makeStr('\ud83c\udf3c', 200));
console.log('putRawString: small \ud83c\udf3c bytes %s, one char length: %d, maxIncreaseSize: %d, bb.size: %d',
bb.array().length, '\ud83c\udf3c'.length, makeStr('\ud83c\udf3c', 200).length * 3, bb._size);
bb.reset();

bb.putRawString(makeStr(largeStr, 10));
console.log('large bytes %s', bb.array().length);
// The format string has two placeholders (%s and %d), so two values must be
// supplied; otherwise the trailing `%d` is printed literally. As in the other
// log lines, the second value is the length of the unit passed to makeStr.
console.log('putRawString: large bytes %s, one char length: %d',
  bb.array().length, largeStr.length);
bb.reset();

// putUTF8RawString

bb = ByteBuffer.allocate(2);
bb.putUTF8RawString(makeStr('a', 200));
console.log('putUTF8RawString: small < 0x80 bytes %s, one char length: %d',
bb.array().length, 'a'.length);
bb.reset();

bb.putUTF8RawString(makeStr('ȅ', 200));
console.log('putUTF8RawString: small < 0x800 bytes %s, one char length: %d',
bb.array().length, 'ȅ'.length);
bb.reset();

bb.putUTF8RawString(makeStr('𐐀', 200));
console.log('putUTF8RawString: small >= 0x800 bytes %s, one char length: %d, byteLength: %d, bb.size: %d',
bb.array().length, '𐐀'.length, Buffer.byteLength(makeStr('𐐀', 200)), bb._size);
bb.reset();

bb.putUTF8RawString(makeStr('中文', 200));
console.log('putUTF8RawString: small 中文 bytes %s, one char length: %d, byteLength: %d, bb.size: %d',
bb.array().length, '中文'.length, Buffer.byteLength(makeStr('中文', 200)), bb._size);
bb.reset();

bb.putUTF8RawString(makeStr('\ud83c\udf3c', 200));
console.log('putUTF8RawString: small \ud83c\udf3c bytes %s, one char length: %d, byteLength: %d, bb.size: %d',
bb.array().length, '\ud83c\udf3c'.length, Buffer.byteLength(makeStr('\ud83c\udf3c', 200)), bb._size);
bb.reset();

bb.putUTF8RawString(makeStr(String.fromCharCode(0x801), 200));
console.log('putUTF8RawString: small = 0x801 bytes %s, one char length: %d',
bb.array().length, String.fromCharCode(0x801).length);
bb.reset();

bb.putUTF8RawString(makeStr(largeStr, 10));
// The format string has two placeholders (%s and %d), so two values must be
// supplied; otherwise the trailing `%d` is printed literally. As in the other
// log lines, the second value is the length of the unit passed to makeStr.
console.log('putUTF8RawString: large bytes %s, one char length: %d',
  bb.array().length, largeStr.length);
bb.reset();

bb = ByteBuffer.allocate(1024);

var run = bench([
function putRawStringSmall(cb) {
function putRawStringSmallLessThan0x80(cb) {
for (var i = 0; i < max; i++) {
bb.putRawString(makeStr('a', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},
function putRawStringSmallLessThan0x800(cb) {
for (var i = 0; i < max; i++) {
bb.putRawString(makeStr('ȅ', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},
function putRawStringSmallBiggerThan0x800(cb) {
for (var i = 0; i < max; i++) {
bb.putRawString(makeStr('𐐀', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},
function putRawStringSmallChinese(cb) {
for (var i = 0; i < max; i++) {
bb.putRawString(makeStr('中文', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},
function putRawStringSmallEmoji(cb) {
for (var i = 0; i < max; i++) {
bb.putRawString(makeStr('\ud83c\udf3c', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},

function putUTF8RawStringSmallLessThan0x80(cb) {
for (var i = 0; i < max; i++) {
bb.putUTF8RawString(makeStr('a', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},
function putUTF8RawStringSmallLessThan0x800(cb) {
for (var i = 0; i < max; i++) {
bb.putUTF8RawString(makeStr('ȅ', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},
function putUTF8RawStringSmallBiggerThan0x800(cb) {
for (var i = 0; i < max; i++) {
bb.putUTF8RawString(makeStr('𐐀', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},
function putUTF8RawStringSmallChinese(cb) {
for (var i = 0; i < max; i++) {
bb.putUTF8RawString(makeStr('中文', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},
function putUTF8RawStringSmallEmoji(cb) {
for (var i = 0; i < max; i++) {
bb.putUTF8RawString(makeStr('\ud83c\udf3c', 200));
}
bb.array();
bb.reset();
setImmediate(cb);
},
// function putRawStringLarge(cb) {
// for (var i = 0; i < max; i++) {
// bb.putRawString(makeStr(largeStr, 10));
Expand Down
35 changes: 29 additions & 6 deletions lib/byte.js
Expand Up @@ -344,7 +344,7 @@ ByteBuffer.prototype._putString = function (index, value, format) {

// Prints a string to the Buffer, encoded as CESU-8
ByteBuffer.prototype.putRawString = function (index, str) {
if (typeof index === 'string') {
if (arguments.length === 1) {
// putRawString(str)
str = index;
index = this._offset;
Expand Down Expand Up @@ -381,19 +381,42 @@ ByteBuffer.prototype.putRawString = function (index, str) {
this._bytes[index++] = ch;
} else if (ch < 0x800) {
// 0x800: 2048
this._bytes[index++] = (0xc0 + ((ch >> 6) & 0x1f)) >>> 32;
this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
// this._bytes[index++] = (0xc0 + ((ch >> 6) & 0x1f)) >>> 32;
// this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
this._bytes[index++] = (ch >>> 6) | 0xc0;
this._bytes[index++] = (ch & 0x3f) | 0x80; // 0x3f => 0b00111111
} else {
this._bytes[index++] = (0xe0 + ((ch >> 12) & 0xf)) >>> 32;
this._bytes[index++] = (0x80 + ((ch >> 6) & 0x3f)) >>> 32;
this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
// this._bytes[index++] = (0xe0 + ((ch >> 12) & 0xf)) >>> 32;
// this._bytes[index++] = (0x80 + ((ch >> 6) & 0x3f)) >>> 32;
// this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
this._bytes[index++] = (ch >>> 12) | 0xe0;
this._bytes[index++] = ((ch >>> 6) & 0x3f) | 0x80;
this._bytes[index++] = (ch & 0x3f) | 0x80;
}
}
// index is now probably less than @_offset and reflects the real length
this._offset = index;
return this;
};

/**
 * Writes a string into the buffer using Node's own Buffer string encoding
 * (UTF-8 by default), as opposed to the hand-rolled CESU-8 encoder.
 *
 * Call forms:
 *   putUTF8RawString(str)         - write at the current `_offset`
 *   putUTF8RawString(index, str)  - write at the explicit byte `index`
 *
 * In both forms `_offset` ends up just past the last byte written.
 *
 * @param {Number|String} index byte position to write at, or the string
 *   itself when called with a single argument
 * @param {String} [str] the string to encode
 * @return {ByteBuffer} this, for chaining
 */
ByteBuffer.prototype.putUTF8RawString = function (index, str) {
  if (arguments.length === 1) {
    // putUTF8RawString(str)
    str = index;
    index = this._offset;
  }

  // Prefer Buffer.from where it exists; fall back to the legacy constructor.
  var buf = Buffer.from ? Buffer.from(str) : new Buffer(str);

  // Ensure capacity for BOTH call forms. Buffer#copy only copies as many
  // bytes as fit in the target, so without this an indexed write near the
  // end of the backing store would be silently truncated while `_offset`
  // was still advanced past the real data.
  this._checkSize(index + buf.length);
  buf.copy(this._bytes, index);

  this._offset = index + buf.length;
  return this;
};

ByteBuffer.prototype._copy = function (start, end) {
// magic number here..
// @see benchmark/buffer_slice_and_copy.js
Expand Down
5 changes: 3 additions & 2 deletions package.json
Expand Up @@ -14,7 +14,8 @@
"autod": "autod -w --prefix '^' -e benchmark && npm run cnpm",
"cnpm": "npm install --registry=https://registry.npm.taobao.org",
"contributors": "contributors -f plain -o AUTHORS",
"optimized": "node --allow-natives-syntax --trace_opt --trace_deopt test/optimized.js"
"optimized": "node --allow-natives-syntax --trace_opt --trace_deopt test/optimized.js",
"benchmark": "node benchmark/putRawString.js"
},
"dependencies": {
"debug": "^2.6.6",
Expand Down Expand Up @@ -56,4 +57,4 @@
},
"author": "fengmk2 <fengmk2@gmail.com> (http://fengmk2.com)",
"license": "MIT"
}
}

0 comments on commit 7fa2f4c

Please sign in to comment.