Skip to content

Commit

Permalink
f
Browse files Browse the repository at this point in the history
  • Loading branch information
fengmk2 committed Aug 3, 2017
1 parent 49296ba commit 590feb8
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 4 deletions.
12 changes: 11 additions & 1 deletion lib/byte.js
Expand Up @@ -362,15 +362,25 @@ ByteBuffer.prototype.putRawString = function (index, str) {
this._checkSize(this._offset + maxIncreaseSize);
}

// CESU-8 Bit Distribution
// @see http://www.unicode.org/reports/tr26/
//
// UTF-16 Code Unit | 1st Byte | 2nd Byte | 3rd Byte
// 000000000xxxxxxx (0x0000 ~ 0x007f) | 0xxxxxxx (0x00 ~ 0x7f) | |
// 00000yyyyyxxxxxx (0x0080 ~ 0x07ff) | 110yyyyy (0xc0 ~ 0xdf) | 10xxxxxx (0x80 ~ 0xbf) |
// zzzzyyyyyyxxxxxx (0x0800 ~ 0xffff) | 1110zzzz (0xe0 ~ 0xef) | 10yyyyyy (0x80 ~ 0xbf) | 10xxxxxx (0x80 ~ 0xbf)

var len = str && str.length;
if (!len) {
return this;
}
for (var i = 0; i < len; i++) {
var ch = str.charCodeAt(i);
// 0x80: 128
if (ch < 0x80) {
this._bytes[index++] = ch >>> 32;
this._bytes[index++] = ch;
} else if (ch < 0x800) {
// 0x800: 2048
this._bytes[index++] = (0xc0 + ((ch >> 6) & 0x1f)) >>> 32;
this._bytes[index++] = (0x80 + (ch & 0x3f)) >>> 32;
} else {
Expand Down
8 changes: 5 additions & 3 deletions package.json
Expand Up @@ -4,10 +4,10 @@
"description": "Input Buffer and Output Buffer, just like Java ByteBuffer",
"main": "lib/byte.js",
"files": [
"lib/"
"lib"
],
"scripts": {
"test": "mocha -R spec -t 5000 test/*.test.js",
"test": "mocha --require intelli-espower-loader -R spec -t 5000 test/*.test.js",
"test-cov": "node node_modules/.bin/istanbul cover node_modules/.bin/_mocha -- -t 5000 test/*.test.js",
"test-travis": "node node_modules/.bin/istanbul cover node_modules/.bin/_mocha --report lcovonly -- -t 5000 test/*.test.js",
"jshint": "jshint .",
Expand All @@ -27,10 +27,12 @@
"benchmark": "1",
"contributors": "*",
"fastbench": "^1.0.1",
"intelli-espower-loader": "^1.0.1",
"istanbul": "*",
"jshint": "*",
"mocha": "*",
"optimized": "^1.2.0"
"optimized": "^1.2.0",
"power-assert": "^1.4.4"
},
"homepage": "https://github.com/node-modules/byte",
"repository": {
Expand Down
143 changes: 143 additions & 0 deletions test/byte.test.js
Expand Up @@ -483,6 +483,149 @@ describe('byte.test.js', function () {
assert(bytes.toString() === '<ByteBuffer>');
});

// Code units in 0x0000 ~ 0x007f must serialize to exactly one byte (0xxxxxxx).
// Every sample is written twice: once with putString() and once with
// putRawString(). The putString() dump is expected to carry 00 00 00 01 ahead
// of the payload byte (presumably a length header — confirm against
// putString); the putRawString() dump is expected to be the payload byte only.
it('should 000000000xxxxxxx (0x0000 ~ 0x007f) => 0xxxxxxx (0x00 ~ 0x7f)', function() {
  var samples = [
    {
      // lower bound of the range
      text: String.fromCharCode(0x0000),
      utf8: '<ByteBuffer 00 00 00 01 00>',
      cesu8: '<ByteBuffer 00>'
    },
    {
      text: String.fromCharCode(0x0001),
      utf8: '<ByteBuffer 00 00 00 01 01>',
      cesu8: '<ByteBuffer 01>'
    },
    {
      // 'E' is 0x45
      text: 'E',
      utf8: '<ByteBuffer 00 00 00 01 45>',
      cesu8: '<ByteBuffer 45>'
    },
    {
      // upper bound of the range
      text: String.fromCharCode(0x7F),
      utf8: '<ByteBuffer 00 00 00 01 7f>',
      cesu8: '<ByteBuffer 7f>'
    }
  ];

  samples.forEach(function (sample) {
    // UTF-8
    var prefixed = ByteBuffer.allocate(1);
    prefixed.putString(sample.text);
    assert(prefixed.toString() === sample.utf8);

    // CESU-8
    var raw = ByteBuffer.allocate(1);
    raw.putRawString(sample.text);
    assert(raw.toString() === sample.cesu8);
  });
});

// Code units in 0x0080 ~ 0x07ff must serialize to two bytes (110yyyyy 10xxxxxx).
// Each case checks putString() first, whose expected dump starts with
// 00 00 00 02 ahead of the payload (presumably a byte-length header — confirm
// against putString), and then putRawString(), whose expected dump is the
// two payload bytes only. A fresh one-byte buffer is allocated for every write
// so that no state leaks between cases.
it('should 00000yyyyyxxxxxx (0x0080 ~ 0x07ff) => 110yyyyy (0xc0 ~ 0xdf) | 10xxxxxx (0x80 ~ 0xbf)', function() {
  // UTF-8 (0x80 is the lower bound of the range)
  var bytes = ByteBuffer.allocate(1);
  bytes.putString(String.fromCharCode(0x80));
  assert(bytes.toString() === '<ByteBuffer 00 00 00 02 c2 80>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString(String.fromCharCode(0x80));
  assert(bytes.toString() === '<ByteBuffer c2 80>');

  // UTF-8
  bytes = ByteBuffer.allocate(1);
  bytes.putString('ȅ'); // 0x0205: 517
  assert(bytes.toString() === '<ByteBuffer 00 00 00 02 c8 85>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString('ȅ');
  assert(bytes.toString() === '<ByteBuffer c8 85>');

  // UTF-8
  bytes = ByteBuffer.allocate(1);
  bytes.putString(String.fromCharCode(0x81));
  assert(bytes.toString() === '<ByteBuffer 00 00 00 02 c2 81>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString(String.fromCharCode(0x81));
  assert(bytes.toString() === '<ByteBuffer c2 81>');

  // UTF-8
  bytes = ByteBuffer.allocate(1);
  bytes.putString(String.fromCharCode(0x7FE));
  assert(bytes.toString() === '<ByteBuffer 00 00 00 02 df be>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString(String.fromCharCode(0x7FE));
  assert(bytes.toString() === '<ByteBuffer df be>');

  // UTF-8 (0x7FF is the upper bound of the range)
  bytes = ByteBuffer.allocate(1);
  bytes.putString(String.fromCharCode(0x7FF));
  assert(bytes.toString() === '<ByteBuffer 00 00 00 02 df bf>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString(String.fromCharCode(0x7FF));
  assert(bytes.toString() === '<ByteBuffer df bf>');
});

// Code units in 0x0800 ~ 0xffff must serialize to three bytes
// (1110zzzz 10yyyyyy 10xxxxxx) when written with putRawString().
// Each case checks putString() first, whose expected dump starts with a
// 4-byte value matching the payload size (presumably a byte-length header —
// confirm against putString), and then putRawString(), whose expected dump is
// the payload only. A fresh one-byte buffer is allocated for every write so
// that no state leaks between cases.
it('should zzzzyyyyyyxxxxxx (0x0800 ~ 0xffff) => 1110zzzz (0xe0 ~ 0xef) | 10yyyyyy (0x80 ~ 0xbf) | 10xxxxxx (0x80 ~ 0xbf)', function() {
  // UTF-8 (0x800 is the lower bound of the range)
  var bytes = ByteBuffer.allocate(1);
  bytes.putString(String.fromCharCode(0x800));
  assert(bytes.toString() === '<ByteBuffer 00 00 00 03 e0 a0 80>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString(String.fromCharCode(0x800));
  assert(bytes.toString() === '<ByteBuffer e0 a0 80>');

  // UTF-8
  bytes = ByteBuffer.allocate(1);
  bytes.putString(String.fromCharCode(0x801));
  assert(bytes.toString() === '<ByteBuffer 00 00 00 03 e0 a0 81>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString(String.fromCharCode(0x801));
  assert(bytes.toString() === '<ByteBuffer e0 a0 81>');

  // Surrogate pair: the UTF-8 expectation is a single 4-byte sequence, while
  // the CESU-8 expectation encodes each surrogate on its own as 3 bytes
  // (6 bytes in total).
  // UTF-8
  bytes = ByteBuffer.allocate(1);
  bytes.putString('𐐀'); // 0xD801 0xDC00
  assert(bytes.toString() === '<ByteBuffer 00 00 00 04 f0 90 90 80>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString('𐐀');
  assert(bytes.toString() === '<ByteBuffer ed a0 81 ed b0 80>');

  // Same surrogate-pair check, written with explicit escapes.
  // UTF-8
  bytes = ByteBuffer.allocate(1);
  bytes.putString('\ud801\udc01'); // 0xD801 0xDC01
  assert(bytes.toString() === '<ByteBuffer 00 00 00 04 f0 90 90 81>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString('\ud801\udc01');
  assert(bytes.toString() === '<ByteBuffer ed a0 81 ed b0 81>');

  // UTF-8
  bytes = ByteBuffer.allocate(1);
  bytes.putString(String.fromCharCode(0xFFFE));
  assert(bytes.toString() === '<ByteBuffer 00 00 00 03 ef bf be>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString(String.fromCharCode(0xFFFE));
  assert(bytes.toString() === '<ByteBuffer ef bf be>');

  // UTF-8 (0xFFFF is the upper bound of the range)
  bytes = ByteBuffer.allocate(1);
  bytes.putString(String.fromCharCode(0xFFFF));
  assert(bytes.toString() === '<ByteBuffer 00 00 00 03 ef bf bf>');
  // CESU-8
  bytes = ByteBuffer.allocate(1);
  bytes.putRawString(String.fromCharCode(0xFFFF));
  assert(bytes.toString() === '<ByteBuffer ef bf bf>');
});

it('should put emoji', function () {
// utf8
var bytes = ByteBuffer.allocate(1);
Expand Down

0 comments on commit 590feb8

Please sign in to comment.