Skip to content

Commit 79ad172

Browse files
mscdex authored and rvagg committed
string_decoder: rewrite implementation
This commit provides a rewrite of StringDecoder that both improves performance (for non-single-byte encodings) and understandability. Additionally, StringDecoder instantiation performance has increased considerably due to inlinability and more efficient encoding name checking.

PR-URL: #6777
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
1 parent 3518ab9 commit 79ad172

File tree

5 files changed

+314
-208
lines changed

5 files changed

+314
-208
lines changed
Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
'use strict';
2+
const common = require('../common.js');
3+
const StringDecoder = require('string_decoder').StringDecoder;
4+
5+
const bench = common.createBenchmark(main, {
6+
encoding: [
7+
'ascii', 'utf8', 'utf-8', 'base64', 'ucs2', 'UTF-8', 'AscII', 'UTF-16LE'
8+
],
9+
n: [25e6]
10+
});
11+
12+
function main(conf) {
13+
const encoding = conf.encoding;
14+
const n = conf.n | 0;
15+
16+
bench.start();
17+
for (var i = 0; i < n; ++i) {
18+
const sd = new StringDecoder(encoding);
19+
!!sd.encoding;
20+
}
21+
bench.end(n);
22+
}

benchmark/string_decoder/string-decoder.js

Lines changed: 50 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,51 +1,79 @@
11
'use strict';
2-
var common = require('../common.js');
3-
var StringDecoder = require('string_decoder').StringDecoder;
2+
const common = require('../common.js');
3+
const StringDecoder = require('string_decoder').StringDecoder;
44

5-
var bench = common.createBenchmark(main, {
6-
encoding: ['ascii', 'utf8', 'base64-utf8', 'base64-ascii'],
7-
inlen: [32, 128, 1024],
5+
const bench = common.createBenchmark(main, {
6+
encoding: ['ascii', 'utf8', 'base64-utf8', 'base64-ascii', 'utf16le'],
7+
inlen: [32, 128, 1024, 4096],
88
chunk: [16, 64, 256, 1024],
9-
n: [25e4]
9+
n: [25e5]
1010
});
1111

12-
var UTF_ALPHA = 'Blåbærsyltetøy';
13-
var ASC_ALPHA = 'Blueberry jam';
12+
const UTF8_ALPHA = 'Blåbærsyltetøy';
13+
const ASC_ALPHA = 'Blueberry jam';
14+
const UTF16_BUF = Buffer.from('Blåbærsyltetøy', 'utf16le');
1415

1516
function main(conf) {
16-
var encoding = conf.encoding;
17-
var inLen = conf.inlen | 0;
18-
var chunkLen = conf.chunk | 0;
19-
var n = conf.n | 0;
17+
const encoding = conf.encoding;
18+
const inLen = conf.inlen | 0;
19+
const chunkLen = conf.chunk | 0;
20+
const n = conf.n | 0;
2021

2122
var alpha;
22-
var chunks = [];
23+
var buf;
24+
const chunks = [];
2325
var str = '';
24-
var isBase64 = (encoding === 'base64-ascii' || encoding === 'base64-utf8');
26+
const isBase64 = (encoding === 'base64-ascii' || encoding === 'base64-utf8');
2527
var i;
2628

2729
if (encoding === 'ascii' || encoding === 'base64-ascii')
2830
alpha = ASC_ALPHA;
2931
else if (encoding === 'utf8' || encoding === 'base64-utf8')
30-
alpha = UTF_ALPHA;
31-
else
32+
alpha = UTF8_ALPHA;
33+
else if (encoding === 'utf16le') {
34+
buf = UTF16_BUF;
35+
str = Buffer.alloc(0);
36+
} else
3237
throw new Error('Bad encoding');
3338

34-
var sd = new StringDecoder(isBase64 ? 'base64' : encoding);
39+
const sd = new StringDecoder(isBase64 ? 'base64' : encoding);
3540

3641
for (i = 0; i < inLen; ++i) {
3742
if (i > 0 && (i % chunkLen) === 0 && !isBase64) {
38-
chunks.push(Buffer.from(str, encoding));
39-
str = '';
43+
if (alpha) {
44+
chunks.push(Buffer.from(str, encoding));
45+
str = '';
46+
} else {
47+
chunks.push(str);
48+
str = Buffer.alloc(0);
49+
}
50+
}
51+
if (alpha)
52+
str += alpha[i % alpha.length];
53+
else {
54+
var start = i;
55+
var end = i + 2;
56+
if (i % 2 !== 0) {
57+
++start;
58+
++end;
59+
}
60+
str = Buffer.concat([
61+
str,
62+
buf.slice(start % buf.length, end % buf.length)
63+
]);
4064
}
41-
str += alpha[i % alpha.length];
4265
}
43-
if (str.length > 0 && !isBase64)
66+
67+
if (!alpha) {
68+
if (str.length > 0)
69+
chunks.push(str);
70+
} else if (str.length > 0 && !isBase64)
4471
chunks.push(Buffer.from(str, encoding));
72+
4573
if (isBase64) {
4674
str = Buffer.from(str, 'utf8').toString('base64');
4775
while (str.length > 0) {
48-
var len = Math.min(chunkLen, str.length);
76+
const len = Math.min(chunkLen, str.length);
4977
chunks.push(Buffer.from(str.substring(0, len), 'utf8'));
5078
str = str.substring(len);
5179
}

0 commit comments

Comments
 (0)