Skip to content
Permalink
Browse files

lib: add ASCII fast path to getStringWidth()

A lot of strings that are going to be passed to `getStringWidth()`
are ASCII strings, for which the calculation is rather easy and
calling into C++ can be skipped.

                                                       confidence improvement accuracy (*)    (**)   (***)
     misc/getstringwidth.js n=100000 type='ascii'            ***    328.99 %      ±21.73% ±29.25% ±38.77%
     misc/getstringwidth.js n=100000 type='emojiseq'                  2.94 %       ±7.66% ±10.19% ±13.26%
     misc/getstringwidth.js n=100000 type='fullwidth'                 4.70 %       ±5.64%  ±7.50%  ±9.76%

PR-URL: #29301
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Trivikram Kamat <trivikr.dev@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
Reviewed-By: Minwoo Jung <minwoo@nodesource.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
  • Loading branch information...
addaleax authored and BridgeAR committed Aug 25, 2019
1 parent 350975e commit 87b8f02daa5af5d0dac3f9f37b75ea9ce935fbdd
Showing with 74 additions and 7 deletions.
  1. +26 −0 benchmark/misc/getstringwidth.js
  2. +26 −7 lib/internal/readline/utils.js
  3. +22 −0 test/parallel/test-icu-stringwidth.js
@@ -0,0 +1,26 @@
'use strict';

const common = require('../common.js');

// Parameter matrix: every `type` of sample string is benchmarked with each
// iteration count listed in `n`.
const configs = {
  type: ['ascii', 'mixed', 'emojiseq', 'fullwidth'],
  n: [10e4]
};

// main() loads a module from the `internal/` namespace, so the benchmark
// process is started with --expose-internals.
const benchOptions = {
  flags: ['--expose-internals']
};

const bench = common.createBenchmark(main, configs, benchOptions);

// Benchmark body: times `n` consecutive getStringWidth() calls on a single
// sample string chosen by `type`.
function main({ n, type }) {
  const { getStringWidth } = require('internal/readline/utils');

  // One fixed input per benchmark `type`.
  const samples = {
    ascii: 'foobar'.repeat(100),
    mixed: 'foo'.repeat(100) + '😀' + 'bar'.repeat(100),
    emojiseq: '👨‍👨‍👧‍👦👨‍👩‍👦‍👦👨‍👩‍👧‍👧👩‍👩‍👧‍👦'.repeat(10),
    fullwidth: '你好'.repeat(150)
  };
  const str = samples[type];

  bench.start();
  for (let remaining = n; remaining > 0; remaining -= 1) {
    getStringWidth(str);
  }
  bench.end(n);
}
@@ -34,13 +34,32 @@ if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
/**
 * Computes the display width of a string, or of a single code point.
 *
 * @param {string|number} str Value to measure. An integer is treated as a
 *   code point and measured directly by ICU; anything else is converted with
 *   String() and passed through stripVTControlCharacters() first.
 * @param {object} [options]
 * @param {boolean} [options.ambiguousAsFullWidth] Forwarded to ICU (coerced
 *   to a boolean).
 * @param {boolean} [options.expandEmojiSequence] Forwarded to ICU (coerced
 *   to a boolean); only used for string input, integers always pass `false`.
 * @returns {number} The computed width.
 */
getStringWidth = function getStringWidth(str, options) {
  // Falsy `options` (undefined, null, ...) are treated as "no options".
  options = options || {};
  if (Number.isInteger(str)) {
    // Provide information about the character with code point 'str'.
    return icu.getStringWidth(
      str,
      Boolean(options.ambiguousAsFullWidth),
      false
    );
  }
  str = stripVTControlCharacters(String(str));
  let width = 0;
  for (let i = 0; i < str.length; i++) {
    // Try to avoid calling into C++ by first handling the ASCII portion of
    // the string. If it is fully ASCII, we skip the C++ part.
    const code = str.charCodeAt(i);
    if (code >= 127) {
      // First code unit outside the fast-path range (DEL or non-ASCII):
      // let ICU measure the whole remainder in a single call and stop.
      width += icu.getStringWidth(
        str.slice(i),
        Boolean(options.ambiguousAsFullWidth),
        Boolean(options.expandEmojiSequence)
      );
      break;
    }
    // Code units below 32 are control characters and add no width; the
    // remaining ones (32..126) each add one.
    if (code >= 32)
      width++;
  }
  return width;
};
isFullWidthCodePoint =
function isFullWidthCodePoint(code, options) {
@@ -69,3 +69,25 @@ assert.strictEqual(

// Control chars and combining chars are zero
assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);

// For every code point below 256, measuring the integer code point and
// measuring the equivalent one-character string (which may take the ASCII
// fast path) must agree, with and without `ambiguousAsFullWidth`.
[ false, true ].forEach((ambiguousAsFullWidth) => {
  const opts = { ambiguousAsFullWidth };

  for (let code = 0; code < 256; code++) {
    const char = String.fromCharCode(code);
    const widthFromCode = readline.getStringWidth(code, opts);
    const widthFromChar = readline.getStringWidth(char, opts);

    assert.strictEqual(widthFromCode, widthFromChar);
    // Appending an emoji must add exactly two to the width.
    assert.strictEqual(
      readline.getStringWidth(char + '🎉', opts),
      widthFromChar + 2);

    const isControl = code < 32 || (code >= 127 && code < 160);
    if (isControl) { // Control character
      assert.strictEqual(widthFromCode, 0);
    } else if (code < 127) { // Regular ASCII character
      assert.strictEqual(widthFromCode, 1);
    }
  }
});

0 comments on commit 87b8f02

Please sign in to comment.
You can’t perform that action at this time.