Skip to content

Commit

Permalink
src: implement FastByteLengthUtf8 with simdutf::utf8_length_from_latin1
Browse files Browse the repository at this point in the history
PR-URL: #50840
Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
  • Loading branch information
lemire authored and richardlau committed Mar 25, 2024
1 parent 454b4f8 commit a80f660
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
5 changes: 4 additions & 1 deletion benchmark/buffers/buffer-bytelength-string.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
const common = require('../common');

const bench = common.createBenchmark(main, {
type: ['one_byte', 'two_bytes', 'three_bytes', 'four_bytes'],
type: ['one_byte', 'two_bytes', 'three_bytes',
'four_bytes', 'latin1'],
encoding: ['utf8', 'base64'],
repeat: [1, 2, 16, 256], // x16
n: [4e6],
Expand All @@ -14,6 +15,8 @@ const chars = {
two_bytes: 'ΰαβγδεζηθικλμνξο',
three_bytes: '挰挱挲挳挴挵挶挷挸挹挺挻挼挽挾挿',
four_bytes: '𠜎𠜱𠝹𠱓𠱸𠲖𠳏𠳕𠴕𠵼𠵿𠸎𠸏𠹷𠺝𠺢',
latin1: 'Un homme sage est supérieur à toutes ' +
'les insultes qui peuvent lui être adressées, et la meilleure réponse est la patience et la modération.',
};

function getInput(type, repeat, encoding) {
Expand Down
8 changes: 6 additions & 2 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -743,13 +743,17 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {

// Returns the number of bytes needed to encode the one-byte string `source`
// as UTF-8. The input is treated as Latin-1: a character with the high bit
// clear takes one UTF-8 byte, a character with the high bit set takes two.
//
// `receiver` is not used by this function.
// NOTE(review): presumably this is the V8 fast-API counterpart of
// SlowByteLengthUtf8 — confirm against the registration site.
uint32_t FastByteLengthUtf8(Local<Value> receiver,
                            const v8::FastOneByteString& source) {
  // For short inputs the overhead of calling into simdutf may not be worth
  // it, so simdutf is reserved for longer strings.
  if (source.length > 128) {
    return simdutf::utf8_length_from_latin1(source.data, source.length);
  }

  const uint32_t length = source.length;
  // Start from one byte per character; the loop below adds the extra byte
  // for every character whose high bit is set.
  uint32_t result = length;
  const uint8_t* data = reinterpret_cast<const uint8_t*>(source.data);
  for (uint32_t i = 0; i < length; ++i) {
    // (data[i] >> 7) is 1 exactly when the byte is >= 0x80, otherwise 0.
    result += (data[i] >> 7);
  }
  return result;
}

Expand Down

0 comments on commit a80f660

Please sign in to comment.