Skip to content

Commit 87521e9

Browse files
joyeecheung authored and juanarbol committed
deps: V8: backport 1361b2a49d02
Original commit message:

[strings] improve array index hash distribution

Previously, the hashes stored in a Name's raw_hash_field for decimal numeric strings (potential array indices) consisted of the literal integer value along with the length of the string. This means consecutive numeric strings can have consecutive hash values, which can lead to O(n^2) probing for insertion in the worst case when, e.g., a non-numeric string happens to land in these buckets.

This patch adds a build-time flag v8_enable_seeded_array_index_hash that scrambles the 24-bit array-index value stored in a Name's raw_hash_field to improve the distribution.

    x ^= x >> kShift; x = (x * m1) & kMask;  // round 1
    x ^= x >> kShift; x = (x * m2) & kMask;  // round 2
    x ^= x >> kShift;                        // finalize

To decode, apply the same steps with the modular inverses of m1 and m2 in reverse order.

    x ^= x >> kShift; x = (x * m2_inv) & kMask;  // round 1
    x ^= x >> kShift; x = (x * m1_inv) & kMask;  // round 2
    x ^= x >> kShift;                            // finalize

where kShift = kArrayIndexValueBits / 2, kMask = kArrayIndexValueMask, m1 and m2 (both odd) are the lower bits of the rapidhash secrets, and m1_inv and m2_inv are the precomputed modular inverses of m1 and m2.

The precomputed values are appended to the hash_seed ByteArray in ReadOnlyRoots and accessed in generated code to reduce overhead. In call sites that don't already have access to the seeds, we read them from the current isolate group/isolate's read-only roots.

To consolidate the code that encodes/decodes these hashes, this patch adds MakeArrayIndexHash/DecodeArrayIndexFromHashField in C++ and CSA that perform seeding/unseeding if enabled, and updates the places where encoding/decoding of an array index is needed to use them.
Bug: 477515021
Change-Id: I350afe511951a54c4378396538152cc56565fd55
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7564330
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Joyee Cheung <joyee@igalia.com>
Cr-Commit-Position: refs/heads/main@{#105596}

Refs: v8/v8@1361b2a
Co-authored-by: Joyee Cheung <joyeec9h3@gmail.com>
PR-URL: nodejs-private/node-private#828
1 parent 0450133 commit 87521e9

22 files changed

+397
-50
lines changed

common.gypi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
# Reset this number to 0 on major V8 upgrades.
4040
# Increment by one for each non-official patch applied to deps/v8.
41-
'v8_embedder_string': '-node.43',
41+
'v8_embedder_string': '-node.44',
4242

4343
##### V8 defaults for Node.js #####
4444

deps/v8/BUILD.bazel

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,11 @@ v8_flag(
230230
default = False,
231231
)
232232

233+
v8_flag(
234+
name = "v8_enable_seeded_array_index_hash",
235+
default = False,
236+
)
237+
233238
selects.config_setting_group(
234239
name = "enable_drumbrake_x64",
235240
match_all = [
@@ -485,6 +490,7 @@ v8_config(
485490
"v8_enable_webassembly": "V8_ENABLE_WEBASSEMBLY",
486491
"v8_enable_drumbrake": "V8_ENABLE_DRUMBRAKE",
487492
"v8_enable_drumbrake_tracing": "V8_ENABLE_DRUMBRAKE_TRACING",
493+
"v8_enable_seeded_array_index_hash": "V8_ENABLE_SEEDED_ARRAY_INDEX_HASH",
488494
"v8_jitless": "V8_JITLESS",
489495
"v8_enable_vtunejit": "ENABLE_VTUNE_JIT_INTERFACE",
490496
},

deps/v8/BUILD.gn

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,9 @@ declare_args() {
518518

519519
# Use a hard-coded secret value when hashing.
520520
v8_use_default_hasher_secret = true
521+
522+
# Enable seeded array index hash.
523+
v8_enable_seeded_array_index_hash = false
521524
}
522525

523526
# Derived defaults.
@@ -1209,6 +1212,9 @@ config("features") {
12091212
if (v8_enable_lite_mode) {
12101213
defines += [ "V8_LITE_MODE" ]
12111214
}
1215+
if (v8_enable_seeded_array_index_hash) {
1216+
defines += [ "V8_ENABLE_SEEDED_ARRAY_INDEX_HASH" ]
1217+
}
12121218
if (v8_enable_gdbjit) {
12131219
defines += [ "ENABLE_GDB_JIT_INTERFACE" ]
12141220
}

deps/v8/src/ast/ast-value-factory.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ bool AstRawString::AsArrayIndex(uint32_t* index) const {
8383
// can't be convertible to an array index.
8484
if (!IsIntegerIndex()) return false;
8585
if (length() <= Name::kMaxCachedArrayIndexLength) {
86-
*index = Name::ArrayIndexValueBits::decode(raw_hash_field_);
86+
*index = StringHasher::DecodeArrayIndexFromHashField(
87+
raw_hash_field_, HashSeed(GetReadOnlyRoots()));
8788
return true;
8889
}
8990
// Might be an index, but too big to cache it. Do the slow conversion. This

deps/v8/src/builtins/number.tq

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ transitioning javascript builtin NumberParseFloat(
299299
const hash: NameHash = s.raw_hash_field;
300300
if (IsIntegerIndex(hash) &&
301301
hash.array_index_length < kMaxCachedArrayIndexLength) {
302-
const arrayIndex: uint32 = hash.array_index_value;
302+
const arrayIndex: uint32 = DecodeArrayIndexFromHashField(hash);
303303
return SmiFromUint32(arrayIndex);
304304
}
305305
// Fall back to the runtime to convert string to a number.
@@ -350,7 +350,7 @@ transitioning builtin ParseInt(
350350
const hash: NameHash = s.raw_hash_field;
351351
if (IsIntegerIndex(hash) &&
352352
hash.array_index_length < kMaxCachedArrayIndexLength) {
353-
const arrayIndex: uint32 = hash.array_index_value;
353+
const arrayIndex: uint32 = DecodeArrayIndexFromHashField(hash);
354354
return SmiFromUint32(arrayIndex);
355355
}
356356
// Fall back to the runtime.

deps/v8/src/builtins/wasm.tq

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,8 +1517,8 @@ builtin WasmStringToDouble(s: String): float64 {
15171517
const hash: NameHash = s.raw_hash_field;
15181518
if (IsIntegerIndex(hash) &&
15191519
hash.array_index_length < kMaxCachedArrayIndexLength) {
1520-
const arrayIndex: int32 = Signed(hash.array_index_value);
1521-
return Convert<float64>(arrayIndex);
1520+
const arrayIndex: uint32 = DecodeArrayIndexFromHashField(hash);
1521+
return Convert<float64>(Signed(arrayIndex));
15221522
}
15231523
return StringToFloat64(Flatten(s));
15241524
}

deps/v8/src/codegen/code-stub-assembler.cc

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2546,6 +2546,56 @@ TNode<Uint32T> CodeStubAssembler::LoadJSReceiverIdentityHash(
25462546
return var_hash.value();
25472547
}
25482548

2549+
#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
2550+
// Mirror C++ StringHasher::SeedArrayIndexValue.
2551+
// Scrambles |value| with two xorshift-multiply rounds followed by a final
// xorshift, using the multipliers m1 and m2 read from the kHashSeed root.
// Each product is masked with Name::kArrayIndexValueMask before the next
// step. UnseedArrayIndexValue applies the matching steps with m2_inv and
// m1_inv.
TNode<Uint32T> CodeStubAssembler::SeedArrayIndexValue(TNode<Uint32T> value) {
2552+
// Load m1 and m2 from the hash seed byte array. In the compiled code
2553+
// these will always come from the read-only roots.
2554+
TNode<ByteArray> hash_seed = CAST(LoadRoot(RootIndex::kHashSeed));
2555+
// Offset of the ByteArray data start with kHeapObjectTag subtracted;
// presumably needed because hash_seed is a tagged pointer and Load takes a
// raw byte offset -- confirm against Load's contract.
intptr_t base_offset = OFFSET_OF_DATA_START(ByteArray) - kHeapObjectTag;
2556+
TNode<Uint32T> m1 = Load<Uint32T>(
2557+
hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM1Offset));
2558+
TNode<Uint32T> m2 = Load<Uint32T>(
2559+
hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM2Offset));
2560+
2561+
TNode<Word32T> x = value;
2562+
// 2-round xorshift-multiply.
2563+
// Round 1: xorshift, then multiply by m1 and mask.
x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
2564+
x = Word32And(Uint32Mul(Unsigned(x), m1),
2565+
Uint32Constant(Name::kArrayIndexValueMask));
2566+
// Round 2: xorshift, then multiply by m2 and mask.
x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
2567+
x = Word32And(Uint32Mul(Unsigned(x), m2),
2568+
Uint32Constant(Name::kArrayIndexValueMask));
2569+
// Finalize: one more xorshift, with no multiply afterwards.
x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
2570+
2571+
return Unsigned(x);
2572+
}
2573+
2574+
// Mirror C++ StringHasher::UnseedArrayIndexValue.
2575+
// Reverses SeedArrayIndexValue: runs the same xorshift / multiply-and-mask
// sequence, but multiplies by m2_inv first and m1_inv second, both read from
// the kHashSeed root. Each product is masked with
// Name::kArrayIndexValueMask.
TNode<Uint32T> CodeStubAssembler::UnseedArrayIndexValue(TNode<Uint32T> value) {
2576+
// Load m1_inv and m2_inv from the hash seed byte array. In the compiled code
2577+
// these will always come from the read-only roots.
2578+
TNode<ByteArray> hash_seed = CAST(LoadRoot(RootIndex::kHashSeed));
2579+
// Offset of the ByteArray data start with kHeapObjectTag subtracted;
// presumably needed because hash_seed is a tagged pointer and Load takes a
// raw byte offset -- confirm against Load's contract.
intptr_t base_offset = OFFSET_OF_DATA_START(ByteArray) - kHeapObjectTag;
2580+
TNode<Uint32T> m1_inv = Load<Uint32T>(
2581+
hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM1InvOffset));
2582+
TNode<Uint32T> m2_inv = Load<Uint32T>(
2583+
hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM2InvOffset));
2584+
2585+
TNode<Word32T> x = value;
2586+
// 2-round xorshift-multiply (inverse).
2587+
// Xorshift is an involution when kShift is at least half of the value width.
2588+
// Undo the seeding finalize step (the xorshift is its own inverse here).
x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
2589+
// Undo seeding round 2: multiply by m2_inv and mask, then xorshift.
x = Word32And(Uint32Mul(Unsigned(x), m2_inv),
2590+
Uint32Constant(Name::kArrayIndexValueMask));
2591+
x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
2592+
// Undo seeding round 1: multiply by m1_inv and mask, then xorshift.
x = Word32And(Uint32Mul(Unsigned(x), m1_inv),
2593+
Uint32Constant(Name::kArrayIndexValueMask));
2594+
x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift)));
2595+
return Unsigned(x);
2596+
}
2597+
#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
2598+
25492599
TNode<Uint32T> CodeStubAssembler::LoadNameHashAssumeComputed(TNode<Name> name) {
25502600
TNode<Uint32T> hash_field = LoadNameRawHash(name);
25512601
CSA_DCHECK(this, IsClearWord32(hash_field, Name::kHashNotComputedMask));
@@ -8983,8 +9033,7 @@ TNode<Number> CodeStubAssembler::StringToNumber(TNode<String> input) {
89839033
GotoIf(IsSetWord32(raw_hash_field, Name::kDoesNotContainCachedArrayIndexMask),
89849034
&runtime);
89859035

8986-
var_result = SmiTag(Signed(
8987-
DecodeWordFromWord32<String::ArrayIndexValueBits>(raw_hash_field)));
9036+
var_result = SmiFromUint32(DecodeArrayIndexFromHashField(raw_hash_field));
89889037
Goto(&end);
89899038

89909039
BIND(&runtime);
@@ -9902,9 +9951,8 @@ void CodeStubAssembler::TryToName(TNode<Object> key, Label* if_keyisindex,
99029951

99039952
BIND(&if_has_cached_index);
99049953
{
9905-
TNode<IntPtrT> index =
9906-
Signed(DecodeWordFromWord32<String::ArrayIndexValueBits>(
9907-
raw_hash_field));
9954+
TNode<IntPtrT> index = Signed(ChangeUint32ToWord(
9955+
DecodeArrayIndexFromHashField(raw_hash_field)));
99089956
CSA_DCHECK(this, IntPtrLessThan(index, IntPtrConstant(INT_MAX)));
99099957
*var_index = index;
99109958
Goto(if_keyisindex);

deps/v8/src/codegen/code-stub-assembler.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4614,6 +4614,12 @@ class V8_EXPORT_PRIVATE CodeStubAssembler
46144614
return WordEqual(WordAnd(flags, IntPtrConstant(mask)), IntPtrConstant(0));
46154615
}
46164616

4617+
#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
4618+
// Mirror C++ StringHasher::SeedArrayIndexValue and UnseedArrayIndexValue.
4619+
TNode<Uint32T> SeedArrayIndexValue(TNode<Uint32T> value);
4620+
TNode<Uint32T> UnseedArrayIndexValue(TNode<Uint32T> value);
4621+
#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH
4622+
46174623
private:
46184624
friend class CodeStubArguments;
46194625

deps/v8/src/heap/factory-base.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1094,7 +1094,8 @@ inline Handle<String> FactoryBase<Impl>::SmiToString(Tagged<Smi> number,
10941094
if (raw->raw_hash_field() == String::kEmptyHashField &&
10951095
number.value() >= 0) {
10961096
uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash(
1097-
static_cast<uint32_t>(number.value()), raw->length());
1097+
static_cast<uint32_t>(number.value()), raw->length(),
1098+
HashSeed(read_only_roots()));
10981099
raw->set_raw_hash_field(raw_hash_field);
10991100
}
11001101
}

deps/v8/src/heap/factory.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3949,7 +3949,8 @@ Handle<String> Factory::SizeToString(size_t value, bool check_cache) {
39493949
if (value <= JSArray::kMaxArrayIndex &&
39503950
raw->raw_hash_field() == String::kEmptyHashField) {
39513951
uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash(
3952-
static_cast<uint32_t>(value), raw->length());
3952+
static_cast<uint32_t>(value), raw->length(),
3953+
HashSeed(read_only_roots()));
39533954
raw->set_raw_hash_field(raw_hash_field);
39543955
}
39553956
}

0 commit comments

Comments
 (0)