Skip to content

Commit b120ca9

Browse files
Lazy-page Symbol interner
This is a simple implementation that works well only on 64-bit platforms, where we can lazily allocate zeroed pages (i.e., not actually consume physical memory for pages that remain zeroed). That lets us reserve a 4 GB chunk of address space for all interned strings and then bump-allocate into it by offset. This already fixes the unsoundness in `Symbol::as_str` by leaking that 4 GB chunk, and it could also be faster (TBD). A rewrite supporting nearly any platform should be straightforward: shard the allocation and lazily allocate it manually in chunks.
1 parent e08cd3c commit b120ca9

File tree

7 files changed

+140
-51
lines changed

7 files changed

+140
-51
lines changed

Cargo.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,6 +1533,8 @@ version = "0.15.2"
15331533
source = "registry+https://github.com/rust-lang/crates.io-index"
15341534
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
15351535
dependencies = [
1536+
"allocator-api2",
1537+
"equivalent",
15361538
"foldhash",
15371539
"serde",
15381540
]
@@ -3614,6 +3616,7 @@ dependencies = [
36143616
"either",
36153617
"elsa",
36163618
"ena",
3619+
"hashbrown 0.15.2",
36173620
"indexmap",
36183621
"jobserver",
36193622
"libc",
@@ -4510,6 +4513,7 @@ version = "0.0.0"
45104513
dependencies = [
45114514
"blake3",
45124515
"derive-where",
4516+
"hashbrown 0.15.2",
45134517
"indexmap",
45144518
"itoa",
45154519
"md-5",

compiler/rustc_data_structures/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ bitflags = "2.4.1"
1010
either = "1.0"
1111
elsa = "=1.7.1"
1212
ena = "0.14.3"
13+
hashbrown = "0.15.2"
1314
indexmap = "2.4.0"
1415
jobserver_crate = { version = "0.1.28", package = "jobserver" }
1516
measureme = "11"

compiler/rustc_data_structures/src/marker.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ impl_dyn_send!(
6868
[std::sync::LazyLock<T, F> where T: DynSend, F: DynSend]
6969
[std::collections::HashSet<K, S> where K: DynSend, S: DynSend]
7070
[std::collections::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend]
71+
[hashbrown::HashTable<T> where T: DynSend]
7172
[std::collections::BTreeMap<K, V, A> where K: DynSend, V: DynSend, A: std::alloc::Allocator + Clone + DynSend]
7273
[Vec<T, A> where T: DynSend, A: std::alloc::Allocator + DynSend]
7374
[Box<T, A> where T: ?Sized + DynSend, A: std::alloc::Allocator + DynSend]
@@ -142,6 +143,7 @@ impl_dyn_sync!(
142143
[std::sync::LazyLock<T, F> where T: DynSend + DynSync, F: DynSend]
143144
[std::collections::HashSet<K, S> where K: DynSync, S: DynSync]
144145
[std::collections::HashMap<K, V, S> where K: DynSync, V: DynSync, S: DynSync]
146+
[hashbrown::HashTable<T> where T: DynSync]
145147
[std::collections::BTreeMap<K, V, A> where K: DynSync, V: DynSync, A: std::alloc::Allocator + Clone + DynSync]
146148
[Vec<T, A> where T: DynSync, A: std::alloc::Allocator + DynSync]
147149
[Box<T, A> where T: ?Sized + DynSync, A: std::alloc::Allocator + DynSync]

compiler/rustc_span/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition = "2021"
77
# tidy-alphabetical-start
88
blake3 = "1.5.2"
99
derive-where = "1.2.7"
10+
hashbrown = "0.15.2"
1011
indexmap = { version = "2.0.0" }
1112
itoa = "1.0"
1213
md5 = { package = "md-5", version = "0.10.0" }

compiler/rustc_span/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#![feature(round_char_boundary)]
3232
#![feature(rustc_attrs)]
3333
#![feature(rustdoc_internals)]
34+
#![feature(str_from_raw_parts)]
35+
#![feature(strict_provenance_atomic_ptr)]
3436
#![warn(unreachable_pub)]
3537
// tidy-alphabetical-end
3638

compiler/rustc_span/src/symbol.rs

Lines changed: 123 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22
//! allows bidirectional lookup; i.e., given a value, one can easily find the
33
//! type, and vice versa.
44
5-
use std::hash::{Hash, Hasher};
5+
use std::alloc::Layout;
6+
use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};
7+
use std::sync::Mutex;
8+
use std::sync::atomic::{AtomicPtr, Ordering};
69
use std::{fmt, str};
710

8-
use rustc_arena::DroplessArena;
9-
use rustc_data_structures::fx::FxIndexSet;
11+
use rustc_data_structures::fx::FxHasher;
1012
use rustc_data_structures::stable_hasher::{
1113
HashStable, StableCompare, StableHasher, ToStableHashKey,
1214
};
@@ -2461,18 +2463,9 @@ impl Symbol {
24612463
with_session_globals(|session_globals| session_globals.symbol_interner.intern(string))
24622464
}
24632465

2464-
/// Access the underlying string. This is a slowish operation because it
2465-
/// requires locking the symbol interner.
2466-
///
2467-
/// Note that the lifetime of the return value is a lie. It's not the same
2468-
/// as `&self`, but actually tied to the lifetime of the underlying
2469-
/// interner. Interners are long-lived, and there are very few of them, and
2470-
/// this function is typically used for short-lived things, so in practice
2471-
/// it works out ok.
2466+
/// Access the underlying string.
24722467
pub fn as_str(&self) -> &str {
2473-
with_session_globals(|session_globals| unsafe {
2474-
std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get(*self))
2475-
})
2468+
with_session_globals(|session_globals| session_globals.symbol_interner.get(*self))
24762469
}
24772470

24782471
pub fn as_u32(self) -> u32 {
@@ -2527,53 +2520,137 @@ impl StableCompare for Symbol {
25272520
}
25282521
}
25292522

2530-
pub(crate) struct Interner(Lock<InternerInner>);
2523+
// This is never de-initialized and stores interned &str in static storage.
2524+
// Each str is stored length-prefixed (u32), and we allow for random-access indexing with a u32
2525+
// index by direct lookup in the arena. Indices <2^16 are stored in a separate structure (they are
2526+
// pre-allocated at dense addresses so we can't use the same lockless O(1) hack for them).
2527+
static GLOBAL_ARENA: std::sync::LazyLock<StringArena> =
2528+
std::sync::LazyLock::new(|| StringArena::new());
2529+
2530+
struct StringArena {
2531+
base: *mut u8,
2532+
end: *mut u8,
2533+
write_at: AtomicPtr<u8>,
2534+
// this guards writes to `write_at`, but not reads, which proceed lock-free. write_at is only moved
2535+
// forward so this ends up being safe.
2536+
writer: std::sync::Mutex<()>,
2537+
}
2538+
2539+
unsafe impl Sync for StringArena {}
2540+
unsafe impl Send for StringArena {}
2541+
2542+
impl StringArena {
2543+
fn new() -> Self {
2544+
unsafe {
2545+
let layout =
2546+
Layout::from_size_align(u32::MAX as usize, std::mem::align_of::<u32>()).unwrap();
2547+
let allocation = std::alloc::alloc_zeroed(layout);
2548+
if allocation.is_null() {
2549+
std::alloc::handle_alloc_error(layout)
2550+
}
2551+
StringArena {
2552+
base: allocation,
2553+
end: allocation.add(layout.size()),
2554+
// Reserve 2^16 u32 indices -- these will be used for pre-filled interning where we
2555+
// have a dense SymbolIndex space. We could make this exact but it doesn't really
2556+
// matter for this initial test anyway.
2557+
write_at: AtomicPtr::new(allocation.add(u16::MAX as usize)),
2558+
writer: Mutex::new(()),
2559+
}
2560+
}
2561+
}
2562+
2563+
fn alloc(&self, s: &str) -> u32 {
2564+
unsafe {
2565+
let _guard = self.writer.lock().unwrap();
2566+
// Allocate a chunk of the region, and fill it with the &str's length and bytes.
2567+
let dst = self.write_at.load(Ordering::Acquire);
2568+
// Assert we're in-bounds.
2569+
assert!(
2570+
dst.addr().checked_add(4).unwrap().checked_add(s.len()).unwrap() < self.end.addr()
2571+
);
2572+
dst.cast::<u32>().write_unaligned(s.len().try_into().unwrap());
2573+
dst.add(4).copy_from_nonoverlapping(s.as_ptr(), s.len());
2574+
2575+
// Semantically this releases the memory for readers.
2576+
self.write_at.store(dst.add(4 + s.len()), Ordering::Release);
2577+
2578+
dst.byte_offset_from(self.base).try_into().unwrap()
2579+
}
2580+
}
2581+
2582+
fn get(&self, idx: u32) -> &str {
2583+
// SAFETY: `write_at` is only updated after writing to a given region, so we never have
2584+
// concurrent writes to this memory. It's initialized at allocation (alloc_zeroed) and we
2585+
// only write initialized bytes to it, so there's also never any uninit bytes in the region.
2586+
let region = unsafe {
2587+
let end = self.write_at.load(Ordering::Acquire);
2588+
std::slice::from_raw_parts(self.base, end.offset_from(self.base) as usize)
2589+
};
2590+
2591+
let len = u32::from_ne_bytes(region[idx as usize..idx as usize + 4].try_into().unwrap());
2592+
let data = &region[idx as usize + 4..][..len as usize];
2593+
2594+
// SAFETY: We (in theory) could be passed a random `idx` into the memory region, so we need
2595+
// to re-check that the region is actually UTF-8 before returning. If it is, then it is safe
2596+
// to return &str, though it might not be a *useful* &str due to having near-random
2597+
// contents.
2598+
std::str::from_utf8(data).unwrap()
2599+
}
2600+
}
2601+
2602+
pub(crate) struct Interner(&'static [&'static str], Lock<InternerInner>);
25312603

2532-
// The `&'static str`s in this type actually point into the arena.
2533-
//
2534-
// This type is private to prevent accidentally constructing more than one
2535-
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
2536-
// between `Interner`s.
25372604
struct InternerInner {
2538-
arena: DroplessArena,
2539-
strings: FxIndexSet<&'static str>,
2605+
strings: hashbrown::HashTable<Symbol>,
25402606
}
25412607

25422608
impl Interner {
2543-
fn prefill(init: &[&'static str]) -> Self {
2544-
Interner(Lock::new(InternerInner {
2545-
arena: Default::default(),
2546-
strings: init.iter().copied().collect(),
2547-
}))
2609+
fn prefill(init: &'static [&'static str]) -> Self {
2610+
assert!(init.len() < u16::MAX as usize);
2611+
let mut strings = hashbrown::HashTable::new();
2612+
2613+
for (idx, s) in init.iter().copied().enumerate() {
2614+
let mut hasher = FxHasher::default();
2615+
s.hash(&mut hasher);
2616+
let hash = hasher.finish();
2617+
strings.insert_unique(hash, Symbol::new(idx as u32), |val| {
2618+
// has to be from `init` because we haven't yet inserted anything except those.
2619+
BuildHasherDefault::<FxHasher>::default().hash_one(init[val.0.index()])
2620+
});
2621+
}
2622+
2623+
Interner(init, Lock::new(InternerInner { strings }))
25482624
}
25492625

25502626
#[inline]
25512627
fn intern(&self, string: &str) -> Symbol {
2552-
let mut inner = self.0.lock();
2553-
if let Some(idx) = inner.strings.get_index_of(string) {
2554-
return Symbol::new(idx as u32);
2628+
let hash = BuildHasherDefault::<FxHasher>::default().hash_one(string);
2629+
let mut inner = self.1.lock();
2630+
match inner.strings.find_entry(hash, |v| self.get(*v) == string) {
2631+
Ok(e) => return *e.get(),
2632+
Err(e) => {
2633+
let idx = GLOBAL_ARENA.alloc(string);
2634+
let res = Symbol::new(idx as u32);
2635+
2636+
e.into_table().insert_unique(hash, res, |val| {
2637+
BuildHasherDefault::<FxHasher>::default().hash_one(self.get(*val))
2638+
});
2639+
2640+
res
2641+
}
25552642
}
2556-
2557-
let string: &str = inner.arena.alloc_str(string);
2558-
2559-
// SAFETY: we can extend the arena allocation to `'static` because we
2560-
// only access these while the arena is still alive.
2561-
let string: &'static str = unsafe { &*(string as *const str) };
2562-
2563-
// This second hash table lookup can be avoided by using `RawEntryMut`,
2564-
// but this code path isn't hot enough for it to be worth it. See
2565-
// #91445 for details.
2566-
let (idx, is_new) = inner.strings.insert_full(string);
2567-
debug_assert!(is_new); // due to the get_index_of check above
2568-
2569-
Symbol::new(idx as u32)
25702643
}
25712644

25722645
/// Get the symbol as a string.
25732646
///
25742647
/// [`Symbol::as_str()`] should be used in preference to this function.
2575-
fn get(&self, symbol: Symbol) -> &str {
2576-
self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap()
2648+
fn get(&self, symbol: Symbol) -> &'static str {
2649+
if symbol.0.index() < u16::MAX as usize {
2650+
self.0[symbol.0.index()]
2651+
} else {
2652+
GLOBAL_ARENA.get(symbol.0.as_u32())
2653+
}
25772654
}
25782655
}
25792656

compiler/rustc_span/src/symbol/tests.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,17 @@ use crate::create_default_session_globals_then;
44
#[test]
55
fn interner_tests() {
66
let i = Interner::prefill(&[]);
7+
let dog = i.intern("dog");
78
// first one is zero:
8-
assert_eq!(i.intern("dog"), Symbol::new(0));
9+
assert_eq!(i.intern("dog"), dog);
910
// re-use gets the same entry:
10-
assert_eq!(i.intern("dog"), Symbol::new(0));
11+
assert_eq!(i.intern("dog"), dog);
1112
// different string gets a different #:
12-
assert_eq!(i.intern("cat"), Symbol::new(1));
13-
assert_eq!(i.intern("cat"), Symbol::new(1));
13+
let cat = i.intern("cat");
14+
assert_eq!(i.intern("cat"), cat);
15+
assert_eq!(i.intern("cat"), cat);
1416
// dog is still at zero
15-
assert_eq!(i.intern("dog"), Symbol::new(0));
17+
assert_eq!(i.intern("dog"), dog);
1618
}
1719

1820
#[test]

0 commit comments

Comments
 (0)