Skip to content

Commit e02f012

Browse files
Auto merge of #148597 - yotamofek:pr/search_index/crate_path_ref_map, r=<try>
Allow looking up "crate paths" in map without allocating a vector
2 parents c880acd + b9907e0 commit e02f012

File tree

4 files changed

+122
-73
lines changed

4 files changed

+122
-73
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4793,11 +4793,13 @@ dependencies = [
47934793
"askama",
47944794
"base64",
47954795
"expect-test",
4796+
"hashbrown",
47964797
"indexmap",
47974798
"itertools",
47984799
"minifier",
47994800
"pulldown-cmark-escape",
48004801
"regex",
4802+
"rustc-hash 2.1.1",
48014803
"rustdoc-json-types",
48024804
"serde",
48034805
"serde_json",

src/librustdoc/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ path = "lib.rs"
1212
arrayvec = { version = "0.7", default-features = false }
1313
askama = { version = "0.14", default-features = false, features = ["alloc", "config", "derive"] }
1414
base64 = "0.21.7"
15+
hashbrown = "0.15"
1516
indexmap = { version = "2", features = ["serde"] }
1617
itertools = "0.12"
1718
minifier = { version = "0.3.5", default-features = false }
1819
pulldown-cmark-escape = { version = "0.11.0", features = ["simd"] }
1920
regex = "1"
21+
rustc-hash = "2.1.1"
2022
rustdoc-json-types = { path = "../rustdoc-json-types" }
2123
serde = { version = "1.0", features = ["derive"] }
2224
serde_json = "1.0"

src/librustdoc/html/render/search_index.rs

Lines changed: 117 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,20 @@ pub(crate) mod encode;
22
mod serde;
33

44
use std::collections::BTreeSet;
5-
use std::collections::hash_map::Entry;
5+
use std::hash::{Hash, Hasher};
66
use std::io;
77
use std::path::Path;
88
use std::string::FromUtf8Error;
99

1010
use ::serde::de::{self, Deserializer, Error as _};
1111
use ::serde::ser::{SerializeSeq, Serializer};
1212
use ::serde::{Deserialize, Serialize};
13+
use hashbrown::hash_map::EntryRef;
14+
use hashbrown::{Equivalent, HashMap};
1315
use rustc_ast::join_path_syms;
1416
use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexMap};
1517
use rustc_data_structures::thin_vec::ThinVec;
18+
use rustc_hash::FxBuildHasher;
1619
use rustc_hir::attrs::AttributeKind;
1720
use rustc_hir::find_attr;
1821
use rustc_middle::ty::TyCtxt;
@@ -30,6 +33,46 @@ use crate::formats::item_type::ItemType;
3033
use crate::html::markdown::short_markdown_summary;
3134
use crate::html::render::{self, IndexItem, IndexItemFunctionType, RenderType, RenderTypeId};
3235

36+
#[derive(Clone, Debug, PartialEq, Eq)]
37+
struct CratePath(ItemType, Vec<Symbol>);
38+
39+
impl Hash for CratePath {
40+
fn hash<H: Hasher>(&self, state: &mut H) {
41+
CratePathRef(self.0, &[&self.1[..]]).hash(state);
42+
}
43+
}
44+
45+
/// This struct allows doing lookups on a map where the keys are [`CratePath`]s, without having to allocate a vector.
46+
struct CratePathRef<'sym>(ItemType, &'sym [&'sym [Symbol]]);
47+
48+
impl<'sym> CratePathRef<'sym> {
49+
fn symbols(&self) -> impl Iterator<Item = &Symbol> {
50+
self.1.iter().copied().flatten()
51+
}
52+
}
53+
54+
impl<'a, 'sym> From<&'a CratePathRef<'sym>> for CratePath {
55+
fn from(value: &'a CratePathRef<'sym>) -> Self {
56+
Self(value.0, value.symbols().copied().collect())
57+
}
58+
}
59+
60+
impl<'sym> Equivalent<CratePath> for CratePathRef<'sym> {
    /// Returns `true` when `key` has the same item type and the same symbols,
    /// in the same order, as this borrowed path.
    fn equivalent(&self, key: &CratePath) -> bool {
        if self.0 != key.0 {
            return false;
        }
        // Compare symbol by symbol; how `self` is split into slices is irrelevant.
        self.symbols().eq(key.1.iter())
    }
}
65+
66+
impl<'sym> Hash for CratePathRef<'sym> {
67+
fn hash<H: Hasher>(&self, state: &mut H) {
68+
self.0.hash(state);
69+
state.write_length_prefix(self.symbols().count());
70+
for sym in self.symbols() {
71+
sym.hash(state);
72+
}
73+
}
74+
}
75+
3376
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
3477
pub(crate) struct SerializedSearchIndex {
3578
// data from disk
@@ -55,7 +98,7 @@ pub(crate) struct SerializedSearchIndex {
5598
generic_inverted_index: Vec<Vec<Vec<u32>>>,
5699
// generated in-memory backref cache
57100
#[serde(skip)]
58-
crate_paths_index: FxHashMap<(ItemType, Vec<Symbol>), usize>,
101+
crate_paths_index: HashMap<CratePath, usize, FxBuildHasher>,
59102
}
60103

61104
impl SerializedSearchIndex {
@@ -185,7 +228,7 @@ impl SerializedSearchIndex {
185228
// generic_inverted_index is not the same length as other columns,
186229
// because it's actually a completely different set of objects
187230

188-
let mut crate_paths_index: FxHashMap<(ItemType, Vec<Symbol>), usize> = FxHashMap::default();
231+
let mut crate_paths_index = HashMap::default();
189232
for (i, (name, path_data)) in names.iter().zip(path_data.iter()).enumerate() {
190233
if let Some(path_data) = path_data {
191234
let full_path = if path_data.module_path.is_empty() {
@@ -195,7 +238,7 @@ impl SerializedSearchIndex {
195238
full_path.push(Symbol::intern(name));
196239
full_path
197240
};
198-
crate_paths_index.insert((path_data.ty, full_path), i);
241+
crate_paths_index.insert(CratePath(path_data.ty, full_path), i);
199242
}
200243
}
201244

@@ -225,15 +268,10 @@ impl SerializedSearchIndex {
225268
assert_eq!(self.names.len(), self.path_data.len());
226269
if let Some(path_data) = &path_data
227270
&& let name = Symbol::intern(&name)
228-
&& let fqp = if path_data.module_path.is_empty() {
229-
vec![name]
230-
} else {
231-
let mut v = path_data.module_path.clone();
232-
v.push(name);
233-
v
234-
}
235-
&& let Some(&other_path) = self.crate_paths_index.get(&(path_data.ty, fqp))
236-
&& self.path_data.get(other_path).map_or(false, Option::is_some)
271+
&& let Some(&other_path) = self
272+
.crate_paths_index
273+
.get(&CratePathRef(path_data.ty, &[&path_data.module_path, &[name]]))
274+
&& let Some(Some(_)) = self.path_data.get(other_path)
237275
{
238276
self.path_data.push(None);
239277
} else {
@@ -255,20 +293,20 @@ impl SerializedSearchIndex {
255293
///
256294
/// The returned ID can be used to attach more data to the search result.
257295
fn add_entry(&mut self, name: Symbol, entry_data: EntryData, desc: String) -> usize {
258-
let fqp = if let Some(module_path_index) = entry_data.module_path {
259-
let mut fqp = self.path_data[module_path_index].as_ref().unwrap().module_path.clone();
260-
fqp.push(Symbol::intern(&self.names[module_path_index]));
261-
fqp.push(name);
262-
fqp
296+
let fqp: [&[Symbol]; _] = if let Some(module_path_index) = entry_data.module_path {
297+
[
298+
&self.path_data[module_path_index].as_ref().unwrap().module_path,
299+
&[Symbol::intern(&self.names[module_path_index]), name],
300+
]
263301
} else {
264-
vec![name]
302+
[&[name], &[]]
265303
};
266304
// If a path with the same name already exists, but no entry does,
267305
// we can fill in the entry without having to allocate a new row ID.
268306
//
269307
// Because paths and entries both share the same index, using the same
270308
// ID saves space by making the tree smaller.
271-
if let Some(&other_path) = self.crate_paths_index.get(&(entry_data.ty, fqp))
309+
if let Some(&other_path) = self.crate_paths_index.get(&CratePathRef(entry_data.ty, &fqp))
272310
&& self.entry_data[other_path].is_none()
273311
&& self.descs[other_path].is_empty()
274312
{
@@ -291,9 +329,9 @@ impl SerializedSearchIndex {
291329

292330
fn get_id_by_module_path(&mut self, path: &[Symbol]) -> usize {
293331
let ty = if path.len() == 1 { ItemType::ExternCrate } else { ItemType::Module };
294-
match self.crate_paths_index.entry((ty, path.to_vec())) {
295-
Entry::Occupied(index) => *index.get(),
296-
Entry::Vacant(slot) => {
332+
match self.crate_paths_index.entry_ref(&CratePathRef(ty, &[path])) {
333+
EntryRef::Occupied(index) => *index.get(),
334+
EntryRef::Vacant(slot) => {
297335
slot.insert(self.path_data.len());
298336
let (name, module_path) = path.split_last().unwrap();
299337
self.push_path(
@@ -310,16 +348,18 @@ impl SerializedSearchIndex {
310348
let mut skips = FxHashSet::default();
311349
for (other_pathid, other_path_data) in other.path_data.iter().enumerate() {
312350
if let Some(other_path_data) = other_path_data {
313-
let mut fqp = other_path_data.module_path.clone();
314351
let name = Symbol::intern(&other.names[other_pathid]);
315-
fqp.push(name);
352+
let fqp = [&other_path_data.module_path[..], &[name]];
316353
let self_pathid = other_entryid_offset + other_pathid;
317-
let self_pathid = match self.crate_paths_index.entry((other_path_data.ty, fqp)) {
318-
Entry::Vacant(slot) => {
354+
let self_pathid = match self
355+
.crate_paths_index
356+
.entry_ref(&CratePathRef(other_path_data.ty, &fqp))
357+
{
358+
EntryRef::Vacant(slot) => {
319359
slot.insert(self_pathid);
320360
self_pathid
321361
}
322-
Entry::Occupied(existing_entryid) => {
362+
EntryRef::Occupied(existing_entryid) => {
323363
skips.insert(other_pathid);
324364
let self_pathid = *existing_entryid.get();
325365
let new_type_data = match (
@@ -1301,9 +1341,9 @@ pub(crate) fn build_index(
13011341
let crate_doc =
13021342
short_markdown_summary(&krate.module.doc_value(), &krate.module.link_names(cache));
13031343
let crate_idx = {
1304-
let crate_path = (ItemType::ExternCrate, vec![crate_name]);
1305-
match serialized_index.crate_paths_index.entry(crate_path) {
1306-
Entry::Occupied(index) => {
1344+
let crate_path = CratePathRef(ItemType::ExternCrate, &[&[crate_name]]);
1345+
match serialized_index.crate_paths_index.entry_ref(&crate_path) {
1346+
EntryRef::Occupied(index) => {
13071347
let index = *index.get();
13081348
serialized_index.descs[index] = crate_doc;
13091349
for type_data in serialized_index.type_data.iter_mut() {
@@ -1352,7 +1392,7 @@ pub(crate) fn build_index(
13521392
}
13531393
index
13541394
}
1355-
Entry::Vacant(slot) => {
1395+
EntryRef::Vacant(slot) => {
13561396
let krate = serialized_index.names.len();
13571397
slot.insert(krate);
13581398
serialized_index.push(
@@ -1393,9 +1433,12 @@ pub(crate) fn build_index(
13931433
.or_else(|| check_external.then(|| cache.external_paths.get(&defid)).flatten())
13941434
.map(|&(ref fqp, ty)| {
13951435
let pathid = serialized_index.names.len();
1396-
match serialized_index.crate_paths_index.entry((ty, fqp.clone())) {
1397-
Entry::Occupied(entry) => *entry.get(),
1398-
Entry::Vacant(entry) => {
1436+
match serialized_index
1437+
.crate_paths_index
1438+
.entry_ref(&CratePathRef(ty, &[&&fqp[..]]))
1439+
{
1440+
EntryRef::Occupied(entry) => *entry.get(),
1441+
EntryRef::Vacant(entry) => {
13991442
entry.insert(pathid);
14001443
let (name, path) = fqp.split_last().unwrap();
14011444
serialized_index.push_path(
@@ -1542,46 +1585,47 @@ pub(crate) fn build_index(
15421585
used_in_function_signature: &mut BTreeSet<isize>,
15431586
) -> RenderTypeId {
15441587
let pathid = serialized_index.names.len();
1545-
let pathid = match serialized_index.crate_paths_index.entry((ty, path.to_vec())) {
1546-
Entry::Occupied(entry) => {
1547-
let id = *entry.get();
1548-
if serialized_index.type_data[id].as_mut().is_none() {
1549-
serialized_index.type_data[id] = Some(TypeData {
1550-
search_unbox,
1551-
inverted_function_inputs_index: Vec::new(),
1552-
inverted_function_output_index: Vec::new(),
1553-
});
1554-
} else if search_unbox {
1555-
serialized_index.type_data[id].as_mut().unwrap().search_unbox = true;
1588+
let pathid =
1589+
match serialized_index.crate_paths_index.entry_ref(&CratePathRef(ty, &[path])) {
1590+
EntryRef::Occupied(entry) => {
1591+
let id = *entry.get();
1592+
if serialized_index.type_data[id].as_mut().is_none() {
1593+
serialized_index.type_data[id] = Some(TypeData {
1594+
search_unbox,
1595+
inverted_function_inputs_index: Vec::new(),
1596+
inverted_function_output_index: Vec::new(),
1597+
});
1598+
} else if search_unbox {
1599+
serialized_index.type_data[id].as_mut().unwrap().search_unbox = true;
1600+
}
1601+
id
15561602
}
1557-
id
1558-
}
1559-
Entry::Vacant(entry) => {
1560-
entry.insert(pathid);
1561-
let (name, path) = path.split_last().unwrap();
1562-
serialized_index.push_type(
1563-
name.to_string(),
1564-
PathData {
1565-
ty,
1566-
module_path: path.to_vec(),
1567-
exact_module_path: if let Some(exact_path) = exact_path
1568-
&& let Some((name2, exact_path)) = exact_path.split_last()
1569-
&& name == name2
1570-
{
1571-
Some(exact_path.to_vec())
1572-
} else {
1573-
None
1603+
EntryRef::Vacant(entry) => {
1604+
entry.insert(pathid);
1605+
let (name, path) = path.split_last().unwrap();
1606+
serialized_index.push_type(
1607+
name.to_string(),
1608+
PathData {
1609+
ty,
1610+
module_path: path.to_vec(),
1611+
exact_module_path: if let Some(exact_path) = exact_path
1612+
&& let Some((name2, exact_path)) = exact_path.split_last()
1613+
&& name == name2
1614+
{
1615+
Some(exact_path.to_vec())
1616+
} else {
1617+
None
1618+
},
15741619
},
1575-
},
1576-
TypeData {
1577-
inverted_function_inputs_index: Vec::new(),
1578-
inverted_function_output_index: Vec::new(),
1579-
search_unbox,
1580-
},
1581-
);
1582-
pathid
1583-
}
1584-
};
1620+
TypeData {
1621+
inverted_function_inputs_index: Vec::new(),
1622+
inverted_function_output_index: Vec::new(),
1623+
search_unbox,
1624+
},
1625+
);
1626+
pathid
1627+
}
1628+
};
15851629
used_in_function_signature.insert(isize::try_from(pathid).unwrap());
15861630
RenderTypeId::Index(isize::try_from(pathid).unwrap())
15871631
}

src/librustdoc/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#![feature(box_patterns)]
1212
#![feature(file_buffered)]
1313
#![feature(formatting_options)]
14+
#![feature(hasher_prefixfree_extras)]
1415
#![feature(if_let_guard)]
1516
#![feature(iter_advance_by)]
1617
#![feature(iter_intersperse)]

0 commit comments

Comments
 (0)