Skip to content

Commit

Permalink
Specialize DefPathHash table to skip crate IDs
Browse files Browse the repository at this point in the history
Instead of storing the crate ID in every table entry, we store just the
crate-local hash as a bare u64. On decoding, we recombine it with the crate's
stable crate ID, which is stored separately in metadata. The end result is
that we save ~8 bytes per DefIndex in metadata size.

One key detail here is that we no longer distinguish in encoded metadata
between present and non-present DefPathHashes. Previously, it was highly
likely that we could distinguish the two, as we used DefPathHash::default(),
an all-zero representation, for non-present entries. However, in theory even
that is fallible, as nothing strictly prevents the StableCrateId from being zero.
  • Loading branch information
Mark-Simulacrum committed Dec 23, 2023
1 parent edcbcc7 commit 6630d69
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 31 deletions.
14 changes: 11 additions & 3 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Expand Up @@ -6,6 +6,7 @@ use crate::rmeta::*;

use rustc_ast as ast;
use rustc_data_structures::captures::Captures;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::owned_slice::OwnedSlice;
use rustc_data_structures::sync::{AppendOnlyVec, AtomicBool, Lock, Lrc, OnceLock};
use rustc_data_structures::unhash::UnhashMap;
Expand Down Expand Up @@ -1489,9 +1490,16 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
index: DefIndex,
def_path_hashes: &mut FxHashMap<DefIndex, DefPathHash>,
) -> DefPathHash {
*def_path_hashes
.entry(index)
.or_insert_with(|| self.root.tables.def_path_hashes.get(self, index))
*def_path_hashes.entry(index).or_insert_with(|| {
// This is a hack to workaround the fact that we can't easily encode/decode a Hash64
// into the FixedSizeEncoding, as Hash64 lacks a Default impl. A future refactor to
// relax the Default restriction will likely fix this.
let fingerprint = Fingerprint::new(
self.root.stable_crate_id.as_u64(),
self.root.tables.def_path_hashes.get(self, index),
);
DefPathHash::new(self.root.stable_crate_id, fingerprint.split().1)
})
}

#[inline]
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Expand Up @@ -467,13 +467,13 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
let def_key = self.lazy(table.def_key(def_index));
let def_path_hash = table.def_path_hash(def_index);
self.tables.def_keys.set_some(def_index, def_key);
self.tables.def_path_hashes.set(def_index, def_path_hash);
self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
}
} else {
for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() {
let def_key = self.lazy(def_key);
self.tables.def_keys.set_some(def_index, def_key);
self.tables.def_path_hashes.set(def_index, *def_path_hash);
self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
}
}
}
Expand Down
7 changes: 6 additions & 1 deletion compiler/rustc_metadata/src/rmeta/mod.rs
Expand Up @@ -386,7 +386,12 @@ define_tables! {
is_type_alias_impl_trait: Table<DefIndex, bool>,
type_alias_is_lazy: Table<DefIndex, bool>,
attr_flags: Table<DefIndex, AttrFlags>,
def_path_hashes: Table<DefIndex, DefPathHash>,
// The u64 is the crate-local part of the DefPathHash. All hashes in this crate have the same
// StableCrateId, so we omit encoding those into the table.
//
// Note also that this table is fully populated (no gaps) as every DefIndex should have a
// corresponding DefPathHash.
def_path_hashes: Table<DefIndex, u64>,
explicit_item_bounds: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
inferred_outlives_of: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
inherent_impls: Table<DefIndex, LazyArray<DefIndex>>,
Expand Down
23 changes: 0 additions & 23 deletions compiler/rustc_metadata/src/rmeta/table.rs
@@ -1,6 +1,5 @@
use crate::rmeta::*;

use rustc_data_structures::fingerprint::Fingerprint;
use rustc_hir::def::CtorOf;
use rustc_index::Idx;

Expand Down Expand Up @@ -44,12 +43,6 @@ impl<T> IsDefault for LazyArray<T> {
}
}

impl IsDefault for DefPathHash {
fn is_default(&self) -> bool {
self.0 == Fingerprint::ZERO
}
}

impl IsDefault for UnusedGenericParams {
fn is_default(&self) -> bool {
// UnusedGenericParams encodes the *un*usedness as a bitset.
Expand Down Expand Up @@ -234,22 +227,6 @@ fixed_size_enum! {
}
}

// We directly encode `DefPathHash` because a `LazyValue` would incur a 25% cost.
impl FixedSizeEncoding for DefPathHash {
type ByteArray = [u8; 16];

#[inline]
fn from_bytes(b: &[u8; 16]) -> Self {
DefPathHash(Fingerprint::from_le_bytes(*b))
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 16]) {
debug_assert!(!self.is_default());
*b = self.0.to_le_bytes();
}
}

// We directly encode RawDefId because using a `LazyValue` would incur a 50% overhead in the worst case.
impl FixedSizeEncoding for Option<RawDefId> {
type ByteArray = [u8; 8];
Expand Down
2 changes: 0 additions & 2 deletions compiler/rustc_span/src/def_id.rs
Expand Up @@ -114,8 +114,6 @@ impl DefPathHash {
}

/// Returns the crate-local part of the [DefPathHash].
///
/// Used for tests.
#[inline]
pub fn local_hash(&self) -> Hash64 {
self.0.split().1
Expand Down

0 comments on commit 6630d69

Please sign in to comment.