Skip to content

Commit

Permalink
Auto merge of #119238 - Mark-Simulacrum:def-hash-efficiency, r=cjgillot
Browse files Browse the repository at this point in the history
Skip duplicate stable crate ID encoding into metadata

Instead, we store just the crate-local part of each DefPathHash as a bare
u64. On decoding, we recombine it with the crate's stable crate ID, which is
stored separately in metadata. The end result is that we save ~8 bytes per
DefIndex in metadata size.

One key detail here is that we no longer distinguish in encoded metadata
between present and non-present DefPathHashes. Previously we could almost
always tell them apart, because a missing entry was encoded as
DefPathHash::default(), an all-zero representation. However, in theory even
that is fallible, as nothing strictly prevents the StableCrateId from being
zero. In review it was pointed out that we should never have a missing hash
for a DefIndex anyway, so this shouldn't matter.
  • Loading branch information
bors committed Dec 24, 2023
2 parents 3166bbe + 6630d69 commit cf64273
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 31 deletions.
14 changes: 11 additions & 3 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Expand Up @@ -6,6 +6,7 @@ use crate::rmeta::*;

use rustc_ast as ast;
use rustc_data_structures::captures::Captures;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::owned_slice::OwnedSlice;
use rustc_data_structures::sync::{AppendOnlyVec, AtomicBool, Lock, Lrc, OnceLock};
use rustc_data_structures::unhash::UnhashMap;
Expand Down Expand Up @@ -1489,9 +1490,16 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
index: DefIndex,
def_path_hashes: &mut FxHashMap<DefIndex, DefPathHash>,
) -> DefPathHash {
*def_path_hashes
.entry(index)
.or_insert_with(|| self.root.tables.def_path_hashes.get(self, index))
*def_path_hashes.entry(index).or_insert_with(|| {
// This is a hack to work around the fact that we can't easily encode/decode a Hash64
// into the FixedSizeEncoding, as Hash64 lacks a Default impl. A future refactor to
// relax the Default restriction will likely fix this.
let fingerprint = Fingerprint::new(
self.root.stable_crate_id.as_u64(),
self.root.tables.def_path_hashes.get(self, index),
);
DefPathHash::new(self.root.stable_crate_id, fingerprint.split().1)
})
}

#[inline]
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Expand Up @@ -467,13 +467,13 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
let def_key = self.lazy(table.def_key(def_index));
let def_path_hash = table.def_path_hash(def_index);
self.tables.def_keys.set_some(def_index, def_key);
self.tables.def_path_hashes.set(def_index, def_path_hash);
self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
}
} else {
for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() {
let def_key = self.lazy(def_key);
self.tables.def_keys.set_some(def_index, def_key);
self.tables.def_path_hashes.set(def_index, *def_path_hash);
self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
}
}
}
Expand Down
7 changes: 6 additions & 1 deletion compiler/rustc_metadata/src/rmeta/mod.rs
Expand Up @@ -386,7 +386,12 @@ define_tables! {
is_type_alias_impl_trait: Table<DefIndex, bool>,
type_alias_is_lazy: Table<DefIndex, bool>,
attr_flags: Table<DefIndex, AttrFlags>,
def_path_hashes: Table<DefIndex, DefPathHash>,
// The u64 is the crate-local part of the DefPathHash. All hashes in this crate have the same
// StableCrateId, so we omit encoding those into the table.
//
// Note also that this table is fully populated (no gaps) as every DefIndex should have a
// corresponding DefPathHash.
def_path_hashes: Table<DefIndex, u64>,
explicit_item_bounds: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
inferred_outlives_of: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
inherent_impls: Table<DefIndex, LazyArray<DefIndex>>,
Expand Down
23 changes: 0 additions & 23 deletions compiler/rustc_metadata/src/rmeta/table.rs
@@ -1,6 +1,5 @@
use crate::rmeta::*;

use rustc_data_structures::fingerprint::Fingerprint;
use rustc_hir::def::CtorOf;
use rustc_index::Idx;

Expand Down Expand Up @@ -44,12 +43,6 @@ impl<T> IsDefault for LazyArray<T> {
}
}

// A `DefPathHash` is considered the default value exactly when its wrapped
// fingerprint equals `Fingerprint::ZERO`.
impl IsDefault for DefPathHash {
fn is_default(&self) -> bool {
// Compare the inner fingerprint against the zero constant.
self.0 == Fingerprint::ZERO
}
}

impl IsDefault for UnusedGenericParams {
fn is_default(&self) -> bool {
// UnusedGenericParams encodes the *un*usedness as a bitset.
Expand Down Expand Up @@ -234,22 +227,6 @@ fixed_size_enum! {
}
}

// We directly encode `DefPathHash` because a `LazyValue` would incur a 25% cost.
// Each value is stored as the 16 little-endian bytes of its wrapped fingerprint.
impl FixedSizeEncoding for DefPathHash {
type ByteArray = [u8; 16];

/// Rebuilds a `DefPathHash` from the 16 little-endian bytes in `b`.
#[inline]
fn from_bytes(b: &[u8; 16]) -> Self {
DefPathHash(Fingerprint::from_le_bytes(*b))
}

/// Writes the wrapped fingerprint into `b` as 16 little-endian bytes.
#[inline]
fn write_to_bytes(self, b: &mut [u8; 16]) {
// With debug assertions enabled, writing the default value is rejected.
debug_assert!(!self.is_default());
*b = self.0.to_le_bytes();
}
}

// We directly encode RawDefId because using a `LazyValue` would incur a 50% overhead in the worst case.
impl FixedSizeEncoding for Option<RawDefId> {
type ByteArray = [u8; 8];
Expand Down
2 changes: 0 additions & 2 deletions compiler/rustc_span/src/def_id.rs
Expand Up @@ -114,8 +114,6 @@ impl DefPathHash {
}

/// Returns the crate-local part of the [DefPathHash].
///
/// Used for tests.
#[inline]
pub fn local_hash(&self) -> Hash64 {
self.0.split().1
Expand Down

0 comments on commit cf64273

Please sign in to comment.