From 3ff54e575e6446367971453cb8538a7c94265a70 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Wed, 19 Jun 2024 20:07:37 -0400 Subject: [PATCH] feat: use stable hash from rustc-stable-hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helps `-Ztrim-paths` build a stable cross-platform path for the registry and git sources. Source files can then be found at the same path when debugging. See https://github.com/rust-lang/cargo/issues/13171#issuecomment-1864899037 A few caveats: * This will invalidate the current downloaded caches. Need to put this in the Cargo CHANGELOG. * As a consequence of changing how `SourceId` is hashed, the global cache tracker is also affected because Cargo writes source identifiers (e.g. `index.crates.io-6f17d22bba15001f`) to SQLite. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/global_cache_tracker.rs#L388-L391 * The performance of rustc-stable-hash is slightly worse than the old SipHasher in std on short things like `SourceId`, but better for long stuff like fingerprints. See appendix. StableHasher is used in several places (some might not be needed?): * Rebuild detection (fingerprints) * Rustc version, including all the CLI args running `rustc -vV`. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/util/rustc.rs#L326 * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/util/rustc.rs#L381 * Build caches * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/compiler/fingerprint/mod.rs#L1456 * Compute rustc `-C metadata` * stable hash for SourceId * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/package_id.rs#L207 * Also read and hash contents from custom target JSON file. 
* https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/compiler/compile_kind.rs#L81-L91 * `UnitInner::dep_hash` * This is to distinguish the same units having different feature sets between normal and build dependencies. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/ops/cargo_compile/mod.rs#L627 * Hash file contents for `cargo package` to verify if files were modified before and after the build. * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/ops/cargo_package.rs#L999 * Rustc diagnostics deduplication * https://github.com/rust-lang/cargo/blob/6e236509b2331eef64df844b7bbc8ed352294107/src/cargo/core/compiler/job_queue/mod.rs#L311 * Places using `SourceId` identifier like `registry/src` path, and `-Zscript` target directories. Appendix -------- Benchmark on x86_64-unknown-linux-gnu ``` bench_hasher/RustcStableHasher/URL time: [33.843 ps 33.844 ps 33.845 ps] change: [-0.0167% -0.0049% +0.0072%] (p = 0.44 > 0.05) No change in performance detected. Found 10 outliers among 100 measurements (10.00%) 5 (5.00%) low severe 3 (3.00%) high mild 2 (2.00%) high severe bench_hasher/SipHasher/URL time: [18.954 ns 18.954 ns 18.955 ns] change: [-0.1281% -0.0951% -0.0644%] (p = 0.00 < 0.05) Change within noise threshold. Found 14 outliers among 100 measurements (14.00%) 3 (3.00%) low severe 4 (4.00%) low mild 3 (3.00%) high mild 4 (4.00%) high severe bench_hasher/RustcStableHasher/lorem ipsum time: [659.18 ns 659.20 ns 659.22 ns] change: [-0.0192% -0.0062% +0.0068%] (p = 0.34 > 0.05) No change in performance detected. Found 12 outliers among 100 measurements (12.00%) 4 (4.00%) low severe 3 (3.00%) low mild 3 (3.00%) high mild 2 (2.00%) high severe bench_hasher/SipHasher/lorem ipsum time: [1.2006 µs 1.2008 µs 1.2010 µs] change: [+0.0117% +0.0467% +0.0808%] (p = 0.01 < 0.05) Change within noise threshold. 
Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild ``` --- Cargo.lock | 6 ++ Cargo.toml | 2 + .../build_runner/compilation_files.rs | 2 +- src/cargo/core/compiler/compile_kind.rs | 2 +- src/cargo/core/source_id.rs | 77 ++++++++++++------- src/cargo/ops/cargo_compile/mod.rs | 2 +- src/cargo/util/hasher.rs | 21 ++--- src/cargo/util/rustc.rs | 2 +- 8 files changed, 73 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48574a00e322..2789211013b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -316,6 +316,7 @@ dependencies = [ "rand", "regex", "rusqlite", + "rustc-stable-hash", "rustfix", "same-file", "semver", @@ -2941,6 +2942,11 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-stable-hash" +version = "0.1.0" +source = "git+https://github.com/rust-lang/rustc-stable-hash.git?rev=cb8e141b08fb839606a5f79f9b56087cd54b764d#cb8e141b08fb839606a5f79f9b56087cd54b764d" + [[package]] name = "rustfix" version = "0.8.4" diff --git a/Cargo.toml b/Cargo.toml index ac4c3924efee..53915dcc7683 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,6 +78,7 @@ pulldown-cmark = { version = "0.11.0", default-features = false, features = ["ht rand = "0.8.5" regex = "1.10.4" rusqlite = { version = "0.31.0", features = ["bundled"] } +rustc-stable-hash = { git = "https://github.com/rust-lang/rustc-stable-hash.git", rev = "cb8e141b08fb839606a5f79f9b56087cd54b764d" } rustfix = { version = "0.8.2", path = "crates/rustfix" } same-file = "1.0.6" security-framework = "2.10.0" @@ -182,6 +183,7 @@ pathdiff.workspace = true rand.workspace = true regex.workspace = true rusqlite.workspace = true +rustc-stable-hash.workspace = true rustfix.workspace = true same-file.workspace = true semver.workspace = true diff --git a/src/cargo/core/compiler/build_runner/compilation_files.rs b/src/cargo/core/compiler/build_runner/compilation_files.rs 
index 41ef89d6f0b6..6056d145602c 100644 --- a/src/cargo/core/compiler/build_runner/compilation_files.rs +++ b/src/cargo/core/compiler/build_runner/compilation_files.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::fmt; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::path::{Path, PathBuf}; use std::sync::Arc; diff --git a/src/cargo/core/compiler/compile_kind.rs b/src/cargo/core/compiler/compile_kind.rs index 222732ddebc6..937c353dcba1 100644 --- a/src/cargo/core/compiler/compile_kind.rs +++ b/src/cargo/core/compiler/compile_kind.rs @@ -8,7 +8,7 @@ use anyhow::Context as _; use serde::Serialize; use std::collections::BTreeSet; use std::fs; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::path::Path; /// Indicator for how a unit is being compiled. diff --git a/src/cargo/core/source_id.rs b/src/cargo/core/source_id.rs index d03a0a5769c2..59f7683a3ec3 100644 --- a/src/cargo/core/source_id.rs +++ b/src/cargo/core/source_id.rs @@ -786,70 +786,93 @@ mod tests { // Otherwise please just leave a comment in your PR as to why the hash value is // changing and why the old value can't be easily preserved. // - // The hash value depends on endianness and bit-width, so we only run this test on - // little-endian 64-bit CPUs (such as x86-64 and ARM64) where it matches the - // well-known value. + // The hash value should be stable across platforms, and doesn't depend on + // endianness and bit-width. One caveat is that absolute paths are inherently + // different on Windows than on Unix-like platforms. Unless we omit or strip + // the prefix components (e.g. `C:`), there is no way to have a + // cross-platform stable hash for absolute paths. 
#[test] - #[cfg(all(target_endian = "little", target_pointer_width = "64"))] fn test_cratesio_hash() { let gctx = GlobalContext::default().unwrap(); let crates_io = SourceId::crates_io(&gctx).unwrap(); - assert_eq!(crate::util::hex::short_hash(&crates_io), "1ecc6299db9ec823"); + assert_eq!(crate::util::hex::short_hash(&crates_io), "83d63c3e13aca8cc"); } // See the comment in `test_cratesio_hash`. // // Only test on non-Windows as paths on Windows will get different hashes. #[test] - #[cfg(all(target_endian = "little", target_pointer_width = "64", not(windows)))] fn test_stable_hash() { - use std::hash::Hasher; + use crate::util::StableHasher; use std::path::Path; + #[cfg(not(windows))] + let ws_root = Path::new("/tmp/ws"); + #[cfg(windows)] + let ws_root = Path::new(r"C:\\tmp\ws"); + let gen_hash = |source_id: SourceId| { - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - source_id.stable_hash(Path::new("/tmp/ws"), &mut hasher); + let mut hasher = StableHasher::new(); + source_id.stable_hash(ws_root, &mut hasher); hasher.finish() }; let url = "https://my-crates.io".into_url().unwrap(); let source_id = SourceId::for_registry(&url).unwrap(); - assert_eq!(gen_hash(source_id), 18108075011063494626); - assert_eq!(crate::util::hex::short_hash(&source_id), "fb60813d6cb8df79"); + assert_eq!(gen_hash(source_id), 2056262832525457700); + assert_eq!(crate::util::hex::short_hash(&source_id), "24b984d12650891c"); let url = "https://your-crates.io".into_url().unwrap(); let source_id = SourceId::for_alt_registry(&url, "alt").unwrap(); - assert_eq!(gen_hash(source_id), 12862859764592646184); - assert_eq!(crate::util::hex::short_hash(&source_id), "09c10fd0cbd74bce"); + assert_eq!(gen_hash(source_id), 7851411715584162426); + assert_eq!(crate::util::hex::short_hash(&source_id), "7afabb545bd1f56c"); let url = "sparse+https://my-crates.io".into_url().unwrap(); let source_id = SourceId::for_registry(&url).unwrap(); - assert_eq!(gen_hash(source_id), 
8763561830438022424); - assert_eq!(crate::util::hex::short_hash(&source_id), "d1ea0d96f6f759b5"); + assert_eq!(gen_hash(source_id), 15233380663065439616); + assert_eq!(crate::util::hex::short_hash(&source_id), "80ed51ce00d767d3"); let url = "sparse+https://your-crates.io".into_url().unwrap(); let source_id = SourceId::for_alt_registry(&url, "alt").unwrap(); - assert_eq!(gen_hash(source_id), 5159702466575482972); - assert_eq!(crate::util::hex::short_hash(&source_id), "135d23074253cb78"); + assert_eq!(gen_hash(source_id), 12749290624384351691); + assert_eq!(crate::util::hex::short_hash(&source_id), "cbbda5344694eeb0"); let url = "file:///tmp/ws/crate".into_url().unwrap(); let source_id = SourceId::for_git(&url, GitReference::DefaultBranch).unwrap(); - assert_eq!(gen_hash(source_id), 15332537265078583985); - assert_eq!(crate::util::hex::short_hash(&source_id), "73a808694abda756"); - - let path = Path::new("/tmp/ws/crate"); + assert_eq!(gen_hash(source_id), 3109465066469481245); + assert_eq!(crate::util::hex::short_hash(&source_id), "1d5b66d8000a272b"); + let path = &ws_root.join("crate"); let source_id = SourceId::for_local_registry(path).unwrap(); - assert_eq!(gen_hash(source_id), 18446533307730842837); - assert_eq!(crate::util::hex::short_hash(&source_id), "52a84cc73f6fd48b"); + #[cfg(not(windows))] + { + assert_eq!(gen_hash(source_id), 17171351456028149232); + assert_eq!(crate::util::hex::short_hash(&source_id), "f0c5f1e92be54cee"); + } + #[cfg(windows)] + { + assert_eq!(gen_hash(source_id), 10712195329887934127); + assert_eq!(crate::util::hex::short_hash(&source_id), "af96919ae55ca994"); + } let source_id = SourceId::for_path(path).unwrap(); - assert_eq!(gen_hash(source_id), 8764714075439899829); - assert_eq!(crate::util::hex::short_hash(&source_id), "e1ddd48578620fc1"); + assert_eq!(gen_hash(source_id), 13241112980875747369); + #[cfg(not(windows))] + assert_eq!(crate::util::hex::short_hash(&source_id), "e5ba2edec163e65a"); + #[cfg(windows)] + 
assert_eq!(crate::util::hex::short_hash(&source_id), "429dd6f2283a9b5c"); let source_id = SourceId::for_directory(path).unwrap(); - assert_eq!(gen_hash(source_id), 17459999773908528552); - assert_eq!(crate::util::hex::short_hash(&source_id), "6568fe2c2fab5bfe"); + #[cfg(not(windows))] + { + assert_eq!(gen_hash(source_id), 12461124588148212881); + assert_eq!(crate::util::hex::short_hash(&source_id), "91c47582caceeeac"); + } + #[cfg(windows)] + { + assert_eq!(gen_hash(source_id), 17000469607053345884); + assert_eq!(crate::util::hex::short_hash(&source_id), "5c443d0709cdedeb"); + } } #[test] diff --git a/src/cargo/ops/cargo_compile/mod.rs b/src/cargo/ops/cargo_compile/mod.rs index a09a95c5a32f..ad125aa1b826 100644 --- a/src/cargo/ops/cargo_compile/mod.rs +++ b/src/cargo/ops/cargo_compile/mod.rs @@ -36,7 +36,7 @@ //! ["Cargo Target"]: https://doc.rust-lang.org/nightly/cargo/reference/cargo-targets.html use std::collections::{HashMap, HashSet}; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::sync::Arc; use crate::core::compiler::unit_dependencies::build_unit_dependencies; diff --git a/src/cargo/util/hasher.rs b/src/cargo/util/hasher.rs index 01e15ae2c04a..60d37f8863c5 100644 --- a/src/cargo/util/hasher.rs +++ b/src/cargo/util/hasher.rs @@ -1,23 +1,24 @@ -//! Implementation of a hasher that produces the same values across releases. +//! A hasher that produces the same values across releases and platforms. //! -//! The hasher should be fast and have a low chance of collisions (but is not -//! sufficient for cryptographic purposes). -#![allow(deprecated)] +//! This is a wrapper around [`rustc_stable_hash::StableHasher`]. 
-use std::hash::{Hasher, SipHasher}; - -pub struct StableHasher(SipHasher); +pub struct StableHasher(rustc_stable_hash::StableHasher); impl StableHasher { pub fn new() -> StableHasher { - StableHasher(SipHasher::new()) + StableHasher(rustc_stable_hash::StableHasher::new()) + } + + pub fn finish(self) -> u64 { + self.0.finalize().0 } } -impl Hasher for StableHasher { +impl std::hash::Hasher for StableHasher { fn finish(&self) -> u64 { - self.0.finish() + panic!("call StableHasher::finish instead"); } + fn write(&mut self, bytes: &[u8]) { self.0.write(bytes) } diff --git a/src/cargo/util/rustc.rs b/src/cargo/util/rustc.rs index 2b80136dc053..4e035b22dbd5 100644 --- a/src/cargo/util/rustc.rs +++ b/src/cargo/util/rustc.rs @@ -1,6 +1,6 @@ use std::collections::hash_map::HashMap; use std::env; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::path::{Path, PathBuf}; use std::sync::Mutex;