diff --git a/Cargo.lock b/Cargo.lock index 03a4d71f67c3a..f50c51ed82d35 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -239,6 +239,21 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "assert_fs" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652f6cb1f516886fcfee5e7a5c078b9ade62cfcb889524efe5a64d682dd27a9" +dependencies = [ + "anstyle", + "doc-comment", + "globwalk", + "predicates", + "predicates-core", + "predicates-tree", + "tempfile", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -544,8 +559,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ "iana-time-zone", + "js-sys", "num-traits", "serde", + "wasm-bindgen", "windows-link 0.2.1", ] @@ -1265,6 +1282,12 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8975ffdaa0ef3661bfe02dbdcc06c9f829dfafe6a3c474de366a8d5e44276921" +[[package]] +name = "doc-comment" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "780955b8b195a21ab8e4ac6b60dd1dbdcec1dc6c51c0617964b08c81785e12c9" + [[package]] name = "dyn-clone" version = "1.0.20" @@ -1637,6 +1660,17 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "globwalk" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" +dependencies = [ + "bitflags", + "ignore", + "walkdir", +] + [[package]] name = "gsgdt" version = "0.1.2" @@ -3311,6 +3345,27 @@ dependencies = [ "walkdir", ] +[[package]] +name = "repro-check" +version = "0.1.0" +dependencies = [ + "anyhow", + "assert_fs", + "chrono", + "clap", + "env_logger", + "hex", + "ignore", + "log", + "num_cpus", + "rayon", + "serde", + "sha2", + "tempfile", + "toml 0.8.23", + "walkdir", +] + [[package]] name = "run_make_support" version = "0.0.0" 
diff --git a/Cargo.toml b/Cargo.toml index 67c7a9d67edc8..0470ab9aa6c01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ members = [ "src/tools/remote-test-client", "src/tools/remote-test-server", "src/tools/replace-version-placeholder", + "src/tools/repro-check", "src/tools/run-make-support", "src/tools/rust-installer", "src/tools/rustdoc", diff --git a/src/bootstrap/src/core/build_steps/tool.rs b/src/bootstrap/src/core/build_steps/tool.rs index a90687c5c0f98..0c9a4454ed594 100644 --- a/src/bootstrap/src/core/build_steps/tool.rs +++ b/src/bootstrap/src/core/build_steps/tool.rs @@ -501,6 +501,7 @@ bootstrap_tool!( UnicodeTableGenerator, "src/tools/unicode-table-generator", "unicode-table-generator"; FeaturesStatusDump, "src/tools/features-status-dump", "features-status-dump"; OptimizedDist, "src/tools/opt-dist", "opt-dist", submodules = &["src/tools/rustc-perf"]; + ReproCheck, "src/tools/repro-check", "repro-check"; RunMakeSupport, "src/tools/run-make-support", "run_make_support", artifact_kind = ToolArtifactKind::Library; ); diff --git a/src/bootstrap/src/core/builder/mod.rs b/src/bootstrap/src/core/builder/mod.rs index 961d0cd855ae3..6a3f9326e9b41 100644 --- a/src/bootstrap/src/core/builder/mod.rs +++ b/src/bootstrap/src/core/builder/mod.rs @@ -801,6 +801,7 @@ impl<'a> Builder<'a> { llvm::CrtBeginEnd, tool::RustdocGUITest, tool::OptimizedDist, + tool::ReproCheck, tool::CoverageDump, tool::LlvmBitcodeLinker, tool::RustcPerf, diff --git a/src/tools/repro-check/Cargo.toml b/src/tools/repro-check/Cargo.toml new file mode 100644 index 0000000000000..b5f330cd3057b --- /dev/null +++ b/src/tools/repro-check/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "repro-check" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0" +clap = { version = "4.5", features = ["derive", "env"] } +chrono = { version = "0.4", features = ["clock"] } +env_logger = "0.11" +hex = "0.4" +ignore = "0.4" +log = "0.4" +num_cpus = "1.16" +rayon = "1.10" +serde = { version = 
"1.0", features = ["derive"] } +sha2 = "0.10" +toml = "0.8" +walkdir = "2.5" + +[dev-dependencies] +assert_fs = "1.1" +tempfile = "3.12" diff --git a/src/tools/repro-check/README.md b/src/tools/repro-check/README.md new file mode 100644 index 0000000000000..af4e8d6a8b62b --- /dev/null +++ b/src/tools/repro-check/README.md @@ -0,0 +1,47 @@ +# repro-check + +repro-check is a lightweight tool designed to verify the reproducibility of Rust compiler builds. + +It works by creating two separate copies of the Rust source tree, building stage-2 (or a full distribution) +in each copy, and then comparing the resulting sysroots using SHA-256 checksums. +If any discrepancies are detected, repro-check generates a detailed HTML report highlighting the differences +and exits with a non-zero status. + +This tool is ideal for developers and CI systems aiming to ensure deterministic, reproducible builds of Rust. + +## How to build and run: + +```bash +./x.py build src/tools/repro-check +./build/<host>/stage1-tools-bin/repro-check --help + +# simplest run (host target, stage 2 only) +./build/x86_64-unknown-linux-gnu/stage1-tools-bin/repro-check + +# Recommended usage +./build/x86_64-unknown-linux-gnu/stage1-tools-bin/repro-check \ + --jobs 16 \ + --exclude-pattern .so \ + --path-delta 10 \ + --html-output my-report.html + +# full distribution (takes a long time) +./build/x86_64-unknown-linux-gnu/stage1-tools-bin/repro-check --full-dist + +# start from scratch +./build/x86_64-unknown-linux-gnu/stage1-tools-bin/repro-check --clean +``` + +All flags: + +--src-root <path> path to the rust checkout (default: current dir) +--target <triple> build for this target (default: host) +--jobs <n> parallel jobs (default: number of CPUs) +--html-output <file> report file (default: repro_report.html) +--exclude-pattern <suffix> ignore files whose name ends with <suffix> (can be repeated) +--path-delta <n> make the second build’s path longer by n segments (default 10) +--full-dist build a complete distribution instead of stage 2 +--clean delete the workspace first 
+--skip-copy reuse an existing workspace (don’t copy source)
+--verbose print more details
+
+- Tests: Run `cargo test` in src/tools/repro-check.
diff --git a/src/tools/repro-check/src/build.rs b/src/tools/repro-check/src/build.rs
new file mode 100644
index 0000000000000..b0413464407d6
--- /dev/null
+++ b/src/tools/repro-check/src/build.rs
@@ -0,0 +1,82 @@
+use std::env;
+use std::path::Path;
+use std::process::{Command, Stdio};
+
+use anyhow::{Context, Result};
+use log::{info, warn};
+
+// Launches `x.py` from `env_root` and waits for it to finish.
+//
+// The interpreter comes from the BOOTSTRAP_PYTHON environment variable; when that is unset
+// (or not valid Unicode) `python` is used on Windows and `python3` everywhere else.
+// With `full_dist` the subcommand is `dist`; otherwise it is `build --stage 2 compiler`.
+// `--target`, `-j` and `--config bootstrap.toml` are appended, and the child inherits this
+// process's stdout/stderr. A spawn failure or a non-zero exit status is returned as an error.
+pub fn run_xpy(env_root: &Path, jobs: u32, target: Option<&str>, full_dist: bool) -> Result<()> {
+    let interpreter = match env::var("BOOTSTRAP_PYTHON") {
+        Ok(value) => value,
+        Err(_) if cfg!(windows) => "python".to_string(),
+        Err(_) => "python3".to_string(),
+    };
+    let script = env_root.join("x.py");
+
+    let mut invocation = Command::new(&interpreter);
+    invocation.arg(&script);
+
+    // Only the plain build is pinned to the stage-2 compiler; `dist` takes no stage argument here.
+    if full_dist {
+        invocation.arg("dist");
+    } else {
+        invocation.args(["build", "--stage", "2", "compiler"]);
+    }
+
+    if let Some(triple) = target {
+        invocation.args(["--target", triple]);
+    }
+
+    invocation
+        .arg("-j")
+        .arg(jobs.to_string())
+        .args(["--config", "bootstrap.toml"])
+        .current_dir(env_root)
+        .stdout(Stdio::inherit())
+        .stderr(Stdio::inherit());
+
+    info!("Kicking off: {} {}", interpreter, script.display());
+
+    let exit =
+        invocation.status().with_context(|| format!("Couldn't run x.py in {:?}", env_root))?;
+    if exit.success() {
+        Ok(())
+    } else {
+        Err(anyhow::anyhow!("Build bombed in {:?}", env_root))
+    }
+}
+
+// Figures out the host triple by asking rustc, or guessing if that fails.
+//
+// The `host:` line of `rustc -vV` is preferred. If rustc cannot be started, exits
+// unsuccessfully, or prints no such line, a triple is guessed from the architecture, OS and
+// C runtime this tool itself was compiled for. `src_root` only appears in the log message.
+pub fn detect_host(src_root: &Path) -> Result<String> {
+    match Command::new("rustc").arg("-vV").output() {
+        Ok(output) if output.status.success() => {
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            if let Some(host) = stdout.lines().find_map(|line| line.strip_prefix("host: ")) {
+                return Ok(host.trim().to_string());
+            }
+            warn!("rustc -vV printed no `host:` line; falling back to a guess.");
+        }
+        Ok(_) => warn!("rustc -vV didn't work; falling back to a guess."),
+        // A missing rustc must not abort the run: this is called after both builds finished.
+        Err(err) => {
+            warn!("Couldn't query rustc for version info ({err}); falling back to a guess.")
+        }
+    }
+
+    let arch = if cfg!(target_arch = "x86_64") {
+        "x86_64"
+    } else if cfg!(target_arch = "aarch64") {
+        "aarch64"
+    } else {
+        "unknown"
+    };
+    // Vendor and OS are chosen together: the vendor is `pc` on Windows, `apple` on macOS and
+    // `unknown` only on Linux, so a fixed `-unknown-` infix yields triples that do not exist.
+    let vendor_os = if cfg!(target_os = "windows") {
+        if cfg!(target_env = "gnu") { "pc-windows-gnu" } else { "pc-windows-msvc" }
+    } else if cfg!(target_os = "macos") {
+        "apple-darwin"
+    } else {
+        "unknown-linux-gnu"
+    };
+
+    let guess = format!("{arch}-{vendor_os}");
+    warn!("Guessed host triple {guess} for source root {:?}", src_root);
+    Ok(guess)
+}
diff --git a/src/tools/repro-check/src/compare.rs b/src/tools/repro-check/src/compare.rs
new file mode 100644
index 0000000000000..5a6a4ebc5a1b1
--- /dev/null
+++ b/src/tools/repro-check/src/compare.rs
@@ -0,0 +1,264 @@
+use std::collections::HashSet;
+use std::fs::File;
+use std::io::copy;
+use std::path::{Path, PathBuf};
+
+use anyhow::Result;
+use chrono::Utc;
+use log::{info, trace, warn};
+use rayon::prelude::*;
+use sha2::{Digest, Sha256};
+use walkdir::{DirEntry, WalkDir};
+
+/// Outcome of comparing two directory trees.
+#[derive(Debug, Clone)]
+pub struct ComparisonReport {
+    /// Compared files whose hashes differ between the two trees.
+    pub mismatches: Vec<Mismatch>,
+    /// Number of compared files plus number of ignored files.
+    pub total_files: usize,
+    /// Number of compared files that are not listed in `mismatches`.
+    pub matching_files: usize,
+    /// Files skipped because of an exclude pattern, paired with the pattern that matched.
+    pub ignored_files: Vec<(PathBuf, String)>,
+    /// Relative paths of the files that were hashed and compared.
+    pub compared_files: Vec<PathBuf>,
+}
+
+/// One file that differs between the two trees.
+#[derive(Debug, Clone)]
+pub struct Mismatch {
+    /// Path relative to the compared directory roots.
+    pub path: PathBuf,
+    /// Hex SHA-256 of the file in the first tree, or a placeholder such as `HASH_ERROR`.
+    pub hash_a: String,
+    /// Hex SHA-256 of the file in the second tree, or a placeholder such as `MISSING_FILE`.
+    pub hash_b: String,
+}
+
+/// Compares two directories, ignoring certain file patterns.
+/// Collects files from dir_a, filters them, hashes in parallel, then checks against dir_b.
+/// We sort entries for consistent ordering - helps with debugging.
+///
+/// Files that exist only in `dir_b` are compared as well (their A side is `MISSING_FILE`), and
+/// a file whose hash could not be computed is always reported, even when hashing failed on
+/// both sides. Empty exclude patterns are skipped because they would match every file.
+pub fn compare_directories(
+    dir_a: &Path,
+    dir_b: &Path,
+    host: &str,
+    exclude_patterns: &HashSet<String>,
+) -> Result<ComparisonReport> {
+    // Lowercase each pattern once instead of once per file. Sorting makes the pattern that
+    // gets reported independent of `HashSet` iteration order.
+    let mut patterns: Vec<(&str, String)> = exclude_patterns
+        .iter()
+        .filter(|pat| !pat.is_empty())
+        .map(|pat| (pat.as_str(), pat.to_lowercase()))
+        .collect();
+    patterns.sort();
+
+    let mut ignored_files: Vec<(PathBuf, String)> = Vec::new();
+    let mut compared_files: Vec<PathBuf> = Vec::new();
+
+    // Case-insensitive suffix match on the file name decides whether a file is ignored.
+    let mut classify = |entry: &DirEntry, rel: PathBuf| {
+        let name = entry.file_name().to_string_lossy().to_lowercase();
+        match patterns.iter().find(|(_, lowered)| name.ends_with(lowered.as_str())) {
+            Some((original, _)) => ignored_files.push((rel, (*original).to_owned())),
+            None => compared_files.push(rel),
+        }
+    };
+
+    let mut seen_in_a: HashSet<PathBuf> = HashSet::new();
+    for entry in list_files(dir_a) {
+        let rel = relative_path(&entry, dir_a);
+        seen_in_a.insert(rel.clone());
+        classify(&entry, rel);
+    }
+    // An artifact produced by only one of the two builds is a reproducibility failure too.
+    for entry in list_files(dir_b) {
+        let rel = relative_path(&entry, dir_b);
+        if !seen_in_a.contains(&rel) {
+            classify(&entry, rel);
+        }
+    }
+
+    let total_files = compared_files.len() + ignored_files.len();
+    trace!("Found {} files to compare, ignored {}", compared_files.len(), ignored_files.len());
+
+    // Both sides are hashed on the rayon pool; collecting into a Vec keeps the input order.
+    let mismatches: Vec<Mismatch> = compared_files
+        .par_iter()
+        .filter_map(|rel| {
+            let hash_a = hash_or_placeholder(&dir_a.join(rel));
+            let hash_b = hash_or_placeholder(&dir_b.join(rel));
+            // Two failed hashes are equal strings but prove nothing about the contents.
+            let unverified = hash_a == HASH_ERROR || hash_b == HASH_ERROR;
+            (unverified || hash_a != hash_b).then(|| Mismatch { path: rel.clone(), hash_a, hash_b })
+        })
+        .collect();
+
+    let matching_files = compared_files.len() - mismatches.len();
+    info!("Compared on host {} - mismatches: {}", host, mismatches.len());
+
+    Ok(ComparisonReport { mismatches, total_files, matching_files, ignored_files, compared_files })
+}
+
+/// Placeholder stored in a [`Mismatch`] when a file exists but could not be hashed.
+const HASH_ERROR: &str = "HASH_ERROR";
+/// Placeholder stored in a [`Mismatch`] when a file is absent from one of the trees.
+const MISSING_FILE: &str = "MISSING_FILE";
+
+/// Lists the regular files below `root` in file-name order, warning about unreadable entries.
+fn list_files(root: &Path) -> Vec<DirEntry> {
+    WalkDir::new(root)
+        .sort_by_file_name()
+        .into_iter()
+        .filter_map(|entry| match entry {
+            Ok(entry) => Some(entry),
+            Err(err) => {
+                warn!("Skipping unreadable entry under {:?}: {}", root, err);
+                None
+            }
+        })
+        .filter(|entry| entry.file_type().is_file())
+        .collect()
+}
+
+/// Returns the entry's path relative to `root` (the full path if it is not below `root`).
+fn relative_path(entry: &DirEntry, root: &Path) -> PathBuf {
+    entry.path().strip_prefix(root).unwrap_or(entry.path()).to_path_buf()
+}
+
+/// Hashes `path`, mapping "not a file" and I/O failures to the two placeholders.
+fn hash_or_placeholder(path: &Path) -> String {
+    if !path.is_file() {
+        return MISSING_FILE.to_string();
+    }
+    match compute_hash(path) {
+        Ok(digest) => digest,
+        Err(err) => {
+            warn!("Hash error on {:?}: {}", path, err);
+            HASH_ERROR.to_string()
+        }
+    }
+}
+
+/// Escapes the characters that are significant in HTML text and attribute values.
+fn html_escape(text: &str) -> String {
+    let mut escaped = String::with_capacity(text.len());
+    for ch in text.chars() {
+        match ch {
+            '&' => escaped.push_str("&amp;"),
+            '<' => escaped.push_str("&lt;"),
+            '>' => escaped.push_str("&gt;"),
+            '"' => escaped.push_str("&quot;"),
+            '\'' => escaped.push_str("&#39;"),
+            _ => escaped.push(ch),
+        }
+    }
+    escaped
+}
+
+/// First 16 characters of a digest; shorter values (the placeholders) are shown in full.
+fn short_hash(hash: &str) -> &str {
+    hash.get(..16).unwrap_or(hash)
+}
+
+/// Builds an HTML report from the comparison results and writes it to `output_path`.
+///
+/// Paths and patterns are HTML-escaped before they are embedded.
+// NOTE(review): element structure and CSS were reconstructed around the report's visible
+// text - confirm against the intended design.
+pub fn generate_html_report(report: &ComparisonReport, output_path: &Path) -> Result<()> {
+    use std::fmt::Write as _;
+
+    use anyhow::Context as _;
+
+    let (status_class, status_text) =
+        if report.mismatches.is_empty() { ("success", "PASSED") } else { ("failure", "FAILED") };
+
+    let timestamp = Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string();
+    let mut html = String::new();
+
+    write!(
+        html,
+        r#"<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <title>Repro Check Report</title>
+    <style>
+        body {{ font-family: sans-serif; margin: 2em; }}
+        .success {{ color: #2e7d32; }}
+        .failure {{ color: #c62828; }}
+        table {{ border-collapse: collapse; }}
+        th, td {{ border: 1px solid #ccc; padding: 4px 8px; text-align: left; }}
+        .footer {{ margin-top: 2em; color: #666; }}
+    </style>
+</head>
+<body>
+    <div class="container">
+        <div class="header">
+            <h1 class="{status_class}">Repro Check: {status_text}</h1>
+            <div class="summary">
+                Total files: {total} |
+                Matching: {matching} |
+                Mismatches: {mcount} |
+                Ignored: {icount}
+            </div>
+        </div>
+
+        <div class="section">
+            <h2>Mismatches ({mcount})</h2>
+"#,
+        status_class = status_class,
+        status_text = status_text,
+        total = report.total_files,
+        matching = report.matching_files,
+        mcount = report.mismatches.len(),
+        icount = report.ignored_files.len(),
+    )?;
+
+    if report.mismatches.is_empty() {
+        html.push_str("            <p>Everything matches - good job!</p>\n");
+    } else {
+        html.push_str(
+            r#"            <table>
+                <tr><th>File Path</th><th>Hash A (short)</th><th>Hash B (short)</th></tr>
+"#,
+        );
+        for mismatch in &report.mismatches {
+            writeln!(
+                html,
+                "                <tr><td>{}</td><td>{}</td><td>{}</td></tr>",
+                html_escape(&mismatch.path.display().to_string()),
+                html_escape(short_hash(&mismatch.hash_a)),
+                html_escape(short_hash(&mismatch.hash_b)),
+            )?;
+        }
+        html.push_str("            </table>\n");
+    }
+
+    write!(
+        html,
+        r#"        </div>
+
+        <div class="section">
+            <h2>Ignored Files ({})</h2>
+"#,
+        report.ignored_files.len()
+    )?;
+
+    if report.ignored_files.is_empty() {
+        html.push_str("            <p>None ignored this time.</p>\n");
+    } else {
+        html.push_str(
+            r#"            <details open>
+                <summary>Click to hide/show</summary>
+                <table>
+                    <tr><th>File</th><th>Matched Pattern</th></tr>
+"#,
+        );
+        for (path, pat) in &report.ignored_files {
+            writeln!(
+                html,
+                "                    <tr><td>{}</td><td>{}</td></tr>",
+                html_escape(&path.display().to_string()),
+                html_escape(pat),
+            )?;
+        }
+        html.push_str("                </table>\n            </details>\n");
+    }
+
+    write!(
+        html,
+        r#"        </div>
+
+        <div class="section">
+            <h2>Files Compared ({})</h2>
+"#,
+        report.compared_files.len()
+    )?;
+
+    if report.compared_files.is_empty() {
+        html.push_str("            <p>Nothing to compare - maybe all ignored?</p>\n");
+    } else {
+        html.push_str(
+            r#"            <details>
+                <summary>Expand to see list</summary>
+                <ul>
+"#,
+        );
+        for path in &report.compared_files {
+            writeln!(
+                html,
+                "                    <li>{}</li>",
+                html_escape(&path.display().to_string())
+            )?;
+        }
+        html.push_str("                </ul>\n            </details>\n");
+    }
+
+    write!(
+        html,
+        r#"        </div>
+
+        <div class="footer">
+            Report generated on {timestamp}
+        </div>
+    </div>
+</body>
+</html>
+"#,
+        timestamp = timestamp
+    )?;
+
+    std::fs::write(output_path, html)
+        .with_context(|| format!("Couldn't write report to {}", output_path.display()))?;
+    info!("Wrote report to {}", output_path.display());
+    Ok(())
+}
+
+/// Returns the lowercase hex SHA-256 digest of the file at `path`.
+///
+/// The file is streamed into the hasher, so it is never held in memory as a whole.
+pub fn compute_hash(path: &Path) -> Result<String> {
+    let mut f = File::open(path)?;
+    let mut hasher = Sha256::new();
+    copy(&mut f, &mut hasher)?;
+    Ok(hex::encode(hasher.finalize()))
+}
diff --git a/src/tools/repro-check/src/config.rs b/src/tools/repro-check/src/config.rs
new file mode 100644
index 0000000000000..cdf505616e0cc
--- /dev/null
+++ b/src/tools/repro-check/src/config.rs
@@ -0,0 +1,39 @@
+use std::fs;
+use std::path::Path;
+
+use anyhow::Result;
+use log::info;
+
+/// Writes a fixed `bootstrap.toml` into `env_root`, replacing any existing file.
+///
+/// When `target` is given, a `target = ["<target>"]` line is added to the `[build]` table;
+/// otherwise that position is left blank.
+pub fn write_bootstrap_toml(env_root: &Path, target: Option<&str>) -> Result<()> {
+    let target_line = target.map(|t| format!("target = [\"{}\"]\n", t)).unwrap_or_default();
+
+    let content = format!(
+        r#"
+[llvm]
+download-ci-llvm = true
+
+[build]
+extended = true
+tools = ["cargo", "clippy", "rustfmt", "rustdoc"]
+full-bootstrap = true
+{}
+
+[rust]
+remap-debuginfo = true
+debug = false
+debug-assertions = false
+backtrace-on-ice = false
+debug-logging = false
+channel = "nightly"
+
+[dist]
+src-tarball = false
+"#,
+        target_line
+    );
+
+    let toml_path = env_root.join("bootstrap.toml");
+    // The raw string starts with a newline for readability; drop it from the written file.
+    fs::write(&toml_path, content.trim_start())?;
+    info!("Wrote deterministic bootstrap.toml to {}", toml_path.display());
+    Ok(())
+}
diff --git a/src/tools/repro-check/src/fs_utils.rs b/src/tools/repro-check/src/fs_utils.rs
new file mode 100644
index 0000000000000..3ac68cad9bdd2
--- /dev/null
+++ b/src/tools/repro-check/src/fs_utils.rs
@@ -0,0 +1,98 @@
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+use ignore::{Walk, WalkBuilder};
+use log::{info, warn};
+
+// Sets up two build envs by copying source (if not skipped), cleaning first if asked.
+// The path_delta is for testing path sensitivity - adds extra dir levels to B.
+//
+// Returns the roots of environment A (`<workspace>/build-a`) and environment B
+// (`<workspace>/build-b/extra1/.../extraN` with N = `path_delta`).
+// With `skip_copy` nothing is deleted or copied; both roots must already exist, otherwise an
+// error is returned here instead of a bare OS error from a later step.
+pub fn prepare_workspace(
+    workspace: &Path,
+    src_root: &Path,
+    path_delta: usize,
+    skip_copy: bool,
+) -> Result<(PathBuf, PathBuf)> {
+    if !skip_copy {
+        info!("Setting up fresh workspace at {:?}", workspace);
+        clean_workspace(workspace)?;
+        fs::create_dir_all(workspace)
+            .with_context(|| format!("Couldn't create workspace {:?}", workspace))?;
+    }
+
+    let env_a = workspace.join("build-a");
+    // B sits `path_delta` directories deeper than A so that the two builds see different
+    // absolute paths; an empty range leaves it at `build-b`.
+    let env_b = (1..=path_delta)
+        .fold(workspace.join("build-b"), |dir, i| dir.join(format!("extra{}", i)));
+
+    if cfg!(windows) && path_delta > 10 {
+        warn!("Watch out - long paths on Windows might need registry tweaks.");
+    }
+
+    if skip_copy {
+        for env in [&env_a, &env_b] {
+            if !env.is_dir() {
+                anyhow::bail!(
+                    "Can't reuse workspace: {:?} doesn't exist (run once without --skip-copy, \
+                     using the same --path-delta)",
+                    env
+                );
+            }
+        }
+    } else {
+        info!("Copying sources to A: {:?}", env_a);
+        copy_source_tree(src_root, &env_a)?;
+
+        info!("Copying sources to B: {:?}", env_b);
+        copy_source_tree(src_root, &env_b)?;
+    }
+
+    Ok((env_a, env_b))
+}
+
+// Removes the workspace directory and everything below it; does nothing if it is absent.
+pub fn clean_workspace(workspace: &Path) -> Result<()> {
+    if workspace.exists() {
+        info!("Cleaning up old workspace: {:?}", workspace);
+        fs::remove_dir_all(workspace)
+            .with_context(|| format!("Workspace clean failed: {:?}", workspace))?;
+    }
+    Ok(())
+}
+
+// Copies the source tree, respecting .gitignore and skipping build/.git.
+// Uses ignore crate for git-like filtering.
+// Symlinks are skipped; directories are recreated and regular files copied under `dest`.
+fn copy_source_tree(src: &Path, dest: &Path) -> Result<()> {
+    let walker: Walk = WalkBuilder::new(src)
+        .hidden(false)
+        .git_ignore(true)
+        .git_global(false)
+        .git_exclude(true)
+        .require_git(false)
+        // Prune the top-level `.git` and `build` entries so the walker never descends into
+        // them. `build` also holds the workspace that is being written to.
+        .filter_entry(|entry| {
+            let pruned = entry.depth() == 1
+                && matches!(entry.file_name().to_str(), Some(".git" | "build"));
+            !pruned
+        })
+        .build();
+
+    for entry_res in walker {
+        let entry = entry_res?;
+        let from_path = entry.path();
+
+        if from_path == src {
+            continue;
+        }
+
+        // Entries without a file type are skipped, as are symlinks.
+        let Some(file_type) = entry.file_type() else {
+            continue;
+        };
+        if file_type.is_symlink() {
+            continue;
+        }
+
+        let rel_path = from_path.strip_prefix(src).context("Bad strip prefix")?;
+        let to_path = dest.join(rel_path);
+
+        if file_type.is_dir() {
+            fs::create_dir_all(&to_path)
+                .with_context(|| format!("Couldn't create directory {:?}", to_path))?;
+        } else if file_type.is_file() {
+            if let Some(parent) = to_path.parent() {
+                fs::create_dir_all(parent)
+                    .with_context(|| format!("Couldn't create directory {:?}", parent))?;
+            }
+            fs::copy(from_path, &to_path)
+                .with_context(|| format!("Copy failed: {:?}", from_path))?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/tools/repro-check/src/lib.rs b/src/tools/repro-check/src/lib.rs
new file mode 100644
index 0000000000000..e46a97f30f6f7
--- /dev/null
+++ b/src/tools/repro-check/src/lib.rs
@@ -0,0 +1,4 @@
+pub mod build;
+pub mod compare;
+pub mod config;
+pub mod fs_utils;
diff --git a/src/tools/repro-check/src/main.rs b/src/tools/repro-check/src/main.rs
new file mode 100644
index 0000000000000..e59e12c430527
--- /dev/null
+++ b/src/tools/repro-check/src/main.rs
@@ -0,0 +1,145 @@
+//! Tool to check if Rust compiler builds are reproducible.
+//! Copies source to two dirs, builds each, hashes artifacts, compares.
+//! Handy for spotting non-determinism.
+
+use std::collections::HashSet;
+use std::path::{Path, PathBuf};
+use std::time::Instant;
+
+use anyhow::{Context, Result, bail};
+use clap::Parser;
+use log::{debug, info};
+use repro_check::{build, compare, config, fs_utils};
+
+#[derive(Parser, Debug)]
+#[command(author, version, about = "Checks Rust build reproducibility", long_about = None)]
+struct Args {
+    /// Path to the Rust checkout
+    #[arg(long, default_value = ".")]
+    src_root: PathBuf,
+
+    /// Target triple passed to x.py and written to bootstrap.toml
+    #[arg(long)]
+    target: Option<String>,
+
+    /// Where to write the HTML report
+    #[arg(long, default_value = "repro_report.html")]
+    html_output: PathBuf,
+
+    /// Number of parallel jobs passed to x.py
+    #[arg(short, long, default_value_t = num_cpus::get() as u32)]
+    jobs: u32,
+
+    /// Reuse an existing workspace instead of copying the sources again
+    #[arg(long)]
+    skip_copy: bool,
+
+    /// Number of extra directory levels added to the second build's path
+    #[arg(long, default_value_t = 10)]
+    path_delta: usize,
+
+    /// Run `x.py dist` instead of a stage 2 compiler build
+    #[arg(long)]
+    full_dist: bool,
+
+    /// Delete the workspace before doing anything else
+    // Deleting the workspace and then reusing it cannot work, so the two flags are exclusive.
+    #[arg(long, conflicts_with = "skip_copy")]
+    clean: bool,
+
+    /// Ignore files whose name ends with this suffix (case-insensitive, repeatable)
+    #[arg(long)]
+    exclude_pattern: Vec<String>,
+
+    /// Log at debug level, including the ignored/compared file lists
+    #[arg(long)]
+    verbose: bool,
+}
+
+// Runs a single x.py build in `env_dir` and logs how long it took; `label` tags the log lines.
+fn run_one_build(
+    env_dir: &Path,
+    jobs: u32,
+    target: Option<&str>,
+    full_dist: bool,
+    label: &str,
+) -> Result<()> {
+    info!("Starting {} build in {:?}", label, env_dir);
+    let start = Instant::now();
+    build::run_xpy(env_dir, jobs, target, full_dist)?;
+    info!("{} build done in {:?}", label, start.elapsed());
+    Ok(())
+}
+
+// Prepares the workspace, builds both environments one after the other, compares their
+// stage2 directories, writes the HTML report, and fails if any file differs.
+fn main() -> Result<()> {
+    let args = Args::parse();
+
+    let log_level = if args.verbose { log::LevelFilter::Debug } else { log::LevelFilter::Info };
+    env_logger::builder().filter_level(log_level).init();
+
+    // Built-in suffixes that are always ignored; user patterns are added on top.
+    let mut excludes: HashSet<String> =
+        ["metrics.json", ".lock", "git-commit-info", "Cargo.lock", ".log"]
+            .into_iter()
+            .map(String::from)
+            .collect();
+    excludes.extend(args.exclude_pattern);
+    debug!("Excludes: {:?}", excludes);
+
+    let src_root = std::fs::canonicalize(&args.src_root).context("Bad source root path")?;
+    info!("Repro check starting from {:?}", src_root);
+
+    let workspace = src_root.join("build/repro_workspace");
+    if args.clean {
+        fs_utils::clean_workspace(&workspace)?;
+    }
+
+    let (env_a, env_b) =
+        fs_utils::prepare_workspace(&workspace, &src_root, args.path_delta, args.skip_copy)?;
+
+    // Pass the requested target (if any) into the generated bootstrap.toml
+    let target = args.target.as_deref();
+    config::write_bootstrap_toml(&env_a, target)?;
+    config::write_bootstrap_toml(&env_b, target)?;
+
+    info!("-----------------------------------------------");
+
+    run_one_build(&env_a, args.jobs, target, args.full_dist, "A")?;
+
+    info!("-----------------------------------------------");
+
+    run_one_build(&env_b, args.jobs, target, args.full_dist, "B")?;
+
+    info!("-----------------------------------------------");
+    info!("Now comparing...");
+
+    // The stage2 compiler and sysroot are *always* built under the host triple,
+    // even when cross-compiling. The target-specific libraries live inside
+    // `lib/rustlib/<target>/` under the host stage2 directory.
+    let host = build::detect_host(&src_root)?;
+    let stage2_path = Path::new("build").join(&host).join("stage2");
+
+    let path_a = env_a.join(&stage2_path);
+    let path_b = env_b.join(&stage2_path);
+
+    for dir in [&path_a, &path_b] {
+        if !dir.exists() {
+            bail!(
+                "Missing stage2 directory at {} — build may have failed or been incomplete",
+                dir.display()
+            );
+        }
+    }
+    let report = compare::compare_directories(&path_a, &path_b, &host, &excludes)?;
+
+    // These must be `debug!`: `--verbose` only raises the filter to Debug, so `trace!`
+    // records would never be printed.
+    if args.verbose {
+        debug!("Ignored:");
+        for (p, pat) in &report.ignored_files {
+            debug!("- {} (via {})", p.display(), pat);
+        }
+        debug!("Compared:");
+        for p in &report.compared_files {
+            debug!("- {}", p.display());
+        }
+    }
+
+    compare::generate_html_report(&report, &args.html_output)?;
+
+    if report.mismatches.is_empty() {
+        info!("All good - builds match!");
+    } else {
+        bail!("Mismatches: {} - see {}", report.mismatches.len(), args.html_output.display());
+    }
+
+    Ok(())
+}
diff --git a/src/tools/repro-check/tests/integration.rs
b/src/tools/repro-check/tests/integration.rs
new file mode 100644
index 0000000000000..a5b48395a85e7
--- /dev/null
+++ b/src/tools/repro-check/tests/integration.rs
@@ -0,0 +1,99 @@
+#[cfg(test)]
+mod tests {
+    use std::collections::HashSet;
+    use std::path::PathBuf;
+
+    use assert_fs::TempDir;
+    use assert_fs::prelude::*;
+    use repro_check::compare::{compare_directories, compute_hash};
+
+    // SHA-256 of a known string must equal its published digest.
+    #[test]
+    fn test_hash_computation() {
+        let dir = TempDir::new().unwrap();
+        let sample = dir.child("test.txt");
+        sample.write_str("hello world").unwrap();
+
+        let digest = compute_hash(sample.path()).unwrap();
+
+        assert_eq!(digest, "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9");
+    }
+
+    // Two empty trees produce an empty report.
+    #[test]
+    fn test_compare_empty_dirs() {
+        let dir_a = TempDir::new().unwrap();
+        let dir_b = TempDir::new().unwrap();
+        let no_patterns = HashSet::new();
+
+        let report =
+            compare_directories(dir_a.path(), dir_b.path(), "test-host", &no_patterns).unwrap();
+
+        assert_eq!(report.total_files, 0);
+        assert!(report.mismatches.is_empty());
+        assert!(report.ignored_files.is_empty());
+        assert!(report.compared_files.is_empty());
+    }
+
+    // A file matching an exclude suffix is listed as ignored and never compared.
+    #[test]
+    fn test_ignore_patterns() {
+        let dir_a = TempDir::new().unwrap();
+        let dir_b = TempDir::new().unwrap();
+
+        dir_a.child("match.log").write_str("ignore me").unwrap();
+        dir_a.child("keep.txt").write_str("keep").unwrap();
+        dir_b.child("keep.txt").write_str("keep").unwrap();
+
+        let patterns = HashSet::from([".log".to_string()]);
+
+        let report = compare_directories(dir_a.path(), dir_b.path(), "host", &patterns).unwrap();
+
+        assert_eq!(report.total_files, 2);
+        assert_eq!(report.ignored_files.len(), 1);
+        assert_eq!(report.compared_files.len(), 1);
+        assert!(report.mismatches.is_empty());
+
+        let (ignored_path, matched_pattern) = &report.ignored_files[0];
+        assert_eq!(*ignored_path, PathBuf::from("match.log"));
+        assert_eq!(matched_pattern, ".log");
+    }
+
+    // Same name, different text content: exactly one mismatch and nothing matching.
+    #[test]
+    fn test_mismatch_detection() {
+        let dir_a = TempDir::new().unwrap();
+        let dir_b = TempDir::new().unwrap();
+
+        dir_a.child("diff.txt").write_str("version one").unwrap();
+        dir_b.child("diff.txt").write_str("version two").unwrap();
+
+        let no_patterns = HashSet::new();
+        let report =
+            compare_directories(dir_a.path(), dir_b.path(), "host", &no_patterns).unwrap();
+
+        assert_eq!(report.total_files, 1);
+        assert_eq!(report.mismatches.len(), 1);
+        assert_eq!(report.matching_files, 0);
+        assert!(report.ignored_files.is_empty());
+    }
+
+    // Edge case: mixed case patterns
+    #[test]
+    fn test_case_insensitivity() {
+        let dir = TempDir::new().unwrap();
+        dir.child("Ignore.METRICS.json").write_str("data").unwrap();
+
+        let patterns = HashSet::from(["metrics.json".to_string()]);
+
+        // The same directory is passed for both sides; only the ignore logic is under test.
+        let report = compare_directories(dir.path(), dir.path(), "host", &patterns).unwrap();
+        assert_eq!(report.ignored_files.len(), 1);
+    }
+
+    // Binary files differing in a single byte are reported as a mismatch.
+    #[test]
+    fn test_real_file_mismatch() {
+        let dir_a = TempDir::new().unwrap();
+        let dir_b = TempDir::new().unwrap();
+
+        dir_a.child("file.bin").write_binary(b"\x00\x01\x02").unwrap();
+        dir_b.child("file.bin").write_binary(b"\x00\x01\x03").unwrap();
+
+        let no_patterns = HashSet::new();
+        let report =
+            compare_directories(dir_a.path(), dir_b.path(), "host", &no_patterns).unwrap();
+
+        assert_eq!(report.mismatches.len(), 1);
+    }
+}