Skip to content

Commit

Permalink
all: Switch to deterministic IndexMap
Browse files Browse the repository at this point in the history
While testing ccache I noticed that cache hit rate was extremely low. After some debugging I noticed that the CFLAGS and other environmental variables and macro scripts were not being set deterministically. This is problematic for ccache as it hashes the compiler arguments as part of the key and the order matters to get the same key on a future run.

The cause of this non-determinism is the Rust stdlib HashMap and HashSet, which intentionally do not guarantee reproducible hashing and ordering. To solve this switch to IndexMap and IndexSet, drop-in compatible replacements for HashMap and HashSet respectively. Since non-determinism is just generally a bad idea for build tools and package management software replace ALL uses of HashMap and HashSet repo-wide except for one use in `moss/src/db/meta/mod.rs` which I do not know enough Rust to resolve the compile errors.

Tested by building a package with ccache enabled and using `ccache --show-stats` in the package recipe to verify that the hit rate was now 100% and that the build stage time had dropped to just the time it spent linking.

Signed-off-by: Reilly Brogan <reilly@reillybrogan.com>
  • Loading branch information
ReillyBrogan committed May 6, 2024
1 parent 931a4f5 commit a6a0ad0
Show file tree
Hide file tree
Showing 31 changed files with 180 additions and 156 deletions.
6 changes: 6 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ clap = { version = "4.5.1", features = ["derive", "string"] }
crossterm = "0.27.0"
derive_more = "0.99"
dialoguer = "0.11.0"
diesel = { version = "2.1.4", features = ["sqlite","returning_clauses_for_sqlite_3_35"] }
diesel = { version = "2.1.4", features = ["sqlite", "returning_clauses_for_sqlite_3_35"] }
diesel_migrations = "2.1.0"
dirs = "5.0"
elf = "0.7.4"
Expand All @@ -28,6 +28,7 @@ itertools = "0.12.1"
futures = "0.3.30"
glob = "0.3.1"
hex = "0.4.3"
indexmap = { version = "2.2.6", features = ["serde", "std"] }
libsqlite3-sys = { version = "0.25.2", features = ["bundled"] }
log = "0.4"
nom = "7.1.3"
Expand All @@ -48,7 +49,7 @@ tokio-stream = { version = "0.1.14", features = ["time"] }
tokio-util = { version = "0.7.9", features = ["io"] }
url = { version = "2.5.0", features = ["serde"] }
xxhash-rust = { version = "0.8.10", features = ["xxh3"] }
zstd = { version = "0.12.4", features = [ "zstdmt" ] }
zstd = { version = "0.12.4", features = ["zstdmt"] }

[profile.release]
lto = 'thin'
Expand Down
1 change: 1 addition & 0 deletions boulder/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ elf.workspace = true
glob.workspace = true
futures.workspace = true
hex.workspace = true
indexmap.workspace = true
itertools.workspace = true
nix.workspace = true
rayon.workspace = true
Expand Down
10 changes: 5 additions & 5 deletions boulder/src/build/job/phase.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@
//
// SPDX-License-Identifier: MPL-2.0

use std::collections::HashSet;

use indexmap::IndexSet;
use itertools::Itertools;

use stone_recipe::{
script,
tuning::{self, Toolchain},
Script,
};

use tui::Styled;

use super::{work_dir, Error};
use crate::build::pgo;
use crate::{architecture::BuildTarget, util, Macros, Paths, Recipe};

use super::{work_dir, Error};

pub fn list(pgo_stage: Option<pgo::Stage>) -> Vec<Phase> {
if matches!(pgo_stage, Some(pgo::Stage::One | pgo::Stage::Two)) {
Phase::WORKLOAD.to_vec()
Expand Down Expand Up @@ -316,7 +316,7 @@ fn add_tuning(
flags
.map(|s| s.trim())
.filter(|s| s.len() > 1)
.collect::<HashSet<_>>()
.collect::<IndexSet<_>>()
.into_iter()
.join(" ")
}
Expand Down
7 changes: 4 additions & 3 deletions boulder/src/build/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
//
// SPDX-License-Identifier: MPL-2.0

use std::collections::HashSet;
use std::{fs, io};

use indexmap::IndexSet;
use thiserror::Error;

use moss::{repository, runtime, Installation};
use stone_recipe::{tuning::Toolchain, Upstream};
use thiserror::Error;

use crate::build::Builder;
use crate::{container, timing, util, Timing};
Expand Down Expand Up @@ -149,7 +150,7 @@ fn packages(builder: &Builder) -> Vec<&str> {
.into_iter()
.chain(extra_deps)
// Remove dupes
.collect::<HashSet<_>>()
.collect::<IndexSet<_>>()
.into_iter()
.collect()
}
Expand Down
25 changes: 14 additions & 11 deletions boulder/src/cli/profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
//
// SPDX-License-Identifier: MPL-2.0

use std::{collections::HashMap, io};
use std::io;

use boulder::{profile, Env, Profile};
use clap::Parser;
use indexmap::IndexMap;
use itertools::Itertools;
use moss::{repository, runtime, Installation, Repository};
use thiserror::Error;
use url::Url;

use boulder::{profile, Env, Profile};
use moss::{repository, runtime, Installation, Repository};

#[derive(Debug, Parser)]
#[command(about = "Manage boulder profiles")]
pub struct Command {
Expand All @@ -27,13 +29,13 @@ pub enum Subcommand {
#[arg(help = "profile name")]
name: String,
#[arg(
short = 'r',
long = "repo",
required = true,
help = "profile repositories",
value_parser = parse_repository,
help = "repository to add to profile, can be passed multiple times",
long_help = "repository to add to profile\n\nExample: --repo name=volatile,uri=https://dev.serpentos.com/volatile/x86_64/stone.index,priority=100"
short = 'r',
long = "repo",
required = true,
help = "profile repositories",
value_parser = parse_repository,
help = "repository to add to profile, can be passed multiple times",
long_help = "repository to add to profile\n\nExample: --repo name=volatile,uri=https://dev.serpentos.com/volatile/x86_64/stone.index,priority=100"
)]
repos: Vec<(repository::Id, Repository)>,
},
Expand All @@ -49,7 +51,7 @@ fn parse_repository(s: &str) -> Result<(repository::Id, Repository), String> {
let key_values = s
.split(',')
.filter_map(|kv| kv.split_once('='))
.collect::<HashMap<_, _>>();
.collect::<IndexMap<_, _>>();

let id = repository::Id::new(key_values.get("name").ok_or("missing name")?.to_string());
let uri = key_values
Expand Down Expand Up @@ -140,6 +142,7 @@ pub fn update<'a>(env: &'a Env, manager: profile::Manager<'a>, profile: &profile

Ok(())
}

#[derive(Debug, Error)]
pub enum Error {
#[error("config")]
Expand Down
16 changes: 7 additions & 9 deletions boulder/src/draft/build.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
// SPDX-FileCopyrightText: Copyright © 2020-2024 Serpent OS Developers
//
// SPDX-License-Identifier: MPL-2.0
use std::{
collections::{HashMap, HashSet},
fmt,
num::NonZeroU64,
};
use std::{fmt, num::NonZeroU64};

use indexmap::{IndexMap, IndexSet};

use moss::Dependency;

Expand Down Expand Up @@ -102,7 +100,7 @@ impl fmt::Display for Phases {
/// State passed to each system when processing paths
struct State<'a> {
/// Any dependencies that need to be recorded
dependencies: &'a mut HashSet<Dependency>,
dependencies: &'a mut IndexSet<Dependency>,
/// Total confidence level of the current build [`System`]
confidence: u64,
}
Expand All @@ -124,14 +122,14 @@ pub struct Analysis {
/// The detected build [`System`], if any
pub detected_system: Option<System>,
/// All detected dependencies
pub dependencies: HashSet<Dependency>,
pub dependencies: IndexSet<Dependency>,
}

/// Analyze the provided paths to determine which build [`System`]
/// the project uses and any dependencies that are identified
pub fn analyze(files: &[File]) -> Result<Analysis, Error> {
let mut dependencies = HashSet::new();
let mut confidences = HashMap::new();
let mut dependencies = IndexSet::new();
let mut confidences = IndexMap::new();

for system in System::ALL {
let mut state = State {
Expand Down
7 changes: 4 additions & 3 deletions boulder/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
//
// SPDX-License-Identifier: MPL-2.0

use std::{collections::HashMap, fs, io, path::Path};
use std::{fs, io, path::Path};

use indexmap::IndexMap;
use thiserror::Error;

use crate::{util, Env};

#[derive(Debug)]
pub struct Macros {
pub arch: HashMap<String, stone_recipe::Macros>,
pub arch: IndexMap<String, stone_recipe::Macros>,
pub actions: Vec<stone_recipe::Macros>,
}

Expand All @@ -25,7 +26,7 @@ impl Macros {
let arch_files = util::enumerate_files(&arch_dir, matcher).map_err(Error::ArchFiles)?;
let action_files = util::enumerate_files(&actions_dir, matcher).map_err(Error::ActionFiles)?;

let mut arch = HashMap::new();
let mut arch = IndexMap::new();
let mut actions = vec![];

for file in arch_files {
Expand Down
24 changes: 11 additions & 13 deletions boulder/src/package.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
// SPDX-FileCopyrightText: Copyright © 2020-2024 Serpent OS Developers
//
// SPDX-License-Identifier: MPL-2.0
use std::{
collections::{hash_map, HashMap},
fs, io,
num::NonZeroU64,
};
use std::{fs, io, num::NonZeroU64};

use indexmap::IndexMap;
use itertools::Itertools;
use thiserror::Error;

use stone::write::digest;
use stone_recipe::{script, Package};
use thiserror::Error;

use crate::{build, container, timing, util, Macros, Paths, Recipe, Timing};

Expand All @@ -24,7 +22,7 @@ mod emit;
pub struct Packager<'a> {
paths: &'a Paths,
recipe: &'a Recipe,
packages: HashMap<String, Package>,
packages: IndexMap<String, Package>,
collector: Collector,
build_release: NonZeroU64,
}
Expand Down Expand Up @@ -90,7 +88,7 @@ impl<'a> Packager<'a> {
.packages
.iter()
.filter_map(|(name, package)| {
let bucket = analysis.buckets.remove(name)?;
let bucket = analysis.buckets.swap_remove(name)?;

Some(emit::Package::new(
name,
Expand Down Expand Up @@ -119,13 +117,13 @@ fn resolve_packages(
macros: &Macros,
recipe: &Recipe,
collector: &mut Collector,
) -> Result<HashMap<String, Package>, Error> {
) -> Result<IndexMap<String, Package>, Error> {
let mut parser = script::Parser::new();
parser.add_definition("name", &recipe.parsed.source.name);
parser.add_definition("version", &recipe.parsed.source.version);
parser.add_definition("release", recipe.parsed.source.release);

let mut packages = HashMap::new();
let mut packages = IndexMap::new();

// Add a package, ensuring it's fully expanded
//
Expand Down Expand Up @@ -165,11 +163,11 @@ fn resolve_packages(
}

match packages.entry(name.clone()) {
hash_map::Entry::Vacant(entry) => {
indexmap::map::Entry::Vacant(entry) => {
entry.insert(package);
}
hash_map::Entry::Occupied(entry) => {
let prev = entry.remove();
indexmap::map::Entry::Occupied(entry) => {
let prev = entry.swap_remove();

package.run_deps = package.run_deps.into_iter().chain(prev.run_deps).sorted().collect();
package.paths = package
Expand Down
9 changes: 6 additions & 3 deletions boulder/src/package/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,20 @@
// SPDX-License-Identifier: MPL-2.0

use std::{
collections::{BTreeSet, HashMap, VecDeque},
collections::{BTreeSet, VecDeque},
path::PathBuf,
};

use indexmap::IndexMap;

use moss::{Dependency, Provider};
use stone::write::digest;
use tui::{ProgressBar, ProgressStyle, Styled};

use super::collect::{Collector, PathInfo};
use crate::{Paths, Recipe};

use super::collect::{Collector, PathInfo};

mod handler;

pub type BoxError = Box<dyn std::error::Error>;
Expand All @@ -24,7 +27,7 @@ pub struct Chain<'a> {
paths: &'a Paths,
collector: &'a Collector,
hasher: &'a mut digest::Hasher,
pub buckets: HashMap<String, Bucket>,
pub buckets: IndexMap<String, Bucket>,
}

impl<'a> Chain<'a> {
Expand Down
Loading

0 comments on commit a6a0ad0

Please sign in to comment.