From d5429860fbf3a0b72463cb73328e98f2a208c2c2 Mon Sep 17 00:00:00 2001
From: SeanHsieh
Date: Thu, 2 May 2024 20:38:10 +0800
Subject: [PATCH 1/2] feat(gc): record workspace manifest and target dir in global cache tracker

---
 src/cargo/core/global_cache_tracker.rs | 204 ++++++++++++++++++++++++-
 src/cargo/ops/cargo_compile/mod.rs     |   9 +-
 2 files changed, 211 insertions(+), 2 deletions(-)

diff --git a/src/cargo/core/global_cache_tracker.rs b/src/cargo/core/global_cache_tracker.rs
index b6cb7a60fdb..9059d73759f 100644
--- a/src/cargo/core/global_cache_tracker.rs
+++ b/src/cargo/core/global_cache_tracker.rs
@@ -138,6 +138,8 @@ const REGISTRY_CRATE_TABLE: &str = "registry_crate";
 const REGISTRY_SRC_TABLE: &str = "registry_src";
 const GIT_DB_TABLE: &str = "git_db";
 const GIT_CO_TABLE: &str = "git_checkout";
+const WORKSPACE_MANIFEST_TABLE: &str = "workspace_manifest_index";
+const TARGET_DIR_TABLE: &str = "target_dir_index";
 
 /// How often timestamps will be updated.
 ///
@@ -209,6 +211,26 @@ pub struct GitCheckout {
     pub size: Option<u64>,
 }
 
+/// The key for a workspace manifest entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct WorkspaceManifestIndex {
+    /// Path to the workspace root manifest, stored as the entry's unique name.
+    pub workspace_manifest_path: InternedString,
+}
+
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct TargetDirIndex {
+    /// Path to the target directory, stored as the entry's unique name.
+    pub target_dir_path: InternedString,
+}
+
+/// The key for a (workspace manifest, target dir) pair stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct WorkspaceSrc {
+    pub workspace_manifest_path: InternedString,
+    pub target_dir_path: InternedString,
+}
+
 /// Filesystem paths in the global cache.
 ///
 /// Accessing these assumes a lock has already been acquired.
@@ -303,6 +325,30 @@ fn migrations() -> Vec<Migration> {
             )?;
             Ok(())
         }),
+        basic_migration(
+            "CREATE TABLE workspace_manifest_index (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT UNIQUE NOT NULL,
+                timestamp INTEGER NOT NULL
+            )",
+        ),
+        basic_migration(
+            "CREATE TABLE target_dir_index (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT UNIQUE NOT NULL,
+                timestamp INTEGER NOT NULL
+            )",
+        ),
+        basic_migration(
+            "CREATE TABLE workspace_src (
+                workspace_id INTEGER NOT NULL,
+                target_dir_id INTEGER NOT NULL,
+                timestamp INTEGER NOT NULL,
+                PRIMARY KEY (workspace_id, target_dir_id),
+                FOREIGN KEY (workspace_id) REFERENCES workspace_manifest_index (id) ON DELETE CASCADE,
+                FOREIGN KEY (target_dir_id) REFERENCES target_dir_index (id) ON DELETE CASCADE
+            )",
+        ),
     ]
 }
 
@@ -1413,7 +1459,16 @@ pub struct DeferredGlobalLastUse {
     /// The key is the git db name (which is its directory name) and the value
     /// is the `id` in the `git_db` table.
     git_keys: HashMap<InternedString, ParentId>,
-
+    /// Cache of workspace manifest keys, used for faster fetching.
+    ///
+    /// The key is the workspace manifest path and the value
+    /// is the `id` in the `workspace_manifest_index` table.
+    workspace_manifest_keys: HashMap<InternedString, ParentId>,
+    /// Cache of target dir keys, used for faster fetching.
+    ///
+    /// The key is the target dir path and the value
+    /// is the `id` in the `target_dir_index` table.
+    target_dir_keys: HashMap<InternedString, ParentId>,
     /// New registry index entries to insert.
     registry_index_timestamps: HashMap<RegistryIndex, Timestamp>,
     /// New registry `.crate` entries to insert.
@@ -1424,6 +1479,12 @@ pub struct DeferredGlobalLastUse {
     git_db_timestamps: HashMap<GitDb, Timestamp>,
     /// New git checkout entries to insert.
     git_checkout_timestamps: HashMap<GitCheckout, Timestamp>,
+    /// New workspace manifest index entries to insert.
+    workspace_manifest_index_timestamps: HashMap<WorkspaceManifestIndex, Timestamp>,
+    /// New target dir index entries to insert.
+    target_dir_index_timestamps: HashMap<TargetDirIndex, Timestamp>,
+    /// New workspace src entries to insert.
+    workspace_src_timestamps: HashMap<WorkspaceSrc, Timestamp>,
     /// This is used so that a warning about failing to update the database is
     /// only displayed once.
     save_err_has_warned: bool,
@@ -1437,11 +1498,16 @@ impl DeferredGlobalLastUse {
         DeferredGlobalLastUse {
             registry_keys: HashMap::new(),
             git_keys: HashMap::new(),
+            workspace_manifest_keys: HashMap::new(),
+            target_dir_keys: HashMap::new(),
             registry_index_timestamps: HashMap::new(),
             registry_crate_timestamps: HashMap::new(),
             registry_src_timestamps: HashMap::new(),
             git_db_timestamps: HashMap::new(),
             git_checkout_timestamps: HashMap::new(),
+            workspace_manifest_index_timestamps: HashMap::new(),
+            target_dir_index_timestamps: HashMap::new(),
+            workspace_src_timestamps: HashMap::new(),
             save_err_has_warned: false,
             now: now(),
         }
@@ -1453,6 +1519,9 @@ impl DeferredGlobalLastUse {
             && self.registry_src_timestamps.is_empty()
             && self.git_db_timestamps.is_empty()
             && self.git_checkout_timestamps.is_empty()
+            && self.workspace_manifest_index_timestamps.is_empty()
+            && self.target_dir_index_timestamps.is_empty()
+            && self.workspace_src_timestamps.is_empty()
     }
 
     fn clear(&mut self) {
@@ -1461,6 +1530,9 @@ impl DeferredGlobalLastUse {
         self.registry_src_timestamps.clear();
         self.git_db_timestamps.clear();
         self.git_checkout_timestamps.clear();
+        self.workspace_manifest_index_timestamps.clear();
+        self.target_dir_index_timestamps.clear();
+        self.workspace_src_timestamps.clear();
     }
 
     /// Indicates the given [`RegistryIndex`] has been used right now.
@@ -1489,6 +1561,13 @@ impl DeferredGlobalLastUse {
         self.mark_git_checkout_used_stamp(git_checkout, None);
     }
 
+    /// Indicates the given [`WorkspaceSrc`] has been used right now.
+    ///
+    /// Also implicitly marks the workspace manifest and target dir used, too.
+    pub fn mark_workspace_src_used(&mut self, workspace_src: WorkspaceSrc) {
+        self.mark_workspace_src_used_stamp(workspace_src, None);
+    }
+
     /// Indicates the given [`RegistryIndex`] has been used with the given
     /// time (or "now" if `None`).
     pub fn mark_registry_index_used_stamp(
@@ -1553,6 +1632,26 @@ impl DeferredGlobalLastUse {
         self.git_checkout_timestamps.insert(git_checkout, timestamp);
     }
 
+    pub fn mark_workspace_src_used_stamp(
+        &mut self,
+        workspace_src: WorkspaceSrc,
+        timestamp: Option<&SystemTime>,
+    ) {
+        let timestamp = timestamp.map_or(self.now, to_timestamp);
+        let workspace_manifest_index = WorkspaceManifestIndex {
+            workspace_manifest_path: workspace_src.workspace_manifest_path,
+        };
+        let target_dir_index = TargetDirIndex {
+            target_dir_path: workspace_src.target_dir_path,
+        };
+        self.workspace_manifest_index_timestamps
+            .insert(workspace_manifest_index, timestamp);
+        self.target_dir_index_timestamps
+            .insert(target_dir_index, timestamp);
+        self.workspace_src_timestamps
+            .insert(workspace_src, timestamp);
+    }
+
     /// Saves all of the deferred information to the database.
     ///
     /// This will also clear the state of `self`.
@@ -1566,9 +1665,13 @@ impl DeferredGlobalLastUse {
         // These must run before the ones that refer to their IDs.
         self.insert_registry_index_from_cache(&tx)?;
         self.insert_git_db_from_cache(&tx)?;
+        self.insert_target_dir_index_from_cache(&tx)?;
+        self.insert_workspace_manifest_index_from_cache(&tx)?;
+
         self.insert_registry_crate_from_cache(&tx)?;
         self.insert_registry_src_from_cache(&tx)?;
         self.insert_git_checkout_from_cache(&tx)?;
+        self.insert_workspace_src_from_cache(&tx)?;
         tx.commit()?;
         trace!(target: "gc", "last-use save complete");
         Ok(())
@@ -1632,6 +1735,32 @@ impl DeferredGlobalLastUse {
         );
     }
 
+    /// Flushes all of the `target_dir_index_timestamps` to the database,
+    /// clearing `target_dir_index_timestamps`.
+    fn insert_target_dir_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+        insert_or_update_parent!(
+            self,
+            conn,
+            "target_dir_index",
+            target_dir_index_timestamps,
+            target_dir_keys,
+            target_dir_path
+        );
+    }
+
+    /// Flushes all of the `workspace_manifest_index_timestamps` to the database,
+    /// clearing `workspace_manifest_index_timestamps`.
+    fn insert_workspace_manifest_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+        insert_or_update_parent!(
+            self,
+            conn,
+            "workspace_manifest_index",
+            workspace_manifest_index_timestamps,
+            workspace_manifest_keys,
+            workspace_manifest_path
+        );
+    }
+
     /// Flushes all of the `registry_crate_timestamps` to the database,
     /// clearing `registry_index_timestamps`.
     fn insert_registry_crate_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
@@ -1707,6 +1836,79 @@ impl DeferredGlobalLastUse {
         Ok(())
     }
 
+    /// Flushes all of the `workspace_src_timestamps` to the database,
+    /// clearing `workspace_src_timestamps`.
+    fn insert_workspace_src_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+        let workspace_src_timestamps = std::mem::take(&mut self.workspace_src_timestamps);
+        for (workspace_src, timestamp) in workspace_src_timestamps {
+            let workspace_id = self.workspace_id(conn, workspace_src.workspace_manifest_path)?;
+            let target_dir_id = self.target_dir_id(conn, workspace_src.target_dir_path)?;
+            let mut stmt = conn.prepare_cached(
+                "INSERT INTO workspace_src (workspace_id, target_dir_id, timestamp)
+                 VALUES (?1, ?2, ?3)
+                 ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
+                 WHERE timestamp < ?4",
+            )?;
+            stmt.execute(params![
+                workspace_id,
+                target_dir_id,
+                timestamp,
+                timestamp - UPDATE_RESOLUTION
+            ])?;
+        }
+        Ok(())
+    }
+
+    fn workspace_id(
+        &mut self,
+        conn: &Connection,
+        encoded_workspace_manifest_path: InternedString,
+    ) -> CargoResult<ParentId> {
+        match self
+            .workspace_manifest_keys
+            .get(&encoded_workspace_manifest_path)
+        {
+            Some(i) => Ok(*i),
+            None => {
+                let Some(id) = GlobalCacheTracker::id_from_name(
+                    conn,
+                    WORKSPACE_MANIFEST_TABLE,
+                    &encoded_workspace_manifest_path,
+                )?
+                else {
+                    bail!("expected workspace_manifest {encoded_workspace_manifest_path} to exist, but wasn't found");
+                };
+                self.workspace_manifest_keys
+                    .insert(encoded_workspace_manifest_path, id);
+                Ok(id)
+            }
+        }
+    }
+
+    fn target_dir_id(
+        &mut self,
+        conn: &Connection,
+        encoded_target_dir_path: InternedString,
+    ) -> CargoResult<ParentId> {
+        match self.target_dir_keys.get(&encoded_target_dir_path) {
+            Some(i) => Ok(*i),
+            None => {
+                let Some(id) = GlobalCacheTracker::id_from_name(
+                    conn,
+                    TARGET_DIR_TABLE,
+                    &encoded_target_dir_path,
+                )?
+                else {
+                    bail!(
+                        "expected target_dir {encoded_target_dir_path} to exist, but wasn't found"
+                    );
+                };
+                self.target_dir_keys.insert(encoded_target_dir_path, id);
+                Ok(id)
+            }
+        }
+    }
+
     /// Returns the numeric ID of the registry, either fetching from the local
     /// cache, or getting it from the database.
     ///
diff --git a/src/cargo/ops/cargo_compile/mod.rs b/src/cargo/ops/cargo_compile/mod.rs
index 77f6266355f..1199dfc8a9d 100644
--- a/src/cargo/ops/cargo_compile/mod.rs
+++ b/src/cargo/ops/cargo_compile/mod.rs
@@ -48,7 +48,7 @@ use crate::core::compiler::{DefaultExecutor, Executor, UnitInterner};
 use crate::core::profiles::Profiles;
 use crate::core::resolver::features::{self, CliFeatures, FeaturesFor};
 use crate::core::resolver::{HasDevUnits, Resolve};
-use crate::core::{PackageId, PackageSet, SourceId, TargetKind, Workspace};
+use crate::core::{global_cache_tracker, PackageId, PackageSet, SourceId, TargetKind, Workspace};
 use crate::drop_println;
 use crate::ops;
 use crate::ops::resolve::WorkspaceResolve;
@@ -264,6 +264,13 @@ pub fn create_bcx<'a, 'gctx>(
         }
     };
     let dry_run = false;
+    gctx.deferred_global_last_use()?
+        .mark_workspace_src_used(global_cache_tracker::WorkspaceSrc {
+            workspace_manifest_path: InternedString::new(&ws.root_manifest().to_string_lossy()),
+            target_dir_path: InternedString::new(
+                &ws.target_dir().as_path_unlocked().to_string_lossy(),
+            ),
+        });
     let resolve = ops::resolve_ws_with_opts(
         ws,
         &mut target_data,

From 7382a2dc72b7fc16ef67dce1af3787adf0c9becd Mon Sep 17 00:00:00 2001
From: SeanHsieh
Date: Wed, 22 May 2024 10:18:14 +0800
Subject: [PATCH 2/2] test(global_cache_tracker): verify workspace manifest and target dir recorded correctly

---
 src/cargo/core/global_cache_tracker.rs  | 36 +++++++++++++++++++++++++
 tests/testsuite/global_cache_tracker.rs | 15 +++++++++++
 2 files changed, 51 insertions(+)

diff --git a/src/cargo/core/global_cache_tracker.rs b/src/cargo/core/global_cache_tracker.rs
index 9059d73759f..edbd547718d 100644
--- a/src/cargo/core/global_cache_tracker.rs
+++ b/src/cargo/core/global_cache_tracker.rs
@@ -558,6 +558,42 @@ impl GlobalCacheTracker {
         Ok(rows)
     }
 
+    /// Returns all workspace manifest entries with their timestamps.
+    pub fn workspace_manifest_all(&self) -> CargoResult<Vec<(WorkspaceManifestIndex, Timestamp)>> {
+        let mut stmt = self
+            .conn
+            .prepare_cached("SELECT name, timestamp FROM workspace_manifest_index")?;
+        let rows = stmt
+            .query_map([], |row| {
+                let workspace_manifest_path = row.get_unwrap(0);
+                let timestamp = row.get_unwrap(1);
+                let kind = WorkspaceManifestIndex {
+                    workspace_manifest_path,
+                };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns all target dir entries with their timestamps.
+    pub fn target_dir_all(&self) -> CargoResult<Vec<(TargetDirIndex, Timestamp)>> {
+        let mut stmt = self
+            .conn
+            .prepare_cached("SELECT name, timestamp FROM target_dir_index")?;
+        let rows = stmt
+            .query_map([], |row| {
+                let target_dir_path = row.get_unwrap(0);
+                let timestamp = row.get_unwrap(1);
+                let kind = TargetDirIndex {
+                    target_dir_path,
+                };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
     /// Returns whether or not an auto GC should be performed, compared to the
     /// last time it was recorded in the database.
     pub fn should_run_auto_gc(&mut self, frequency: Duration) -> CargoResult<bool> {
diff --git a/tests/testsuite/global_cache_tracker.rs b/tests/testsuite/global_cache_tracker.rs
index 449cdc6e368..f822587b25f 100644
--- a/tests/testsuite/global_cache_tracker.rs
+++ b/tests/testsuite/global_cache_tracker.rs
@@ -245,6 +245,10 @@ fn implies_source() {
         short_name: "f0a4ee0".into(),
         size: None,
     });
+    deferred.mark_workspace_src_used(global_cache_tracker::WorkspaceSrc {
+        target_dir_path: "/Users/foo/cargo/target".into(),
+        workspace_manifest_path: "/Users/foo/cargo/Cargo.toml".into(),
+    });
     deferred.save(&mut tracker).unwrap();
 
     let mut indexes = tracker.registry_index_all().unwrap();
@@ -262,6 +266,17 @@ fn implies_source() {
     let dbs = tracker.git_db_all().unwrap();
     assert_eq!(dbs.len(), 1);
     assert_eq!(dbs[0].0.encoded_git_name, "cargo-e7ff1db891893a9e");
+
+    let workspace_manifests = tracker.workspace_manifest_all().unwrap();
+    assert_eq!(workspace_manifests.len(), 1);
+    assert_eq!(
+        workspace_manifests[0].0.workspace_manifest_path,
+        "/Users/foo/cargo/Cargo.toml"
+    );
+
+    let target_dirs = tracker.target_dir_all().unwrap();
+    assert_eq!(target_dirs.len(), 1);
+    assert_eq!(target_dirs[0].0.target_dir_path, "/Users/foo/cargo/target");
 }
 
 #[cargo_test]