Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] feat(gc): record workspace manifest and target dir in global cache tracker #13846

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
240 changes: 239 additions & 1 deletion src/cargo/core/global_cache_tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ const REGISTRY_CRATE_TABLE: &str = "registry_crate";
const REGISTRY_SRC_TABLE: &str = "registry_src";
const GIT_DB_TABLE: &str = "git_db";
const GIT_CO_TABLE: &str = "git_checkout";
const WORKSPACE_MANIFEST_TABLE: &str = "workspace_manifest_index";
const TARGET_DIR_TABLE: &str = "target_dir_index";

/// How often timestamps will be updated.
///
Expand Down Expand Up @@ -209,6 +211,26 @@ pub struct GitCheckout {
pub size: Option<u64>,
}

/// The key for a workspace manifest entry stored in the database.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub struct WorkspaceManifestIndex {
/// A unique name of the workspace manifest.
pub workspace_manifest_path: InternedString,
}

#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub struct TargetDirIndex {
/// A unique name of the target directory.
pub target_dir_path: InternedString,
}

/// The key for a workspace entry stored in the database.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub struct WorkspaceSrc {
pub workspace_manifest_path: InternedString,
pub target_dir_path: InternedString,
}

/// Filesystem paths in the global cache.
///
/// Accessing these assumes a lock has already been acquired.
Expand Down Expand Up @@ -303,6 +325,30 @@ fn migrations() -> Vec<Migration> {
)?;
Ok(())
}),
basic_migration(
"CREATE TABLE workspace_manifest_index (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE NOT NULL,
timestamp INTEGER NOT NULL
)",
),
basic_migration(
"CREATE TABLE target_dir_index (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE NOT NULL,
timestamp INTEGER NOT NULL
)",
),
basic_migration(
"CREATE TABLE workspace_src (
workspace_id INTEGER NOT NULL,
target_dir_id INTEGER NOT NULL,
timestamp INTEGER NOT NULL,
PRIMARY KEY (workspace_id, target_dir_id),
FOREIGN KEY (workspace_id) REFERENCES workspace_manifest_index (id) ON DELETE CASCADE,
FOREIGN KEY (target_dir_id) REFERENCES target_dir_index (id) ON DELETE CASCADE
)",
Comment on lines +328 to +350
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whats the motivation for

  • Tracking this as 3 separate tables
  • Having timestamps for workspace dir, target dir, and workspace dir + target dir

)
]
}

Expand Down Expand Up @@ -512,6 +558,42 @@ impl GlobalCacheTracker {
Ok(rows)
}

// Return all workspace_manifest cache timestamps.
pub fn workspace_manifest_all(&self) -> CargoResult<Vec<(WorkspaceManifestIndex, Timestamp)>> {
let mut stmt = self
.conn
.prepare_cached("SELECT name, timestamp FROM workspace_manifest_index")?;
let rows = stmt
.query_map([], |row| {
let workspace_manifest_path = row.get_unwrap(0);
let timestamp = row.get_unwrap(1);
let kind = WorkspaceManifestIndex {
workspace_manifest_path: workspace_manifest_path,
};
Ok((kind, timestamp))
})?
.collect::<Result<Vec<_>, _>>()?;
Ok(rows)
}

// Return all target dir cache timestamps.
pub fn target_dir_all(&self) -> CargoResult<Vec<(TargetDirIndex, Timestamp)>> {
let mut stmt = self
.conn
.prepare_cached("SELECT name, timestamp FROM target_dir_index")?;
let rows = stmt
.query_map([], |row| {
let target_dir_path = row.get_unwrap(0);
let timestamp = row.get_unwrap(1);
let kind = TargetDirIndex {
target_dir_path: target_dir_path,
};
Ok((kind, timestamp))
})?
.collect::<Result<Vec<_>, _>>()?;
Ok(rows)
}

/// Returns whether or not an auto GC should be performed, compared to the
/// last time it was recorded in the database.
pub fn should_run_auto_gc(&mut self, frequency: Duration) -> CargoResult<bool> {
Expand Down Expand Up @@ -1413,7 +1495,16 @@ pub struct DeferredGlobalLastUse {
/// The key is the git db name (which is its directory name) and the value
/// is the `id` in the `git_db` table.
git_keys: HashMap<InternedString, ParentId>,

/// Cache of workspace manifest keys, used for faster fetching.
///
/// The key is the workspace manifest path and the value
/// is the `id` in the `workspace_manifest` table.
workspace_manifest_keys: HashMap<InternedString, ParentId>,
/// Cache of target dir keys, used for faster fetching.
///
/// The key is the target dir path and the value
/// is the `id` in the `target_dir` table.
target_dir_keys: HashMap<InternedString, ParentId>,
/// New registry index entries to insert.
registry_index_timestamps: HashMap<RegistryIndex, Timestamp>,
/// New registry `.crate` entries to insert.
Expand All @@ -1424,6 +1515,12 @@ pub struct DeferredGlobalLastUse {
git_db_timestamps: HashMap<GitDb, Timestamp>,
/// New git checkout entries to insert.
git_checkout_timestamps: HashMap<GitCheckout, Timestamp>,
/// New workspace manifest index entries to insert.
workspace_manifest_index_timestamps: HashMap<WorkspaceManifestIndex, Timestamp>,
/// New target dir index entries to insert.
target_dir_index_timestamps: HashMap<TargetDirIndex, Timestamp>,
/// New workspace src entries to insert.
workspace_src_timestamps: HashMap<WorkspaceSrc, Timestamp>,
/// This is used so that a warning about failing to update the database is
/// only displayed once.
save_err_has_warned: bool,
Expand All @@ -1437,11 +1534,16 @@ impl DeferredGlobalLastUse {
DeferredGlobalLastUse {
registry_keys: HashMap::new(),
git_keys: HashMap::new(),
workspace_manifest_keys: HashMap::new(),
target_dir_keys: HashMap::new(),
registry_index_timestamps: HashMap::new(),
registry_crate_timestamps: HashMap::new(),
registry_src_timestamps: HashMap::new(),
git_db_timestamps: HashMap::new(),
git_checkout_timestamps: HashMap::new(),
target_dir_index_timestamps: HashMap::new(),
workspace_manifest_index_timestamps: HashMap::new(),
workspace_src_timestamps: HashMap::new(),
save_err_has_warned: false,
now: now(),
}
Expand All @@ -1453,6 +1555,9 @@ impl DeferredGlobalLastUse {
&& self.registry_src_timestamps.is_empty()
&& self.git_db_timestamps.is_empty()
&& self.git_checkout_timestamps.is_empty()
&& self.target_dir_index_timestamps.is_empty()
&& self.workspace_manifest_index_timestamps.is_empty()
&& self.workspace_src_timestamps.is_empty()
}

fn clear(&mut self) {
Expand All @@ -1461,6 +1566,9 @@ impl DeferredGlobalLastUse {
self.registry_src_timestamps.clear();
self.git_db_timestamps.clear();
self.git_checkout_timestamps.clear();
self.target_dir_index_timestamps.clear();
self.workspace_manifest_index_timestamps.clear();
self.workspace_src_timestamps.clear();
}

/// Indicates the given [`RegistryIndex`] has been used right now.
Expand Down Expand Up @@ -1489,6 +1597,13 @@ impl DeferredGlobalLastUse {
self.mark_git_checkout_used_stamp(git_checkout, None);
}

/// Indicates the given [`WorkspaceSrc`] has been used right now.
///
/// Also implicitly marks the workspace manifest used, too.
pub fn mark_workspace_src_used(&mut self, workspace_src: WorkspaceSrc) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, I wonder why we exposed these in the API, rather than taking parameters. Seems like it creates more boilerplate for the callers

self.mark_workspace_src_used_stamp(workspace_src, None);
}

/// Indicates the given [`RegistryIndex`] has been used with the given
/// time (or "now" if `None`).
pub fn mark_registry_index_used_stamp(
Expand Down Expand Up @@ -1553,6 +1668,26 @@ impl DeferredGlobalLastUse {
self.git_checkout_timestamps.insert(git_checkout, timestamp);
}

pub fn mark_workspace_src_used_stamp(
&mut self,
workspace_src: WorkspaceSrc,
timestamp: Option<&SystemTime>,
) {
let timestamp = timestamp.map_or(self.now, to_timestamp);
let workspace_manifest_index = WorkspaceManifestIndex {
workspace_manifest_path: workspace_src.workspace_manifest_path,
};
let target_dir_db = TargetDirIndex {
target_dir_path: workspace_src.target_dir_path,
};
self.target_dir_index_timestamps
.insert(target_dir_db, timestamp);
self.workspace_manifest_index_timestamps
.insert(workspace_manifest_index, timestamp);
self.workspace_src_timestamps
.insert(workspace_src, timestamp);
}

/// Saves all of the deferred information to the database.
///
/// This will also clear the state of `self`.
Expand All @@ -1566,9 +1701,13 @@ impl DeferredGlobalLastUse {
// These must run before the ones that refer to their IDs.
self.insert_registry_index_from_cache(&tx)?;
self.insert_git_db_from_cache(&tx)?;
self.insert_target_dir_index_from_cache(&tx)?;
self.insert_workspace_manifest_index_from_cache(&tx)?;

self.insert_registry_crate_from_cache(&tx)?;
self.insert_registry_src_from_cache(&tx)?;
self.insert_git_checkout_from_cache(&tx)?;
self.insert_workspace_src_from_cache(&tx)?;
tx.commit()?;
trace!(target: "gc", "last-use save complete");
Ok(())
Expand Down Expand Up @@ -1632,6 +1771,32 @@ impl DeferredGlobalLastUse {
);
}

// Flushes all of the `target_dir_db_timestamps` to the database,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Review note: I've not yet dug into all of the DB operations

// clearing `target_dir_index_timestamps`.
fn insert_target_dir_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
insert_or_update_parent!(
self,
conn,
"target_dir_index",
target_dir_index_timestamps,
target_dir_keys,
target_dir_path
);
}

// Flushes all of the `workspace_manifest_index_timestamps` to the database,
// clearing `workspace_manifest_index_timestamps`.
fn insert_workspace_manifest_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
insert_or_update_parent!(
self,
conn,
"workspace_manifest_index",
workspace_manifest_index_timestamps,
workspace_manifest_keys,
workspace_manifest_path
);
}

/// Flushes all of the `registry_crate_timestamps` to the database,
/// clearing `registry_index_timestamps`.
fn insert_registry_crate_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
Expand Down Expand Up @@ -1707,6 +1872,79 @@ impl DeferredGlobalLastUse {
Ok(())
}

// Flushes all of the `workspace_src_timestamps` to the database,
// clearing `workspace_src_timestamps`.
fn insert_workspace_src_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
let workspace_src_timestamps = std::mem::take(&mut self.workspace_src_timestamps);
for (workspace_src, timestamp) in workspace_src_timestamps {
let workspace_id = self.workspace_id(conn, workspace_src.workspace_manifest_path)?;
let target_dir_id = self.target_dir_id(conn, workspace_src.target_dir_path)?;
let mut stmt = conn.prepare_cached(
"INSERT INTO workspace_src (workspace_id, target_dir_id, timestamp)
VALUES (?1, ?2, ?3)
ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
WHERE timestamp < ?4",
)?;
stmt.execute(params![
workspace_id,
target_dir_id,
timestamp,
timestamp - UPDATE_RESOLUTION
])?;
}
Ok(())
}

fn workspace_id(
&mut self,
conn: &Connection,
encoded_workspace_manifest_path: InternedString,
) -> CargoResult<ParentId> {
match self
.workspace_manifest_keys
.get(&encoded_workspace_manifest_path)
{
Some(i) => Ok(*i),
None => {
let Some(id) = GlobalCacheTracker::id_from_name(
conn,
WORKSPACE_MANIFEST_TABLE,
&encoded_workspace_manifest_path,
)?
else {
bail!("expected workspace_manifest {encoded_workspace_manifest_path} to exist, but wasn't found");
};
self.workspace_manifest_keys
.insert(encoded_workspace_manifest_path, id);
Ok(id)
}
}
}

fn target_dir_id(
&mut self,
conn: &Connection,
encoded_target_dir_path: InternedString,
) -> CargoResult<ParentId> {
match self.target_dir_keys.get(&encoded_target_dir_path) {
Some(i) => Ok(*i),
None => {
let Some(id) = GlobalCacheTracker::id_from_name(
conn,
TARGET_DIR_TABLE,
&encoded_target_dir_path,
)?
else {
bail!(
"expected target_dir {encoded_target_dir_path} to exist, but wasn't found"
);
};
self.target_dir_keys.insert(encoded_target_dir_path, id);
Ok(id)
}
}
}

/// Returns the numeric ID of the registry, either fetching from the local
/// cache, or getting it from the database.
///
Expand Down
9 changes: 8 additions & 1 deletion src/cargo/ops/cargo_compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ use crate::core::compiler::{DefaultExecutor, Executor, UnitInterner};
use crate::core::profiles::Profiles;
use crate::core::resolver::features::{self, CliFeatures, FeaturesFor};
use crate::core::resolver::{HasDevUnits, Resolve};
use crate::core::{PackageId, PackageSet, SourceId, TargetKind, Workspace};
use crate::core::{global_cache_tracker, PackageId, PackageSet, SourceId, TargetKind, Workspace};
use crate::drop_println;
use crate::ops;
use crate::ops::resolve::WorkspaceResolve;
Expand Down Expand Up @@ -265,6 +265,13 @@ pub fn create_bcx<'a, 'gctx>(
}
};
let dry_run = false;
gctx.deferred_global_last_use()?
.mark_workspace_src_used(global_cache_tracker::WorkspaceSrc {
workspace_manifest_path: InternedString::new(ws.root_manifest().to_str().unwrap()),
target_dir_path: InternedString::new(
ws.target_dir().as_path_unlocked().to_str().unwrap(),
),
});
let resolve = ops::resolve_ws_with_opts(
ws,
&mut target_data,
Expand Down
15 changes: 15 additions & 0 deletions tests/testsuite/global_cache_tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,10 @@ fn implies_source() {
short_name: "f0a4ee0".into(),
size: None,
});
deferred.mark_workspace_src_used(global_cache_tracker::WorkspaceSrc {
target_dir_path: "/Users/foo/cargo/target".into(),
workspace_manifest_path: "/Users/foo/cargo/Cargo.toml".into(),
});
deferred.save(&mut tracker).unwrap();

let mut indexes = tracker.registry_index_all().unwrap();
Expand All @@ -254,6 +258,17 @@ fn implies_source() {
let dbs = tracker.git_db_all().unwrap();
assert_eq!(dbs.len(), 1);
assert_eq!(dbs[0].0.encoded_git_name, "cargo-e7ff1db891893a9e");

let workspace_manifests = tracker.workspace_manifest_all().unwrap();
assert_eq!(workspace_manifests.len(), 1);
assert_eq!(
workspace_manifests[0].0.workspace_manifest_path,
"/Users/foo/cargo/Cargo.toml"
);

let target_dirs = tracker.target_dir_all().unwrap();
assert_eq!(target_dirs.len(), 1);
assert_eq!(target_dirs[0].0.target_dir_path, "/Users/foo/cargo/target");
}

#[cargo_test]
Expand Down