diff --git a/Cargo.toml b/Cargo.toml index 316e356..815168b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ edition = "2018" license = "MIT" [dependencies] +extendhash = "1" filetime = "0.2" rand = "0.8" tempfile = "3" diff --git a/src/lib.rs b/src/lib.rs index 786fa06..bb05c91 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ //! Kismet implements multiprocess lock-free[^lock-free-fs] -//! application-crash-safe (roughly) bounded persistent caches stored +//! crash-safe and (roughly) bounded persistent caches stored //! in filesystem directories, with a -//! [Second Chance (Clock)](https://en.wikipedia.org/wiki/Page_replacement_algorithm#Second-chance) +//! [Second Chance](https://en.wikipedia.org/wiki/Page_replacement_algorithm#Second-chance) //! eviction strategy. The maintenance logic is batched and invoked //! at periodic jittered intervals to make sure accesses amortise to a //! constant number of filesystem system calls and logarithmic (in the @@ -36,26 +36,27 @@ //! directory, one byte of lock-free metadata per shard, and no other //! non-heap resource (i.e., Kismet caches do not hold on to file //! objects). This holds for individual cache directories; when -//! stacking multiple caches in a [`Cache`], the read-write cache and -//! all constituent read-only caches will each have their own -//! `PathBuf` and per-shard metadata. +//! stacking multiple caches in a [`Cache`] or [`ReadOnlyCache`], the +//! read-write cache and all constituent read-only caches will each +//! have their own `PathBuf` and per-shard metadata. //! //! When a Kismet cache triggers second chance evictions, it will //! allocate temporary data. That data's size is proportional to the //! number of files in the cache shard subdirectory undergoing //! eviction (or the whole directory for a plain unsharded cache), and -//! includes a copy of the name (without the path prefix) for each +//! includes a copy of the basename (without the path prefix) for each //! cached file in the subdirectory (or plain cache directory). This //! eviction process is linearithmic-time in the number of files in //! the cache subdirectory (directory), and is invoked periodically, -//! so as to amortise the maintenance time overhead to logarithmic -//! per write to a cache subdirectory. +//! so as to amortise the maintenance overhead to logarithmic (in the +//! total number of files in the subdirectory) time per write to a +//! cache subdirectory, and a constant number of file operations per write. //! //! Kismet does not pre-allocate any long-lived file object, so it may -//! need to temporarily open file objects. However, each call into -//! Kismet will always bound the number of concurrently allocated file -//! objects; the current logic never allocates more than two -//! concurrent file objects. +//! need to temporarily open file objects. Each call nevertheless +//! bounds the number of concurrently allocated file objects; the +//! current logic never allocates more than two concurrent file +//! objects. //! //! The load (number of files) in each cache may exceed the cache's //! capacity because there is no centralised accounting, except for @@ -73,6 +74,8 @@ //! multiple places, as long as the files are not modified, or their //! `mtime` otherwise updated, through these non-Kismet links. //! +//! # Plain and sharded caches +//! //! Kismet cache directories are plain (unsharded) or sharded. //! //! Plain Kismet caches are simply directories where the cache entry for @@ -88,27 +91,20 @@ //! //! 
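+//! For illustration only (the entry names below are hypothetical), a
+//! plain cache directory might look like:
+//!
+//! ```text
+//! cache_directory/
+//!     .kismet_temp/    # staging subdirectory for in-flight files
+//!     entry-key-1      # cached file, named after its key
+//!     entry-key-2
+//! ```
+//!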
Simple usage should be covered by the [`ReadOnlyCache`] or //! [`Cache`] structs, which wrap [`plain::Cache`] and -//! [`sharded::Cache`] in a convenient type-erased interface. The -//! caches *do not* invoke [`std::fs::File::sync_all`] or [`std::fs::File::sync_data`]: -//! the caller should sync files before letting Kismet persist them in -//! a cache if necessary. File synchronisation is not automatic -//! because it makes sense to implement persistent filesystem caches -//! that are erased after each boot, e.g., via -//! [tmpfiles.d](https://www.freedesktop.org/software/systemd/man/tmpfiles.d.html), -//! or by tagging cache directories with a -//! [boot id](https://man7.org/linux/man-pages/man3/sd_id128_get_machine.3.html). -//! -//! The cache code also does not sync the parent cache directories: we -//! assume that it's safe, if unfortunate, for caches to lose data or -//! revert to an older state after kernel or hardware crashes. In -//! general, the code attempts to be robust again direct manipulation -//! of the cache directories. It's always safe to delete cache files -//! from kismet directories (ideally not recently created files in -//! `.kismet_temp` directories), and even *adding* files should mostly -//! do what one expects: they will be picked up if they're in the -//! correct place (in a plain unsharded cache directory or in the -//! correct shard subdirectory), and eventually evicted if useless or -//! in the wrong shard. +//! [`sharded::Cache`] in a convenient type-erased interface. +//! +//! While the cache code syncs cached data files by default, it does +//! not sync the parent cache directories: we assume that it's safe, +//! if unfortunate, for caches to lose data or revert to an older +//! state after kernel or hardware crashes. In general, the code +//! attempts to be robust against direct manipulation of the cache +//! directories. It's always safe to delete cache files from kismet +//! directories (ideally not recently created files in `.kismet_temp` +//! subdirectories), and even *adding* files should mostly do what one +//! expects: they will be picked up if they're in the correct place +//! (in a plain unsharded cache directory or in the correct shard +//! subdirectory), and eventually evicted if useless or in the wrong +//! shard. //! //! It is however essential to only publish files atomically to the //! cache directories, and it probably never makes sense to modify @@ -180,10 +176,7 @@ //! // Fetches the current cached value for `key`, or populates it with //! // the closure argument if missing. //! let mut cached_file = cache -//! .ensure(&key, |file| { -//! file.write_all(&get_contents(&key))?; -//! file.sync_all() -//! })?; +//! .ensure(&key, |file| file.write_all(&get_contents(&key)))?; //! let mut contents = Vec::new(); //! cached_file.read_to_end(&mut contents)?; //! # Ok(()) @@ -226,11 +219,12 @@ //! Kismet will always store its internal data in files or directories //! that start with a `.kismet` prefix, and cached data lives in //! files with names equal to their keys. Since Kismet sanitises -//! cache keys to forbid them from starting with `.`, `/`, or `\\`, it +//! cache keys to forbid them from starting with `.`, `/`, or `\`, it //! is always safe for an application to store additional data in //! files or directories that start with a `.`, as long as they do not //! collide with the `.kismet` prefix. 
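+//!
+//! For example (hypothetical key names): `output.bin` and
+//! `v1-objects` are acceptable keys, while the empty name,
+//! `.hidden`, `/absolute`, and `\escaped` are all rejected with
+//! `std::io::ErrorKind::InvalidInput`.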
mod cache_dir; +mod multiplicative_hash; pub mod plain; pub mod raw_cache; mod readonly; @@ -247,7 +241,7 @@ pub use stack::CacheHit; pub use stack::CacheHitAction; /// Kismet cache directories put temporary files under this -/// subdirectory. +/// subdirectory in each cache or cache shard directory. pub const KISMET_TEMPORARY_SUBDIRECTORY: &str = ".kismet_temp"; /// Cache keys consist of a filename and two hash values. The two diff --git a/src/multiplicative_hash.rs b/src/multiplicative_hash.rs new file mode 100644 index 0000000..1dfb4bc --- /dev/null +++ b/src/multiplicative_hash.rs @@ -0,0 +1,158 @@ +/// Multiplicative hash structs implement +/// [Dietzfelbinger's universal multiplicative hash function](https://link.springer.com/chapter/10.1007/978-3-319-98355-4_15) +/// with `const fn` keyed constructors, and pair that with a range +/// reduction function from `u64` to a `usize` range that extends +/// Dietzfelbinger's power-of-two scheme. +#[derive(Clone, Copy, Debug)] +pub struct MultiplicativeHash { + // Pseudorandom odd multiplier + multiplier: u64, + // Pseudorandom value added to the product + addend: u64, +} + +/// Maps values in `[0, u64::MAX]` to `[0, domain)` linearly. +/// +/// As a special case, this function returns 0 instead of erroring out +/// when `domain == 0`. +#[inline(always)] +const fn remap(x: u64, domain: usize) -> usize { + ((domain as u128 * x as u128) >> 64) as usize +} + +impl MultiplicativeHash { + /// Deterministically constructs a `MultiplicativeHash` with + /// parameters derived from the `key`, with a SHA-256 hash. + pub const fn new(key: &[u8]) -> MultiplicativeHash { + use extendhash::sha256; + + let hash = sha256::compute_hash(key); + let multiplier = [ + hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7], + ]; + let addend = [ + hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15], + ]; + MultiplicativeHash { + multiplier: u64::from_le_bytes(multiplier), + addend: u64::from_le_bytes(addend), + } + } + + /// Constructs a new pseudorandom `MultiplicativeHash`. + pub fn new_random() -> MultiplicativeHash { + use rand::Rng; + + let mut rnd = rand::thread_rng(); + MultiplicativeHash { + multiplier: rnd.gen::<u64>() | 1, + addend: rnd.gen(), + } + } + + /// Mixes `value` with this hash's parameters. If you must + /// truncate the result, use its high bits. + #[inline(always)] + pub const fn mix(&self, value: u64) -> u64 { + value + .wrapping_mul(self.multiplier) + .wrapping_add(self.addend) + } + + /// Mixes `value` and maps the result to a `usize` less than `range`. + /// + /// If `range == 0`, always returns 0. + #[inline(always)] + pub const fn map(&self, value: u64, range: usize) -> usize { + remap(self.mix(value), range) + } +} + +/// Smoke test the `remap` function. +#[test] +fn test_remap() { + // Mapping to an empty range should always return 0. + assert_eq!(remap(0, 0), 0); + assert_eq!(remap(u64::MAX, 0), 0); + + // Otherwise smoke test the remapping + assert_eq!(remap(0, 17), 0); + assert_eq!(remap(u64::MAX / 17, 17), 0); + assert_eq!(remap(1 + u64::MAX / 17, 17), 1); + assert_eq!(remap(u64::MAX, 17), 16); +} + +/// Mapping to a power-of-two sized range is the same as taking the +/// high bits. +#[test] +fn test_remap_power_of_two() { + assert_eq!(remap(10 << 33, 1 << 32), 10 << 1); + assert_eq!(remap(15 << 60, 1 << 8), 15 << 4); +} + +/// Construct two different hashers. We should get different values +/// for `mix`. 
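+// Note (added for clarity): `mix` is an affine function mod 2^64,
+// mix(x) = multiplier * x + addend, so evaluating it at two points
+// suffices to recover both parameters; `test_construct` below leans
+// on exactly that property.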
+#[test] +fn test_mix() { + let h1 = MultiplicativeHash::new(b"h1"); + let h2 = MultiplicativeHash::new(b"h2"); + + assert!(h1.mix(0) != h2.mix(0)); + assert!(h1.mix(1) != h2.mix(1)); + assert!(h1.mix(42) != h2.mix(42)); + assert!(h1.mix(u64::MAX) != h2.mix(u64::MAX)); +} + +/// Construct two random hashers. We should get different values +/// for `mix`. +#[test] +fn test_random_mix() { + let h1 = MultiplicativeHash::new_random(); + let h2 = MultiplicativeHash::new_random(); + + assert!(h1.mix(0) != h2.mix(0)); + assert!(h1.mix(1) != h2.mix(1)); + assert!(h1.mix(42) != h2.mix(42)); + assert!(h1.mix(u64::MAX) != h2.mix(u64::MAX)); +} + +/// Construct two different hashers. We should get different +/// values for `map`. +#[test] +fn test_map() { + let h1 = MultiplicativeHash::new(b"h1"); + let h2 = MultiplicativeHash::new(b"h2"); + + assert!(h1.map(0, 1024) != h2.map(0, 1024)); + assert!(h1.map(1, 1234) != h2.map(1, 1234)); + assert!(h1.map(42, 4567) != h2.map(42, 4567)); + assert!(h1.map(u64::MAX, 789) != h2.map(u64::MAX, 789)); +} + +/// Confirm that construction is const and deterministic. +#[test] +fn test_construct() { + const H: MultiplicativeHash = MultiplicativeHash::new(b"asdfg"); + + // Given the nature of the hash function, two points suffice to + // derive the parameters. + + // addend = 7162733811001658625 + assert_eq!(H.mix(0), 7162733811001658625); + // multiplier = 14551484392748644090 - addend = 7388750581746985465 + assert_eq!(H.mix(1), 14551484392748644090); + + // But it doesn't hurt to test a couple more points. + assert_eq!( + H.mix(42), + 42u64 + .wrapping_mul(7388750581746985465) + .wrapping_add(7162733811001658625) + ); + assert_eq!( + H.mix(u64::MAX), + u64::MAX + .wrapping_mul(7388750581746985465) + .wrapping_add(7162733811001658625) + ); +} diff --git a/src/plain.rs b/src/plain.rs index 59c40d2..d82c8cc 100644 --- a/src/plain.rs +++ b/src/plain.rs @@ -8,7 +8,11 @@ //! in flat directories. //! //! This module is useful for lower level usage; in most cases, the -//! [`crate::Cache`] is more convenient and just as efficient. +//! [`crate::Cache`] is more convenient and just as efficient. In +//! particular, a `crate::plain::Cache` *does not* invoke +//! [`std::fs::File::sync_all`] or [`std::fs::File::sync_data`]: the +//! caller should sync files before letting Kismet persist them in a +//! directory, if necessary. //! //! The cache's contents will grow past its stated capacity, but //! should rarely reach more than twice that capacity. diff --git a/src/sharded.rs b/src/sharded.rs index cd9ca42..eced655 100644 --- a/src/sharded.rs +++ b/src/sharded.rs @@ -12,7 +12,11 @@ //! and an optional `.kismet_temp` subdirectory for temporary files. //! //! This module is useful for lower level usage; in most cases, the -//! [`crate::Cache`] is more convenient and just as efficient. +//! [`crate::Cache`] is more convenient and just as efficient. In +//! particular, a `crate::sharded::Cache` *does not* invoke +//! [`std::fs::File::sync_all`] or [`std::fs::File::sync_data`]: the +//! caller should sync files before letting Kismet persist them in a +//! directory, if necessary. //! //! The cache's contents will grow past its stated capacity, but //! 
should rarely reach more than twice that capacity, especially @@ -27,6 +31,7 @@ use std::sync::atomic::Ordering::Relaxed; use std::sync::Arc; use crate::cache_dir::CacheDir; +use crate::multiplicative_hash::MultiplicativeHash; use crate::trigger::PeriodicTrigger; use crate::Key; use crate::KISMET_TEMPORARY_SUBDIRECTORY as TEMP_SUBDIR; @@ -36,9 +41,13 @@ use crate::KISMET_TEMPORARY_SUBDIRECTORY as TEMP_SUBDIR; /// shard capacity inserts or updates. const MAINTENANCE_SCALE: usize = 2; -const RANDOM_MULTIPLIER: u64 = 0xf2efdf1111adba6f; +/// These mixers must be the same for all processes that access the +/// same sharded cache directory. That's why we derive the parameters +/// in a const function. +const PRIMARY_MIXER: MultiplicativeHash = MultiplicativeHash::new(b"kismet: primary shard mixer"); -const SECONDARY_RANDOM_MULTIPLIER: u64 = 0xa55e1e02718a6a47; +const SECONDARY_MIXER: MultiplicativeHash = + MultiplicativeHash::new(b"kismet: secondary shard mixer"); /// A sharded cache is a hash-sharded directory of cache /// subdirectories. Each subdirectory is managed as an @@ -185,18 +194,12 @@ impl Cache { fn shard_ids(&self, key: Key) -> (usize, usize) { // We can't assume the hash is well distributed, so mix it // around a bit with a multiplicative hash. - let remap = |x: u64, mul: u64| { - let hash = x.wrapping_mul(mul) as u128; - // Map the hashed hash to a shard id with a fixed point - // multiplication. - ((self.num_shards as u128 * hash) >> 64) as usize - }; + let h1 = PRIMARY_MIXER.map(key.hash, self.num_shards); + let h2 = SECONDARY_MIXER.map(key.secondary_hash, self.num_shards); // We do not apply a 2-left strategy because our load // estimates can saturate. When that happens, we want to // revert to sharding based on `key.hash`. - let h1 = remap(key.hash, RANDOM_MULTIPLIER); - let h2 = remap(key.secondary_hash, SECONDARY_RANDOM_MULTIPLIER); (h1, self.other_shard_id(h1, h2)) } diff --git a/src/stack.rs b/src/stack.rs index fc850e6..0496db7 100644 --- a/src/stack.rs +++ b/src/stack.rs @@ -103,12 +103,23 @@ impl FullCache for ShardedCache { /// always first access its write-side cache (if defined), and, on /// misses, will attempt to service [`Cache::get`] and /// [`Cache::touch`] calls by iterating over the read-only caches. -#[derive(Debug, Default)] +#[derive(Debug)] pub struct CacheBuilder { write_side: Option<Arc<dyn FullCache>>, + auto_sync: bool, read_side: ReadOnlyCacheBuilder, } +impl Default for CacheBuilder { + fn default() -> CacheBuilder { + CacheBuilder { + write_side: None, + auto_sync: true, + read_side: Default::default(), + } + } +} + /// A [`Cache`] wraps up to one plain or sharded read-write /// cache in a convenient interface, and may optionally fulfill read /// operations by deferring to a list of read-only caches when the @@ -122,12 +133,30 @@ pub struct CacheBuilder { /// of directories: using the same [`Cache`] object improves the /// accuracy of the write cache's lock-free in-memory statistics, when /// it's a sharded cache. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct Cache { + // The write-side cache services writes and is the cache of first + // resort for `get` and `touch`. write_side: Option<Arc<dyn FullCache>>, + // Whether to automatically sync file contents before publishing + // them to the write-side cache. + auto_sync: bool, + // The read-side cache (a list of read-only caches) services `get` + // and `touch` calls when we fail to find something in the + // write-side cache. 
read_side: ReadOnlyCache, } +impl Default for Cache { + fn default() -> Cache { + Cache { + write_side: None, + auto_sync: true, + read_side: Default::default(), + } + } +} + /// Where does a cache hit come from: the primary read-write cache, or /// one of the secondary read-only caches? pub enum CacheHit<'a> { @@ -195,6 +224,26 @@ impl CacheBuilder { self } + /// Sets whether files published to the read-write cache will be + /// automatically flushed to disk with [`File::sync_all`] + /// before sending them to the cache directory. + /// + /// Defaults to true, for safety. Even when `auto_sync` is + /// enabled, Kismet does not `fsync` cache directories; after a + /// kernel or hardware crash, caches may partially revert to an + /// older state, but should not contain incomplete files. + /// + /// An application may want to disable `auto_sync` because it + /// already synchronises files, or because the cache directories + /// do not survive crashes: they might be erased after each boot, + /// e.g., via + /// [tmpfiles.d](https://www.freedesktop.org/software/systemd/man/tmpfiles.d.html), + /// or tagged with a [boot id](https://man7.org/linux/man-pages/man3/sd_id128_get_machine.3.html). + pub fn auto_sync(mut self, sync: bool) -> Self { + self.auto_sync = sync; + self + } + /// Adds a new read-only cache directory at `path` to the end of the /// cache builder's search list. /// @@ -224,12 +273,46 @@ impl CacheBuilder { pub fn build(self) -> Cache { Cache { write_side: self.write_side, + auto_sync: self.auto_sync, read_side: self.read_side.build(), } } } impl Cache { + /// Calls [`File::sync_all`] on `file` if `Cache::auto_sync` + /// is true. + #[inline] + fn maybe_sync(&self, file: &File) -> Result<()> { + if self.auto_sync { + file.sync_all() + } else { + Ok(()) + } + } + + /// Opens `path` and calls [`File::sync_all`] on the resulting + /// file, if `Cache::auto_sync` is true. + /// + /// Panics when [`File::sync_all`] fails. See + /// https://wiki.postgresql.org/wiki/Fsync_Errors or + /// Rebello et al.'s "Can Applications Recover from fsync Failures?" + /// (https://www.usenix.org/system/files/atc20-rebello.pdf) + /// for an idea of the challenges associated with handling + /// fsync failures on persistent files. + fn maybe_sync_path(&self, path: &Path) -> Result<()> { + if self.auto_sync { + // It's really not clear what happens to a file's content + // if we open it just before fsync, and fsync fails. It + // should be safe to just unlink the file. + std::fs::File::open(&path)? + .sync_all() + .expect("auto_sync failed, and failure semantics are unclear for fsync"); + } + + Ok(()) + } + /// Attempts to open a read-only file for `key`. The `Cache` will /// query its write cache (if any), followed by the list of /// additional read-only caches, in definition order, and return a @@ -328,17 +411,19 @@ impl Cache { } // Promotes `file` to `cache`. - fn promote(cache: &dyn FullCache, key: Key, mut file: File) -> Result<File> { + fn promote(cache: &dyn FullCache, sync: bool, key: Key, mut file: File) -> Result<File> { use std::io::Seek; let mut tmp = NamedTempFile::new_in(cache.temp_dir(key)?)?; std::io::copy(&mut file, tmp.as_file_mut())?; // Force the destination file's contents to disk before - // adding it to the read-write cache: the caller can't - // tell us whether they want a `fsync`, so let's be safe - // and assume they do. - tmp.as_file().sync_all()?; + // adding it to the read-write cache, if we're supposed to + // sync files automatically. 
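+    // (The sync-before-publish ordering is what keeps a crash from
+    // exposing a torn entry: the file only becomes visible under
+    // `key` once its contents are durable.)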
+ if sync { + tmp.as_file().sync_all()?; + } + cache.put(key, tmp.path())?; // We got a read-only file. Rewind it before returning. @@ -360,7 +445,9 @@ } else if let Some(mut file) = self.read_side.get(key)? { match judge(CacheHit::Secondary(&mut file)) { CacheHitAction::Accept => return Ok(file), - CacheHitAction::Promote => return promote(get_write_cache(self)?, key, file), + CacheHitAction::Promote => { + return promote(get_write_cache(self)?, self.auto_sync, key, file) + } CacheHitAction::Replace => old = Some(file), } } @@ -370,6 +457,7 @@ // Either way, start by populating a temporary file. let mut tmp = NamedTempFile::new_in(cache.temp_dir(key)?)?; populate(tmp.as_file_mut(), old)?; + self.maybe_sync(tmp.as_file())?; // Grab a read-only return value before publishing the file. let path = tmp.path(); @@ -383,9 +471,21 @@ Ok(ret) } + fn set_impl(&self, key: Key, value: &Path) -> Result<()> { + match self.write_side.as_ref() { + Some(write) => write.set(key, value), + None => Err(Error::new( + ErrorKind::Unsupported, + "no kismet write cache defined", + )), + } + } + /// Inserts or overwrites the file at `value` as `key` in the /// write cache directory. This will always fail with /// [`ErrorKind::Unsupported`] if no write cache was defined. + /// The path at `value` must be in the same filesystem as the + /// write cache directory: we rely on atomic file renames. /// /// Fails with [`ErrorKind::InvalidInput`] if `key.name` is invalid /// (empty, or starts with a dot or a forward or back slash). /// /// Always consumes the file at `value` on success; may consume it /// on error. /// + /// When `auto_sync` is enabled (the default), the file at `value` + /// will always be [`File::sync_all`]ed before publishing to the + /// cache. Kismet will **panic** when the [`File::sync_all`] call + /// itself fails: retrying the same call to [`Cache::set`] could + /// erroneously succeed, since some filesystems clear their internal I/O + /// failure flag after the first `fsync`. + /// /// Executes in a bounded number of file operations, except for /// the lock-free maintenance, which needs time linearithmic in /// the number of files in the directory chosen for maintenance, - /// amortised to logarithmic. + /// amortised to logarithmic, and a constant number of file operations. pub fn set<'a>(&self, key: impl Into<Key<'a>>, value: impl AsRef<Path>) -> Result<()> { + fn doit(this: &Cache, key: Key, value: &Path) -> Result<()> { + this.maybe_sync_path(value)?; + this.set_impl(key, value) + } + + doit(self, key.into(), value.as_ref()) + } + + /// Invokes [`Cache::set`] on a [`tempfile::NamedTempFile`]. + /// + /// See [`Cache::set`] for more details. The only difference is + /// that `set_temp_file` does not panic when `auto_sync` is enabled + /// and we fail to [`File::sync_all`] the [`NamedTempFile`] value. 
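+    ///
+    /// A minimal usage sketch (`cache`, `key`, and `dir` are assumed
+    /// to be in scope; marked `ignore` because it is illustrative):
+    ///
+    /// ```ignore
+    /// use std::io::Write;
+    ///
+    /// let mut tmp = tempfile::NamedTempFile::new_in(dir)?;
+    /// tmp.as_file_mut().write_all(b"fresh contents")?;
+    /// // Syncs first if `auto_sync` is enabled, then publishes atomically.
+    /// cache.set_temp_file(key, tmp)?;
+    /// ```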
+ pub fn set_temp_file<'a>(&self, key: impl Into<Key<'a>>, value: NamedTempFile) -> Result<()> { + fn doit(this: &Cache, key: Key, value: NamedTempFile) -> Result<()> { + this.maybe_sync(value.as_file())?; + this.set_impl(key, value.path()) + } + + doit(self, key.into(), value) + } + + fn put_impl(&self, key: Key, value: &Path) -> Result<()> { match self.write_side.as_ref() { - Some(write) => write.set(key.into(), value.as_ref()), + Some(write) => write.put(key, value), None => Err(Error::new( ErrorKind::Unsupported, "no kismet write cache defined", @@ -411,6 +541,8 @@ /// there is no such cached entry already, or touches the cached /// file if it already exists. This will always fail with /// [`ErrorKind::Unsupported`] if no write cache was defined. + /// The path at `value` must be in the same filesystem as the + /// write cache directory: we rely on atomic hard links. /// /// Fails with [`ErrorKind::InvalidInput`] if `key.name` is invalid /// (empty, or starts with a dot or a forward or back slash). /// /// Always consumes the file at `value` on success; may consume it /// on error. /// + /// When `auto_sync` is enabled (the default), the file at `value` + /// will always be [`File::sync_all`]ed before publishing to the + /// cache. Kismet will **panic** when the [`File::sync_all`] call + /// itself fails: retrying the same call to [`Cache::put`] could + /// erroneously succeed, since some filesystems clear their internal I/O + /// failure flag after the first `fsync`. + /// /// Executes in a bounded number of file operations, except for /// the lock-free maintenance, which needs time linearithmic in /// the number of files in the directory chosen for maintenance, - /// amortised to logarithmic. + /// amortised to logarithmic, and a constant number of file operations. pub fn put<'a>(&self, key: impl Into<Key<'a>>, value: impl AsRef<Path>) -> Result<()> { - match self.write_side.as_ref() { - Some(write) => write.put(key.into(), value.as_ref()), - None => Err(Error::new( - ErrorKind::Unsupported, - "no kismet write cache defined", - )), + fn doit(this: &Cache, key: Key, value: &Path) -> Result<()> { + this.maybe_sync_path(value)?; + this.put_impl(key, value) + } + + doit(self, key.into(), value.as_ref()) + } + + /// Invokes [`Cache::put`] on a [`tempfile::NamedTempFile`]. + /// + /// See [`Cache::put`] for more details. The only difference is + /// that `put_temp_file` does not panic when `auto_sync` is enabled + /// and we fail to [`File::sync_all`] the [`NamedTempFile`] value. + pub fn put_temp_file<'a>(&self, key: impl Into<Key<'a>>, value: NamedTempFile) -> Result<()> { + fn doit(this: &Cache, key: Key, value: NamedTempFile) -> Result<()> { + this.maybe_sync(value.as_file())?; + this.put_impl(key, value.path()) } + + doit(self, key.into(), value) } /// Marks a cache entry for `key` as accessed (read). The [`Cache`] @@ -501,20 +653,45 @@ mod test { // write calls should fail. 
#[test] fn empty() { + use test_dir::{DirBuilder, FileType, TestDir}; + + let temp = TestDir::temp().create("foo", FileType::RandomFile(10)); let cache: Cache = Default::default(); assert!(matches!(cache.get(&TestKey::new("foo")), Ok(None))); assert!( matches!(cache.ensure(&TestKey::new("foo"), |_| unreachable!("should not be called when there is no write side")), - Err(e) if e.kind() == ErrorKind::Unsupported) + Err(e) if e.kind() == ErrorKind::Unsupported) ); - assert!(matches!(cache.set(&TestKey::new("foo"), "/tmp/foo"), + assert!(matches!(cache.set(&TestKey::new("foo"), &temp.path("foo")), Err(e) if e.kind() == ErrorKind::Unsupported)); - assert!(matches!(cache.put(&TestKey::new("foo"), "/tmp/foo"), + assert!(matches!(cache.put(&TestKey::new("foo"), &temp.path("foo")), Err(e) if e.kind() == ErrorKind::Unsupported)); assert!(matches!(cache.touch(&TestKey::new("foo")), Ok(false))); } + // Disable auto_sync; we should get an `Unsupported` error even if the + // input file does not exist. + #[test] + fn empty_no_auto_sync() { + let cache = CacheBuilder::new().auto_sync(false).build(); + + assert!(matches!(cache.get(&TestKey::new("foo")), Ok(None))); + assert!( + matches!(cache.ensure(&TestKey::new("foo"), |_| unreachable!("should not be called when there is no write side")), + Err(e) if e.kind() == ErrorKind::Unsupported) + ); + assert!( + matches!(cache.set(&TestKey::new("foo"), "/no-such-tmp/foo"), + Err(e) if e.kind() == ErrorKind::Unsupported) + ); + assert!( + matches!(cache.put(&TestKey::new("foo"), "/no-such-tmp/foo"), + Err(e) if e.kind() == ErrorKind::Unsupported) + ); + assert!(matches!(cache.touch(&TestKey::new("foo")), Ok(false))); + } + // Fail to find a file, ensure it, then see that we can get it. #[test] fn test_ensure() { @@ -522,7 +699,11 @@ use test_dir::{DirBuilder, TestDir}; let temp = TestDir::temp(); - let cache = CacheBuilder::new().writer(temp.path("."), 1, 10).build(); + // Get some coverage for the no-auto_sync config. + let cache = CacheBuilder::new() + .writer(temp.path("."), 1, 10) + .auto_sync(false) + .build(); let key = TestKey::new("foo"); // The file doesn't exist initially. @@ -958,8 +1139,9 @@ tmp.as_file() .write_all(b"write2") .expect("write must succeed"); + // Exercise put_temp_file as well. cache - .put(&TestKey::new("b"), tmp.path()) + .put_temp_file(&TestKey::new("b"), tmp) .expect("put must succeed"); } @@ -1112,8 +1294,9 @@ tmp.as_file() .write_all(b"write2") .expect("write must succeed"); + // Exercise set_temp_file. cache - .set(&TestKey::new("b"), tmp.path()) + .set_temp_file(&TestKey::new("b"), tmp) .expect("set must succeed"); }
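+
+    // Sketch (added for illustration, using the same test helpers as
+    // the module above): a round trip through `set_temp_file` with
+    // auto_sync disabled; publication should succeed without any
+    // fsync, and `get` should observe the bytes we wrote.
+    #[test]
+    fn test_set_temp_file_no_auto_sync() {
+        use std::io::{Read, Write};
+        use test_dir::{DirBuilder, TestDir};
+
+        let temp = TestDir::temp();
+        let cache = CacheBuilder::new()
+            .writer(temp.path("."), 1, 10)
+            .auto_sync(false)
+            .build();
+
+        let tmp = NamedTempFile::new_in(temp.path(".")).expect("temp file creation must succeed");
+        tmp.as_file()
+            .write_all(b"payload")
+            .expect("write must succeed");
+        cache
+            .set_temp_file(&TestKey::new("k"), tmp)
+            .expect("set_temp_file must succeed");
+
+        let mut contents = Vec::new();
+        cache
+            .get(&TestKey::new("k"))
+            .expect("get must succeed")
+            .expect("entry must exist")
+            .read_to_end(&mut contents)
+            .expect("read must succeed");
+        assert_eq!(contents, b"payload");
+    }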