From 3030efde1b0aeeab1fc4da2271e3504779e810fb Mon Sep 17 00:00:00 2001
From: Paul Khuong
Date: Mon, 6 Sep 2021 19:31:48 -0400
Subject: [PATCH] readonly: new public interface for a stack of read-only cache dirs

When storing cache data in filesystem directories, it often makes
sense to pull from a few read-only directories, e.g., when migrating
from one cache scheme to another, or when reading from another
program's cache.

The `ReadOnlyCache` abstracts that process, while hiding the
difference in type between plain and sharded caches.

TESTED=new smoke test.
---
 src/lib.rs      |   3 +
 src/readonly.rs | 345 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 348 insertions(+)
 create mode 100644 src/readonly.rs

diff --git a/src/lib.rs b/src/lib.rs
index fa7a0c0..db8a391 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,11 +1,14 @@
 mod cache_dir;
 mod plain;
 pub mod raw_cache;
+mod readonly;
 pub mod second_chance;
 mod sharded;
 mod trigger;
 
 pub use plain::PlainCache;
+pub use readonly::ReadOnlyCache;
+pub use readonly::ReadOnlyCacheBuilder;
 pub use sharded::ShardedCache;
 
 /// Sharded cache keys consist of a filename and two hash values. The
diff --git a/src/readonly.rs b/src/readonly.rs
new file mode 100644
index 0000000..06581b3
--- /dev/null
+++ b/src/readonly.rs
@@ -0,0 +1,345 @@
+//! A `ReadOnlyCache` wraps an arbitrary number of caches, and
+//! attempts to satisfy `get` and `touch` requests by hitting each
+//! cache in order. For read-only use-cases, this should be a simple
+//! and easy-to-use interface that erases the difference between plain
+//! and sharded caches.
+use std::fs::File;
+use std::io::Result;
+use std::path::Path;
+use std::sync::Arc;
+
+use crate::Key;
+use crate::PlainCache;
+use crate::ShardedCache;
+
+/// The `ReadSide` trait offers `get` and `touch`, as implemented by
+/// both plain and sharded caches.
+trait ReadSide: std::fmt::Debug {
+    /// Returns a read-only file for `key` in the cache directory if
+    /// it exists, or None if there is no such file.
+    ///
+    /// Implicitly "touches" the cached file if it exists.
+    fn get(&self, key: Key) -> Result<Option<File>>;
+
+    /// Marks the cached file `key` as newly used, if it exists.
+    ///
+    /// Returns whether a file for `key` exists in the cache.
+    fn touch(&self, key: Key) -> Result<bool>;
+}
+
+impl ReadSide for PlainCache {
+    fn get(&self, key: Key) -> Result<Option<File>> {
+        PlainCache::get(self, key.name)
+    }
+
+    fn touch(&self, key: Key) -> Result<bool> {
+        PlainCache::touch(self, key.name)
+    }
+}
+
+impl ReadSide for ShardedCache {
+    fn get(&self, key: Key) -> Result<Option<File>> {
+        ShardedCache::get(self, key)
+    }
+
+    fn touch(&self, key: Key) -> Result<bool> {
+        ShardedCache::touch(self, key)
+    }
+}
+
+/// Construct a `ReadOnlyCache` with this builder. The resulting
+/// cache will access each constituent cache directory in the order
+/// they were added.
+#[derive(Debug)]
+pub struct ReadOnlyCacheBuilder {
+    stack: Vec<Box<dyn ReadSide>>,
+}
+
+impl ReadOnlyCacheBuilder {
+    /// Returns a fresh empty builder.
+    pub fn new() -> Self {
+        Self { stack: Vec::new() }
+    }
+
+    /// Adds a new cache directory at `path` to the end of the cache
+    /// builder's search list.
+    ///
+    /// Adds a plain cache directory if `num_shards <= 1`, and an
+    /// actual sharded directory otherwise.
+    pub fn add(self, path: impl AsRef<Path>, num_shards: usize) -> Self {
+        if num_shards <= 1 {
+            self.add_plain(path)
+        } else {
+            self.add_sharded(path, num_shards)
+        }
+    }
+
+    /// Adds a new plain cache directory at `path` to the end of the
+    /// cache builder's search list. A plain cache directory is
+    /// merely a directory of files where the files' names match their
+    /// key's name.
+    pub fn add_plain(mut self, path: impl AsRef<Path>) -> Self {
+        self.stack.push(Box::new(PlainCache::new(
+            path.as_ref().to_owned(),
+            usize::MAX,
+        )));
+
+        self
+    }
+
+    /// Adds a new sharded cache directory at `path` to the end of the
+    /// cache builder's search list.
+    pub fn add_sharded(mut self, path: impl AsRef<Path>, num_shards: usize) -> Self {
+        self.stack.push(Box::new(ShardedCache::new(
+            path.as_ref().to_owned(),
+            num_shards,
+            usize::MAX,
+        )));
+        self
+    }
+
+    /// Returns a fresh `ReadOnlyCache` for the builder's search list
+    /// of constituent cache directories.
+    pub fn build(self) -> ReadOnlyCache {
+        ReadOnlyCache::new(self.stack)
+    }
+}
+
+/// A `ReadOnlyCache` wraps an arbitrary number of caches, and
+/// attempts to satisfy `get` and `touch` requests by hitting each
+/// constituent cache in order. This interface hides the difference
+/// between plain and sharded cache directories, and should be the
+/// first resort for read-only uses.
+#[derive(Clone, Debug)]
+pub struct ReadOnlyCache {
+    stack: Arc<[Box<dyn ReadSide>]>,
+}
+
+impl ReadOnlyCache {
+    fn new(stack: Vec<Box<dyn ReadSide>>) -> ReadOnlyCache {
+        ReadOnlyCache {
+            stack: stack.into_boxed_slice().into(),
+        }
+    }
+
+    /// Attempts to open a read-only file for `key`. The
+    /// `ReadOnlyCache` will query each constituent cache in order of
+    /// registration, and return a read-only file for the first hit.
+    ///
+    /// Returns `None` if no file for `key` can be found in any of the
+    /// constituent caches, and bubbles up the first I/O error
+    /// encountered, if any.
+    pub fn get<'a>(&self, key: impl Into<Key<'a>>) -> Result<Option<File>> {
+        fn doit(stack: &[Box<dyn ReadSide>], key: Key) -> Result<Option<File>> {
+            for cache in stack.iter() {
+                if let Some(ret) = cache.get(key)? {
+                    return Ok(Some(ret));
+                }
+            }
+
+            Ok(None)
+        }
+
+        doit(&*self.stack, key.into())
+    }
+
+    /// Marks a cache entry for `key` as accessed (read). The
+    /// `ReadOnlyCache` will touch the same file that would be returned
+    /// by `get`.
+    ///
+    /// Returns whether a file for `key` could be found, and bubbles
+    /// up the first I/O error encountered, if any.
+    pub fn touch<'a>(&self, key: impl Into<Key<'a>>) -> Result<bool> {
+        fn doit(stack: &[Box<dyn ReadSide>], key: Key) -> Result<bool> {
+            for cache in stack.iter() {
+                if cache.touch(key)? {
+                    return Ok(true);
+                }
+            }
+
+            Ok(false)
+        }
+
+        doit(&*self.stack, key.into())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::Key;
+    use crate::PlainCache;
+    use crate::ReadOnlyCacheBuilder;
+    use crate::ShardedCache;
+
+    struct TestKey {
+        key: String,
+    }
+
+    impl TestKey {
+        fn new(key: &str) -> TestKey {
+            TestKey {
+                key: key.to_string(),
+            }
+        }
+    }
+
+    impl<'a> From<&'a TestKey> for Key<'a> {
+        fn from(x: &'a TestKey) -> Key<'a> {
+            Key::new(&x.key, 0, 1)
+        }
+    }
+
+    /// An empty stack should always succeed with a trivial result.
+    #[test]
+    fn empty() {
+        let ro = ReadOnlyCacheBuilder::new().build();
+
+        assert!(matches!(ro.get(Key::new("foo", 1, 2)), Ok(None)));
+        assert!(matches!(ro.touch(Key::new("foo", 1, 2)), Ok(false)));
+    }
+
+    /// Populate a plain and a sharded cache. We should be able to access
+    /// both.
+    #[test]
+    fn smoke_test() {
+        use std::io::{Read, Write};
+        use tempfile::NamedTempFile;
+        use test_dir::{DirBuilder, FileType, TestDir};
+
+        let temp = TestDir::temp()
+            .create("sharded", FileType::Dir)
+            .create("plain", FileType::Dir);
+
+        {
+            let cache = ShardedCache::new(temp.path("sharded"), 10, 20);
+
+            let tmp = NamedTempFile::new_in(cache.temp_dir(None).expect("temp_dir must succeed"))
+                .expect("new temp file must succeed");
+            tmp.as_file()
+                .write_all(b"sharded")
+                .expect("write must succeed");
+
+            cache
+                .put(Key::new("a", 0, 1), tmp.path())
+                .expect("put must succeed");
+
+            let tmp2 = NamedTempFile::new_in(cache.temp_dir(None).expect("temp_dir must succeed"))
+                .expect("new temp file must succeed");
+            tmp2.as_file()
+                .write_all(b"sharded2")
+                .expect("write must succeed");
+
+            cache
+                .put(Key::new("b", 0, 1).into(), tmp2.path())
+                .expect("put must succeed");
+        }
+
+        {
+            let cache = PlainCache::new(temp.path("plain"), 10);
+
+            let tmp = NamedTempFile::new_in(cache.temp_dir().expect("temp_dir must succeed"))
+                .expect("new temp file must succeed");
+            tmp.as_file()
+                .write_all(b"plain")
+                .expect("write must succeed");
+
+            cache.put("b", tmp.path()).expect("put must succeed");
+
+            let tmp2 = NamedTempFile::new_in(cache.temp_dir().expect("temp_dir must succeed"))
+                .expect("new temp file must succeed");
+            tmp2.as_file()
+                .write_all(b"plain2")
+                .expect("write must succeed");
+
+            cache.put("c", tmp2.path()).expect("put must succeed");
+        }
+
+        // sharded.a => "sharded"
+        // sharded.b => "sharded2"
+        // plain.b => "plain"
+        // plain.c => "plain2"
+
+        // Read from sharded, then plain.
+        {
+            let ro = ReadOnlyCacheBuilder::new()
+                .add_sharded(temp.path("sharded"), 10)
+                .add_plain(temp.path("plain"))
+                .build();
+
+            assert!(matches!(ro.get(&TestKey::new("Missing")), Ok(None)));
+            assert!(matches!(ro.touch(&TestKey::new("Missing")), Ok(false)));
+
+            // We should be able to touch `a`.
+            assert!(matches!(ro.touch(&TestKey::new("a")), Ok(true)));
+
+            // And now check that we get the correct file contents.
+            {
+                let mut a_file = ro
+                    .get(&TestKey::new("a"))
+                    .expect("must succeed")
+                    .expect("must exist");
+                let mut dst = Vec::new();
+                a_file.read_to_end(&mut dst).expect("read must succeed");
+                assert_eq!(&dst, b"sharded");
+            }
+
+            {
+                let mut b_file = ro
+                    .get(&TestKey::new("b"))
+                    .expect("must succeed")
+                    .expect("must exist");
+                let mut dst = Vec::new();
+                b_file.read_to_end(&mut dst).expect("read must succeed");
+                assert_eq!(&dst, b"sharded2");
+            }
+
+            {
+                let mut c_file = ro
+                    .get(&TestKey::new("c"))
+                    .expect("must succeed")
+                    .expect("must exist");
+                let mut dst = Vec::new();
+                c_file.read_to_end(&mut dst).expect("read must succeed");
+                assert_eq!(&dst, b"plain2");
+            }
+        }
+
+        // Read from plain then sharded.
+        {
+            let ro = ReadOnlyCacheBuilder::new()
+                .add(temp.path("plain"), 1)
+                .add(temp.path("sharded"), 10)
+                .build();
+
+            {
+                let mut a_file = ro
+                    .get(&TestKey::new("a"))
+                    .expect("must succeed")
+                    .expect("must exist");
+                let mut dst = Vec::new();
+                a_file.read_to_end(&mut dst).expect("read must succeed");
+                assert_eq!(&dst, b"sharded");
+            }
+
+            {
+                let mut b_file = ro
+                    .get(&TestKey::new("b"))
+                    .expect("must succeed")
+                    .expect("must exist");
+                let mut dst = Vec::new();
+                b_file.read_to_end(&mut dst).expect("read must succeed");
+                assert_eq!(&dst, b"plain");
+            }
+
+            {
+                let mut c_file = ro
+                    .get(&TestKey::new("c"))
+                    .expect("must succeed")
+                    .expect("must exist");
+                let mut dst = Vec::new();
+                c_file.read_to_end(&mut dst).expect("read must succeed");
+                assert_eq!(&dst, b"plain2");
+            }
+        }
+    }
+}