Skip to content

Commit

Permalink
readonly: new public interface for a stack of read-only cache dirs
Browse files Browse the repository at this point in the history
When storing cache data in filesystem directories, it often makes
sense to pull from a few read-only directories, e.g., when migrating
from one cache scheme to another, or when reading from another
program's cache.

The `ReadOnlyCache` abstracts that process, while hiding the
difference in type between plain and sharded caches.

TESTED=new smoke test.
  • Loading branch information
pkhuong committed Sep 6, 2021
1 parent e5dca69 commit 3030efd
Show file tree
Hide file tree
Showing 2 changed files with 348 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
mod cache_dir;
mod plain;
pub mod raw_cache;
mod readonly;
pub mod second_chance;
mod sharded;
mod trigger;

pub use plain::PlainCache;
pub use readonly::ReadOnlyCache;
pub use readonly::ReadOnlyCacheBuilder;
pub use sharded::ShardedCache;

/// Sharded cache keys consist of a filename and two hash values. The
Expand Down
345 changes: 345 additions & 0 deletions src/readonly.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
//! A `ReadOnlyCache` wraps an arbitrary number of caches, and
//! attempts to satisfy `get` and `touch` requests by hitting each
//! cache in order. For read-only use-cases, this should be a simple
//! and easy-to-use interface that erases the difference between plain
//! and sharded caches.
use std::fs::File;
use std::io::Result;
use std::path::Path;
use std::sync::Arc;

use crate::Key;
use crate::PlainCache;
use crate::ShardedCache;

/// The `ReadSide` trait offers `get` and `touch`, as implemented by
/// both plain and sharded caches.
///
/// The trait is private to this module.  It exists so that caches of
/// different concrete types can sit side by side as
/// `Box<dyn ReadSide>` in one search list.  The `Debug` supertrait
/// lets the structs that hold those boxes derive `Debug`.
trait ReadSide: std::fmt::Debug {
/// Returns a read-only file for `key` in the cache directory if
/// it exists, or `None` if there is no such file.
///
/// Implicitly "touches" the cached file if it exists.
///
/// # Errors
///
/// Returns whatever `std::io::Error` the underlying cache reports.
fn get(&self, key: Key) -> Result<Option<File>>;

/// Marks the cached file `key` as newly used, if it exists.
///
/// Returns whether a file for `key` exists in the cache.
///
/// # Errors
///
/// Returns whatever `std::io::Error` the underlying cache reports.
fn touch(&self, key: Key) -> Result<bool>;
}

impl ReadSide for PlainCache {
fn get(&self, key: Key) -> Result<Option<File>> {
PlainCache::get(self, key.name)
}

fn touch(&self, key: Key) -> Result<bool> {
PlainCache::touch(self, key.name)
}
}

// `ReadSide` for sharded caches: the whole `Key` is forwarded
// unchanged.
//
// NOTE(review): the calls are spelled `ShardedCache::get(self, key)`
// with the type named explicitly.  They presumably resolve to
// `ShardedCache`'s inherent methods, which are defined outside this
// file.  If those inherent methods were ever renamed or removed, the
// same paths would resolve to this trait impl and recurse forever, so
// confirm they still exist before touching these bodies.
impl ReadSide for ShardedCache {
// Forwards the lookup for `key` to the sharded cache.
fn get(&self, key: Key) -> Result<Option<File>> {
ShardedCache::get(self, key)
}

// Forwards the touch for `key` to the sharded cache.
fn touch(&self, key: Key) -> Result<bool> {
ShardedCache::touch(self, key)
}
}

/// Construct a [`ReadOnlyCache`] with this builder.  The resulting
/// cache will access each constituent cache directory in the order
/// they were added.
///
/// `ReadOnlyCacheBuilder::default()` is equivalent to
/// `ReadOnlyCacheBuilder::new()`: both start with an empty search
/// list.
#[derive(Debug, Default)]
pub struct ReadOnlyCacheBuilder {
    /// Constituent caches in lookup order: the first one added is the
    /// first one consulted.
    stack: Vec<Box<dyn ReadSide>>,
}

impl ReadOnlyCacheBuilder {
    /// Creates a builder whose search list is still empty.
    pub fn new() -> Self {
        let stack = Vec::new();

        Self { stack }
    }

    /// Appends the cache directory at `path` to the builder's search
    /// list, picking the cache flavour from `num_shards`.
    ///
    /// A `num_shards` of 0 or 1 registers a plain cache directory;
    /// any larger value registers a sharded directory with that many
    /// shards.
    pub fn add(self, path: impl AsRef<Path>, num_shards: usize) -> Self {
        match num_shards {
            0 | 1 => self.add_plain(path),
            shards => self.add_sharded(path, shards),
        }
    }

    /// Appends a plain cache directory at `path` to the builder's
    /// search list.  In a plain cache directory, each file is simply
    /// named after its key's name.
    pub fn add_plain(mut self, path: impl AsRef<Path>) -> Self {
        let dir = path.as_ref().to_owned();
        // NOTE(review): the second argument is presumably the cache's
        // capacity, with `usize::MAX` meaning "never evict" -- confirm
        // against `PlainCache::new`.
        let layer = PlainCache::new(dir, usize::MAX);

        self.stack.push(Box::new(layer));
        self
    }

    /// Appends a sharded cache directory at `path`, split in
    /// `num_shards` shards, to the builder's search list.
    pub fn add_sharded(mut self, path: impl AsRef<Path>, num_shards: usize) -> Self {
        let dir = path.as_ref().to_owned();
        // NOTE(review): the last argument is presumably the cache's
        // capacity, with `usize::MAX` meaning "never evict" -- confirm
        // against `ShardedCache::new`.
        let layer = ShardedCache::new(dir, num_shards, usize::MAX);

        self.stack.push(Box::new(layer));
        self
    }

    /// Consumes the builder and returns a `ReadOnlyCache` that
    /// searches the registered cache directories in the order they
    /// were added.
    pub fn build(self) -> ReadOnlyCache {
        let Self { stack } = self;

        ReadOnlyCache::new(stack)
    }
}

/// A `ReadOnlyCache` wraps an arbitrary number of caches, and
/// attempts to satisfy `get` and `touch` requests by hitting each
/// constituent cache in order.  This interface hides the difference
/// between plain and sharded cache directories, and should be the
/// first resort for read-only uses.
///
/// Cloning is cheap: the list of constituent caches lives behind an
/// `Arc`, so every clone shares the same list.
#[derive(Clone, Debug)]
pub struct ReadOnlyCache {
// Constituent caches in lookup order; fixed once the cache is built.
//
// NOTE(review): `dyn ReadSide` carries no `Send` / `Sync` bound, so
// as declared this type is neither `Send` nor `Sync` despite the
// `Arc`.  Confirm whether sharing across threads is intended.
stack: Arc<[Box<dyn ReadSide>]>,
}

impl ReadOnlyCache {
fn new(stack: Vec<Box<dyn ReadSide>>) -> ReadOnlyCache {
ReadOnlyCache {
stack: stack.into_boxed_slice().into(),
}
}

/// Attempts to open a read-only file for `key`. The
/// `ReadOnlyCache` will query each constituent cache in order of
/// registration, and return a read-only file for the first hit.
///
/// Returns `None` if no file for `key` can be found in any of the
/// constituent caches, and bubbles up the first I/O error
/// encountered, if any.
pub fn get<'a>(&self, key: impl Into<Key<'a>>) -> Result<Option<File>> {
fn doit(stack: &[Box<dyn ReadSide>], key: Key) -> Result<Option<File>> {
for cache in stack.iter() {
if let Some(ret) = cache.get(key)? {
return Ok(Some(ret));
}
}

Ok(None)
}

doit(&*self.stack, key.into())
}

/// Marks a cache entry for `key` as accessed (read). The
/// `ReadOnlyCache` will touch the same file that would be returned
/// by `get`.
///
/// Returns whether a file for `key` could be found, and bubbles
/// up the first I/O error encountered, if any.
pub fn touch<'a>(&self, key: impl Into<Key<'a>>) -> Result<bool> {
fn doit(stack: &[Box<dyn ReadSide>], key: Key) -> Result<bool> {
for cache in stack.iter() {
if cache.touch(key)? {
return Ok(true);
}
}

Ok(false)
}

doit(&*self.stack, key.into())
}
}

#[cfg(test)]
mod test {
    use std::io::{Read, Write};
    use std::path::Path;

    use tempfile::NamedTempFile;
    use test_dir::{DirBuilder, FileType, TestDir};

    use crate::Key;
    use crate::PlainCache;
    use crate::ReadOnlyCache;
    use crate::ReadOnlyCacheBuilder;
    use crate::ShardedCache;

    /// Owned key name for tests.  Converting a `&TestKey` yields a
    /// `Key` with that name and the fixed hash pair `(0, 1)`.
    struct TestKey {
        key: String,
    }

    impl TestKey {
        fn new(key: &str) -> TestKey {
            TestKey {
                key: key.to_owned(),
            }
        }
    }

    impl<'a> From<&'a TestKey> for Key<'a> {
        fn from(x: &'a TestKey) -> Key<'a> {
            Key::new(&x.key, 0, 1)
        }
    }

    /// Creates a temporary file under `dir` that holds exactly
    /// `contents`.  The file is kept alive by the returned handle.
    fn staged(dir: impl AsRef<Path>, contents: &[u8]) -> NamedTempFile {
        let file = NamedTempFile::new_in(dir).expect("new temp file must succeed");

        file.as_file()
            .write_all(contents)
            .expect("write must succeed");
        file
    }

    /// Fetches `name` through `ro` and returns the file's full
    /// contents.  Panics if the lookup fails or finds nothing.
    fn slurp(ro: &ReadOnlyCache, name: &str) -> Vec<u8> {
        let mut file = ro
            .get(&TestKey::new(name))
            .expect("must succeed")
            .expect("must exist");
        let mut bytes = Vec::new();

        file.read_to_end(&mut bytes).expect("read must succeed");
        bytes
    }

    /// An empty stack should always succeed with a trivial result.
    #[test]
    fn empty() {
        let ro = ReadOnlyCacheBuilder::new().build();

        assert!(matches!(ro.get(Key::new("foo", 1, 2)), Ok(None)));
        assert!(matches!(ro.touch(Key::new("foo", 1, 2)), Ok(false)));
    }

    /// Populate a plain and a sharded cache.  We should be able to
    /// access both, and the registration order must decide which
    /// cache wins when both hold the same name.
    #[test]
    fn smoke_test() {
        let temp = TestDir::temp()
            .create("sharded", FileType::Dir)
            .create("plain", FileType::Dir);

        // Fill the sharded cache with `a` and `b`.
        {
            let sharded = ShardedCache::new(temp.path("sharded"), 10, 20);

            let first = staged(
                sharded.temp_dir(None).expect("temp_dir must succeed"),
                b"sharded",
            );
            sharded
                .put(Key::new("a", 0, 1), first.path())
                .expect("put must succeed");

            let second = staged(
                sharded.temp_dir(None).expect("temp_dir must succeed"),
                b"sharded2",
            );
            sharded
                .put(Key::new("b", 0, 1), second.path())
                .expect("put must succeed");
        }

        // Fill the plain cache with `b` and `c`.
        {
            let plain = PlainCache::new(temp.path("plain"), 10);

            let first = staged(plain.temp_dir().expect("temp_dir must succeed"), b"plain");
            plain.put("b", first.path()).expect("put must succeed");

            let second = staged(plain.temp_dir().expect("temp_dir must succeed"), b"plain2");
            plain.put("c", second.path()).expect("put must succeed");
        }

        // At this point:
        //   sharded.a => "sharded"
        //   sharded.b => "sharded2"
        //   plain.b   => "plain"
        //   plain.c   => "plain2"

        // Sharded first, plain second: `b` must come from sharded.
        {
            let ro = ReadOnlyCacheBuilder::new()
                .add_sharded(temp.path("sharded"), 10)
                .add_plain(temp.path("plain"))
                .build();

            assert!(matches!(ro.get(&TestKey::new("Missing")), Ok(None)));
            assert!(matches!(ro.touch(&TestKey::new("Missing")), Ok(false)));

            // We should be able to touch `a`.
            assert!(matches!(ro.touch(&TestKey::new("a")), Ok(true)));

            // And now check that we get the correct file contents.
            assert_eq!(slurp(&ro, "a"), b"sharded");
            assert_eq!(slurp(&ro, "b"), b"sharded2");
            assert_eq!(slurp(&ro, "c"), b"plain2");
        }

        // Plain first, sharded second: `b` must now come from plain.
        {
            let ro = ReadOnlyCacheBuilder::new()
                .add(temp.path("plain"), 1)
                .add(temp.path("sharded"), 10)
                .build();

            assert_eq!(slurp(&ro, "a"), b"sharded");
            assert_eq!(slurp(&ro, "b"), b"plain");
            assert_eq!(slurp(&ro, "c"), b"plain2");
        }
    }
}

0 comments on commit 3030efd

Please sign in to comment.