Skip to content

Commit

Permalink
stack: check close(2) before publishing NamedTempFile to cache
Browse files Browse the repository at this point in the history
Close *can* fail for real I/O reasons, and while it would be ideal
to always fsync:

1. that's an (opt-out) option
2. it can still be nice to detect post-fsync failures
3. with NFS close-to-open consistency, closing is the most reliable
   way to make the file's contents globally visible.

In general, Kismet works on paths, not files, so there's not much
we can do about closing (or not) the underlying File object before
publishing it.

For `NamedTempFile`s though, we get both a file and a path!  We can
extract the file, make sure it's ready for publication (world readable,
fsync-ed if necessary, and then closed) before passing the `TempPath`
to Kismet's usual path-based machinery.

This `TempPath` is what keeps the temporary file path alive, so everything
will still be cleaned up on error, even though the file descriptor
itself is long gone.

Of course, Rust doesn't yet have an `Error`-ful `File::close`, so we
roll our own with libc...
  • Loading branch information
pkhuong committed Sep 10, 2023
1 parent 992c039 commit 8d0bb40
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 51 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Expand Up @@ -11,6 +11,7 @@ repository = "https://github.com/pkhuong/kismet-cache"
derivative = "2"
extendhash = "1"
filetime = "0.2"
libc = "0.2"
rand = "0.8"
tempfile = "3"

Expand Down
122 changes: 71 additions & 51 deletions src/stack.rs
Expand Up @@ -393,46 +393,77 @@ impl CacheBuilder {
}
}

/// Attempts to set the permissions on `file` to `0444`: the tempfile
/// crate always overrides to 0600 when possible, but that doesn't
/// really make sense for kismet: we don't want cache entries we can
/// tell exist, but can't access. Access control should happen via
/// permissions on the cache directory.
/// Prepares a NamedTempFile for publishing as a cached value.
///
/// The file must be made world readable (0600 probably isn't right),
/// fsync-ed if `sync` is true, and then closed... but the path on
/// disk itself kept alive and returned to the caller.
///
/// This is the only place where Kismet is expected to loosen file
/// access permissions: Kismet does tweak permissions just before
/// renaming/linking files to their final destination, but only to
/// *remove* write access, never to add it.
#[cfg(all(target_family = "unix", not(target_os = "wasi")))] // https://github.com/Stebalien/tempfile/blob/8ae928c5f9719664a2e40f8f6c904eab06db122b/src/file/imp/unix.rs#L25C15-L25C33
fn fix_tempfile_permissions(file: &NamedTempFile) -> Result<()> {
use std::fs::Permissions;
use std::os::unix::fs::PermissionsExt;
///
/// N.B., the permissions are always set to `0444`, regardless of
/// umash. Access control should happen via permissions on the cache
/// directory: it's not useful (or maybe even an information leak)
/// to be able to tell an entry exists without accessing it.
fn finalize_tempfile(tempfile: NamedTempFile, sync: bool) -> Result<tempfile::TempPath> {
#[cfg(all(target_family = "unix", not(target_os = "wasi")))] // https://github.com/Stebalien/tempfile/blob/8ae928c5f9719664a2e40f8f6c904eab06db122b/src/file/imp/unix.rs#L25C15-L25C33
fn fix_tempfile_permissions(file: &NamedTempFile) -> Result<()> {
use std::fs::Permissions;
use std::os::unix::fs::PermissionsExt;

file.as_file()
.set_permissions(Permissions::from_mode(0o444))
}

file.as_file()
.set_permissions(Permissions::from_mode(0o444))
}
#[cfg(target_os = "wasi")]
fn fix_tempfile_permissions(file: &NamedTempFile) -> Result<()> {
Ok(())
}

#[cfg(target_os = "wasi")]
fn fix_tempfile_permissions(file: &NamedTempFile) -> Result<()> {
Ok(())
}
#[cfg(not(target_family = "unix"))]
fn fix_tempfile_permissions(_: &NamedTempFile) -> Result<()> {
unimplemented!("Don't know how to fix tempfile permissions on non-unix OSes.");
Ok(())
}

fix_tempfile_permissions(&tempfile)?;

let (file, path) = tempfile.into_parts();
if sync {
file.sync_all()?;
}

// On unix, closing a file can fail and signal important
// information (e.g., with NFS)... bubble that up.
#[cfg(target_family = "unix")]
fn close(file: File) -> Result<()> {
use std::os::fd::IntoRawFd;
if unsafe { libc::close(file.into_raw_fd()) } == 0 {
Ok(())
} else {
Err(Error::last_os_error())
}
}

#[cfg(not(target_family = "unix"))]
fn fix_tempfile_permissions(_: &NamedTempFile) -> Result<()> {
unimplemented!("Don't know how to fix tempfile permissions on non-unix OSes.");
Ok(())
#[cfg(not(target_family = "unix"))]
fn close(file: File) -> Result<()> {
Ok(())
}

close(file)?;
Ok(path)
}

impl Cache {
/// Calls [`File::sync_all`] on `file` if `Cache::auto_sync`
/// is true.
/// Finalizes a [`NamedTempFile`]; that includes calling
/// [`File::sync_all`] on the underlying file if
/// `Cache::auto_sync` is true.
#[inline]
fn maybe_sync(&self, file: &File) -> Result<()> {
if self.auto_sync {
file.sync_all()
} else {
Ok(())
}
fn finalize_tempfile(&self, file: NamedTempFile) -> Result<tempfile::TempPath> {
finalize_tempfile(file, self.auto_sync)
}

/// Opens `path` and calls [`File::sync_all`] on the resulting
Expand Down Expand Up @@ -571,18 +602,10 @@ impl Cache {
fn promote(cache: &dyn FullCache, sync: bool, key: Key, mut file: File) -> Result<File> {
let mut tmp = NamedTempFile::new_in(cache.temp_dir(key)?)?;
std::io::copy(&mut file, tmp.as_file_mut())?;
fix_tempfile_permissions(&tmp)?;
let path = finalize_tempfile(tmp, sync)?;
cache.put(key, &path)?;

// Force the destination file's contents to disk before
// adding it to the read-write cache, if we're supposed to
// sync files automatically.
if sync {
tmp.as_file().sync_all()?;
}

cache.put(key, tmp.path())?;

// We got a read-only file. Rewind it before returning.
// We already have a read-only file. Rewind it before returning.
file.seek(SeekFrom::Start(0))?;
Ok(file)
}
Expand Down Expand Up @@ -682,16 +705,15 @@ impl Cache {
// Either way, start by populating a temporary file.
let mut tmp = NamedTempFile::new_in(cache.temp_dir(key)?)?;
populate(tmp.as_file_mut(), old)?;
fix_tempfile_permissions(&tmp)?;
self.maybe_sync(tmp.as_file())?;

// Grab a read-only return value before publishing the file.
let path = tmp.path();
let mut ret = File::open(path)?;
let mut ret = File::open(tmp.path())?;

let path = self.finalize_tempfile(tmp)?;
if replace {
cache.set(key, path)?;
cache.set(key, &path)?;
} else {
cache.put(key, path)?;
cache.put(key, &path)?;
// Return the now-cached file, if we can get it.
if let Ok(Some(file)) = cache.get(key) {
ret = file;
Expand Down Expand Up @@ -750,9 +772,8 @@ impl Cache {
/// and we fail to [`File::sync_all`] the [`NamedTempFile`] value.
pub fn set_temp_file<'a>(&self, key: impl Into<Key<'a>>, value: NamedTempFile) -> Result<()> {
fn doit(this: &Cache, key: Key, value: NamedTempFile) -> Result<()> {
fix_tempfile_permissions(&value)?;
this.maybe_sync(value.as_file())?;
this.set_impl(key, value.path())
let path = this.finalize_tempfile(value)?;
this.set_impl(key, &path)
}

doit(self, key.into(), value)
Expand Down Expand Up @@ -808,9 +829,8 @@ impl Cache {
/// and we fail to [`File::sync_all`] the [`NamedTempFile`] value.
pub fn put_temp_file<'a>(&self, key: impl Into<Key<'a>>, value: NamedTempFile) -> Result<()> {
fn doit(this: &Cache, key: Key, value: NamedTempFile) -> Result<()> {
fix_tempfile_permissions(&value)?;
this.maybe_sync(value.as_file())?;
this.put_impl(key, value.path())
let path = this.finalize_tempfile(value)?;
this.put_impl(key, &path)
}

doit(self, key.into(), value)
Expand Down

0 comments on commit 8d0bb40

Please sign in to comment.