Skip to content

Commit

Permalink
tar/import: Handle hardlinked changes into /sysroot
Browse files Browse the repository at this point in the history
Some container build processes run without overlayfs inode indexing
on - in this common scenario, overlayfs is not quite POSIX compliant
because it will break the hardlink instead of modifying all versions.

We need to handle this case of having *all* names for a hardlinked
file being modified though too.  If the serialized tar stream
has the file in `/sysroot` be the canonical version, then because
we drop out that file here, we'll fail to import.

Fix this by significantly beefing up the tar filtering/reprocessing
logic:

 - When we see a *modified* file in `/sysroot` with a nonzero timestamp,
   cache its data into a lookaside temporary directory
 - If we then see a hardlink to that file path, make *that* file
   be the canonical version in e.g. `/usr`.
 - Any further hardlinks to `/sysroot` instead become hardlinks
   to the new canonical one.

(Arguably perhaps...we should actually not have used hardlinks
 in ostree containers at all, but injected this metadata in
 some other way.  But, the ship has sailed on that)

Closes: #405
  • Loading branch information
cgwalters committed Nov 11, 2022
1 parent 258cc97 commit 43e1648
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 9 deletions.
2 changes: 1 addition & 1 deletion lib/src/tar/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const MAX_METADATA_SIZE: u32 = 10 * 1024 * 1024;
pub(crate) const SMALL_REGFILE_SIZE: usize = 127 * 1024;

// The prefix for filenames that contain content we actually look at.
const REPO_PREFIX: &str = "sysroot/ostree/repo/";
pub(crate) const REPO_PREFIX: &str = "sysroot/ostree/repo/";
/// Statistics from import.
#[derive(Debug, Default)]
struct ImportStats {
Expand Down
73 changes: 70 additions & 3 deletions lib/src/tar/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,19 @@
use crate::Result;
use anyhow::{anyhow, Context};
use camino::{Utf8Component, Utf8Path, Utf8PathBuf};

use cap_std_ext::cap_std;
use cap_std_ext::cmdext::CapStdExtCommandExt;
use cap_std_ext::rustix;
use once_cell::unsync::OnceCell;
use ostree::gio;
use ostree::prelude::FileExt;
use rustix::fd::FromFd;
use std::collections::BTreeMap;
use std::io::{BufWriter, Write};
use std::collections::{BTreeMap, HashMap};
use std::io::{BufWriter, Seek, Write};
use std::path::Path;
use std::process::Stdio;

use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite};
use tracing::instrument;
Expand Down Expand Up @@ -164,11 +168,74 @@ pub(crate) fn filter_tar(
let mut filtered = BTreeMap::new();

let ents = src.entries()?;

// Lookaside data for dealing with hardlinked files into /sysroot; see below.
let mut changed_sysroot_objects = HashMap::new();
let mut new_sysroot_link_targets = HashMap::<Utf8PathBuf, Utf8PathBuf>::new();
// A temporary directory if needed
let tmpdir = OnceCell::new();

for entry in ents {
let entry = entry?;
let mut entry = entry?;
let header = entry.header();
let path = entry.path()?;
let path: &Utf8Path = (&*path).try_into()?;

let is_modified = header.mtime().unwrap_or_default() > 0;
let is_regular = header.entry_type() == tar::EntryType::Regular;
if path.strip_prefix(crate::tar::REPO_PREFIX).is_ok() {
// If it's a modified file in /sysroot, it may be a target for future hardlinks.
// In that case, we copy the data off to a temporary file. Then the first hardlink
// to it becomes instead the real file, and any *further* hardlinks refer to that
// file instead.
if is_modified && is_regular {
tracing::debug!("Processing modified sysroot file {path}");
// Lazily allocate a temporary directory
let tmpdir = tmpdir.get_or_try_init(|| {
let vartmp = &cap_std::fs::Dir::open_ambient_dir(
"/var/tmp",
cap_std::ambient_authority(),
)?;
cap_tempfile::tempdir_in(vartmp)
})?;
// Create an O_TMPFILE (anonymous file) to use as a temporary store for the file data
let mut tmpf = cap_tempfile::TempFile::new_anonymous(tmpdir).map(BufWriter::new)?;
let path = path.to_owned();
let header = header.clone();
std::io::copy(&mut entry, &mut tmpf)?;
let mut tmpf = tmpf.into_inner()?;
tmpf.seek(std::io::SeekFrom::Start(0))?;
// Cache this data, indexed by the file path
changed_sysroot_objects.insert(path, (header, tmpf));
continue;
}
} else if header.entry_type() == tar::EntryType::Link && is_modified {
let target = header
.link_name()?
.ok_or_else(|| anyhow!("Invalid empty hardlink"))?;
let target: &Utf8Path = (&*target).try_into()?;
// If this is a hardlink into /sysroot...
if target.strip_prefix(crate::tar::REPO_PREFIX).is_ok() {
// And we found a previously processed modified file there
if let Some((mut header, data)) = changed_sysroot_objects.remove(target) {
tracing::debug!("Making {path} canonical for sysroot link {target}");
// Make *this* entry the canonical one, consuming the temporary file data
dest.append_data(&mut header, path, data)?;
// And cache this file path as the new link target
new_sysroot_link_targets.insert(target.to_owned(), path.to_owned());
} else if let Some(target) = new_sysroot_link_targets.get(path) {
tracing::debug!("Relinking {path} to {target}");
// We found a 2nd (or 3rd, etc.) link into /sysroot; rewrite the link
// target to be the first file outside of /sysroot we found.
let mut header = header.clone();
dest.append_link(&mut header, path, target)?;
} else {
tracing::debug!("Found unhandled modified link from {path} to {target}");
}
continue;
}
}

let normalized = match normalize_validate_path(path)? {
NormalizedPathResult::Filtered(path) => {
if let Some(v) = filtered.get_mut(path) {
Expand Down
16 changes: 11 additions & 5 deletions lib/tests/it/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1261,11 +1261,17 @@ async fn test_container_write_derive_sysroot_hardlink() -> Result<()> {
store::PrepareResult::AlreadyPresent(_) => panic!("should not be already imported"),
store::PrepareResult::Ready(r) => r,
};
// Should fail for now
assert_err_contains(
imp.import(prep).await,
"Failed to find object: No such file or directory: sysroot",
);
let import = imp.import(prep).await.unwrap();

// Verify we have the new file
bash_in!(
&fixture.dir,
r#"set -x;
ostree --repo=dest/repo ls ${r} /usr/bin/bash >/dev/null
test "$(ostree --repo=dest/repo cat ${r} /usr/bin/bash)" = "hello"
"#,
r = import.merge_commit.as_str()
)?;

Ok(())
}
Expand Down

0 comments on commit 43e1648

Please sign in to comment.