Skip to content

Commit

Permalink
namespace: rework how we resolve symlinks in mount points
Browse files Browse the repository at this point in the history
Before this patch we'd resolve all symlinks of bind mounts and other
mount points to establish for a service in advance, and only then start
mounting them. This is problematic, if symlink chains jump around
between directories in a namespace tree, so that to resolve a specific
symlink chain we need to establish another mount already. A typical case
where this happens is if /etc/resolv.conf is a symlink to some file in
/run: in that case we'd normally resolve and mount /etc/resolv.conf
early on, but that's broken, as to do this properly we'd need to resolve
/etc/resolv.conf first, then figure out that /run needs to be mounted
before we can proceed, and thus reorder the order in which we apply
mounts dynamically.

With this change, whenever we are about to apply a mount, we'll do a
single step of the symlink normalization process, patch the mount entry
accordingly, and then sort the list of mounts to establish again, taking
the new path into account. This means that we can correctly deal with
the example above: we might start with wanting to mount /etc/resolv.conf
early, but after resolving it to the path in /run/ we'd push it to the
end of the list, ensuring that /run is mounted first.

(Note that this also fixes another bug: we were following symlinks on
the bind mount source relative to the root directory of the service,
rather than of the host. That's wrong though as we explicitly document
tha the source of bind mounts is always on the host.)
  • Loading branch information
poettering committed Apr 18, 2018
1 parent e871786 commit 088696f
Showing 1 changed file with 84 additions and 38 deletions.
122 changes: 84 additions & 38 deletions src/core/namespace.c
Expand Up @@ -61,12 +61,14 @@ typedef struct MountEntry {
bool ignore:1; /* Ignore if path does not exist? */
bool has_prefix:1; /* Already is prefixed by the root dir? */
bool read_only:1; /* Shall this mount point be read-only? */
bool applied:1; /* Already applied */
char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */
const char *source_const; /* The source path, for bind mounts */
char *source_malloc;
const char *options_const;/* Mount options for tmpfs */
char *options_malloc;
unsigned long flags; /* Mount flags used by EMPTY_DIR and TMPFS. Do not include MS_RDONLY here, but please use read_only. */
unsigned n_followed;
} MountEntry;

/* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted
Expand Down Expand Up @@ -410,7 +412,6 @@ static int mount_path_compare(const void *a, const void *b) {
/* If the paths are equal, check the mode */
if (p->mode < q->mode)
return -1;

if (p->mode > q->mode)
return 1;

Expand Down Expand Up @@ -454,8 +455,10 @@ static void drop_duplicates(MountEntry *m, unsigned *n) {
for (f = m, t = m, previous = NULL; f < m + *n; f++) {

/* The first one wins (which is the one with the more restrictive mode), see mount_path_compare()
* above. */
if (previous && path_equal(mount_entry_path(f), mount_entry_path(previous))) {
* above. Note that we only drop duplicates that haven't been mounted yet. */
if (previous &&
path_equal(mount_entry_path(f), mount_entry_path(previous)) &&
!f->applied && !previous->applied) {
log_debug("%s is duplicate.", mount_entry_path(f));
previous->read_only = previous->read_only || mount_entry_read_only(f); /* Propagate the read-only flag to the remaining entry */
mount_entry_done(f);
Expand Down Expand Up @@ -817,36 +820,37 @@ static int mount_tmpfs(const MountEntry *m) {
return 1;
}

static int mount_entry_chase(
static int follow_symlink(
const char *root_directory,
const MountEntry *m,
const char *path,
bool chase_nonexistent,
char **location) {
MountEntry *m) {

char *chased;
_cleanup_free_ char *target = NULL;
int r;

assert(m);
/* Let's chase symlinks, but only one step at a time. That's because depending where the symlink points we
* might need to change the order in which we mount stuff. Hence: let's normalize piecemeal, and do one step at
* a time by specifying CHASE_STEP. This function returns 0 if we resolved one step, and > 0 if we reached the
* end and already have a fully normalized name. */

/* Since mount() will always follow symlinks and we need to take the different root directory into account we
* chase the symlinks on our own first. This is called for the destination path, as well as the source path (if
* that applies). The result is stored in "location". */
r = chase_symlinks(mount_entry_path(m), root_directory, CHASE_STEP|CHASE_NONEXISTENT, &target);
if (r < 0)
return log_debug_errno(r, "Failed to chase symlinks '%s': %m", mount_entry_path(m));
if (r > 0) /* Reached the end, nothing more to resolve */
return 1;

r = chase_symlinks(path, root_directory, CHASE_TRAIL_SLASH | (chase_nonexistent ? CHASE_NONEXISTENT : 0), &chased);
if (r == -ENOENT && m->ignore) {
log_debug_errno(r, "Path %s does not exist, ignoring.", path);
return 0;
if (m->n_followed >= CHASE_SYMLINKS_MAX) { /* put a boundary on things */
log_debug("Symlink loop on '%s'.", mount_entry_path(m));
return -ELOOP;
}
if (r < 0)
return log_debug_errno(r, "Failed to follow symlinks on %s: %m", path);

log_debug("Followed symlinks %s → %s.", path, chased);
log_debug("Followed mount entry path symlink %s → %s.", mount_entry_path(m), target);

free(*location);
*location = chased;
free_and_replace(m->path_malloc, target);
m->has_prefix = true;

return 1;
m->n_followed ++;

return 0;
}

static int apply_mount(
Expand All @@ -859,10 +863,6 @@ static int apply_mount(

assert(m);

r = mount_entry_chase(root_directory, m, mount_entry_path(m), !IN_SET(m->mode, INACCESSIBLE, READONLY, READWRITE), &m->path_malloc);
if (r <= 0)
return r;

log_debug("Applying namespace mount on %s", mount_entry_path(m));

switch (m->mode) {
Expand All @@ -875,8 +875,12 @@ static int apply_mount(
* inaccessible path. */
(void) umount_recursive(mount_entry_path(m), 0);

if (lstat(mount_entry_path(m), &target) < 0)
if (lstat(mount_entry_path(m), &target) < 0) {
if (errno == ENOENT && m->ignore)
return 0;

return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m));
}

what = mode_to_inaccessible_node(target.st_mode);
if (!what) {
Expand All @@ -889,6 +893,8 @@ static int apply_mount(
case READONLY:
case READWRITE:
r = path_is_mount_point(mount_entry_path(m), root_directory, 0);
if (r == -ENOENT && m->ignore)
return 0;
if (r < 0)
return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", mount_entry_path(m));
if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */
Expand All @@ -901,16 +907,29 @@ static int apply_mount(
rbind = false;

_fallthrough_;
case BIND_MOUNT_RECURSIVE:
/* Also chase the source mount */
case BIND_MOUNT_RECURSIVE: {
_cleanup_free_ char *chased = NULL;

r = mount_entry_chase(root_directory, m, mount_entry_source(m), false, &m->source_malloc);
if (r <= 0)
return r;
/* Since mount() will always follow symlinks we chase the symlinks on our own first. Note that bind
* mount source paths are always relative to the host root, hence we pass NULL as root directory to
* chase_symlinks() here. */

r = chase_symlinks(mount_entry_source(m), NULL, CHASE_TRAIL_SLASH, &chased);
if (r == -ENOENT && m->ignore) {
log_debug_errno(r, "Path %s does not exist, ignoring.", mount_entry_source(m));
return 0;
}
if (r < 0)
return log_debug_errno(r, "Failed to follow symlinks on %s: %m", mount_entry_source(m));

log_debug("Followed source symlinks %s → %s.", mount_entry_source(m), chased);

free_and_replace(m->source_malloc, chased);

what = mount_entry_source(m);
make = true;
break;
}

case EMPTY_DIR:
case TMPFS:
Expand Down Expand Up @@ -1340,11 +1359,38 @@ int setup_namespace(
goto finish;
}

/* First round, add in all special mounts we need */
for (m = mounts; m < mounts + n_mounts; ++m) {
r = apply_mount(root, m);
if (r < 0)
goto finish;
/* First round, establish all mounts we need */
for (;;) {
bool again = false;

for (m = mounts; m < mounts + n_mounts; ++m) {

if (m->applied)
continue;

r = follow_symlink(root, m);
if (r < 0)
goto finish;
if (r == 0) {
/* We hit a symlinked mount point. The entry got rewritten and might point to a
* very different place now. Let's normalize the changed list, and start from
* the beginning. After all to mount the entry at the new location we might
* need some other mounts first */
again = true;
break;
}

r = apply_mount(root, m);
if (r < 0)
goto finish;

m->applied = true;
}

if (!again)
break;

normalize_mounts(root_directory, mounts, &n_mounts);
}

/* Create a blacklist we can pass to bind_mount_recursive() */
Expand Down

0 comments on commit 088696f

Please sign in to comment.