Skip to content

Commit

Permalink
namespace: if RootDirectory= is used, mount /proc, /sys, /dev if not …
Browse files Browse the repository at this point in the history
…configured otherwise

If the admin didn't explicitly mount something on /proc, /sys or /dev, be nice
and mount the right file system there for them. This should make RootDirectory=
easier to use.

In order to minimize churn this will mount these dirs only if there's nothing
mounted at these dirs yet, which may be done either externally, or via
systemd's own settings.

This slightly alters the BindMount structure, adding a new read_only bit to it.
This is used when dropping duplicate mounts: whenever we encounter a duplicate
mount, and the one we drop is a read-only mount, then also make the one we keep
read-only. This effectively means we need to be able to combine the "read-only"
flag with any kind of entry.
  • Loading branch information
poettering committed Dec 2, 2016
1 parent 7d583ec commit 01c33c1
Showing 1 changed file with 107 additions and 12 deletions.
119 changes: 107 additions & 12 deletions src/core/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,22 @@
/* Kind of a mount entry. Every BindMount stores one of these values in its 'mode' bit-field, and apply_mount()
 * switches on it to pick the routine that sets the entry up. */
typedef enum MountMode {
/* This is ordered by priority! */
INACCESSIBLE,
/* NOTE(review): READONLY shows up twice in this view (here and again right before READWRITE). This looks like
 * a diff rendering in which this first occurrence is the removed line — confirm against the real file. */
READONLY,
PRIVATE_TMP,
PRIVATE_VAR_TMP,
PRIVATE_DEV, /* Set up by mount_private_dev() */
BIND_DEV, /* Set up by mount_bind_dev(): bind mount of the host's /dev */
SYSFS, /* Set up by mount_sysfs() */
PROCFS, /* Set up by mount_procfs() */
READONLY,
READWRITE,
} MountMode;

/* One mount entry: the path it applies to, the MountMode that decides how it is set up, and a few flag bits.
 * The static tables in this file initialize entries positionally (path, mode, ignore), so the order of the
 * leading members matters. */
typedef struct BindMount {
const char *path_const; /* Memory allocated on stack or static */
/* NOTE(review): two 'mode' bit-fields are visible here (6 bits and 5 bits). This looks like a diff rendering
 * in which the 6-bit line is the removed one — confirm against the real file. */
MountMode mode:6;
MountMode mode:5;
bool ignore:1; /* Ignore if path does not exist? */
bool has_prefix:1; /* Already is prefixed by the root dir? */
bool read_only:1; /* Shall this mount point be read-only? Combined with 'mode' by bind_mount_read_only(), and set by drop_duplicates() on the entry that is kept when a dropped duplicate was read-only */
char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory; bind_mount_path() prefers it when set */
} BindMount;

Expand Down Expand Up @@ -150,6 +154,14 @@ static const BindMount protect_system_strict_table[] = {
{ "/root", READWRITE, true }, /* ProtectHome= */
};

/* If RootDirectory= is used, let's mount /proc, /dev and /sys into it, but only as a fallback if the user hasn't
 * mounted something there already. These mounts are hence overridden by any other explicitly configured mounts.
 *
 * Each entry is { path, mode, ignore }. The mode selects the routine that sets the entry up: PROCFS ->
 * mount_procfs(), BIND_DEV -> mount_bind_dev(), SYSFS -> mount_sysfs(). Each of those is a NOP if the path
 * already is a mount point. */
static const BindMount root_directory_table[] = {
{ "/proc", PROCFS, false },
{ "/dev", BIND_DEV, false },
{ "/sys", SYSFS, false },
};

static const char *bind_mount_path(const BindMount *p) {
assert(p);

Expand All @@ -159,6 +171,12 @@ static const char *bind_mount_path(const BindMount *p) {
return p->path_malloc ?: p->path_const;
}

/* Tells whether the mount described by 'p' shall end up read-only: either because the explicit read_only bit is
 * set on the entry, or because its mode is one of READONLY or INACCESSIBLE. */
static bool bind_mount_read_only(const BindMount *p) {
        assert(p);

        /* An explicitly set flag wins regardless of the mode. */
        if (p->read_only)
                return true;

        /* Otherwise the mode alone decides. */
        return IN_SET(p->mode, READONLY, INACCESSIBLE);
}

static int append_access_mounts(BindMount **p, char **strv, MountMode mode) {
char **i;

Expand Down Expand Up @@ -314,6 +332,7 @@ static void drop_duplicates(BindMount *m, unsigned *n) {
* above. */
if (previous && path_equal(bind_mount_path(f), bind_mount_path(previous))) {
log_debug("%s is duplicate.", bind_mount_path(f));
previous->read_only = previous->read_only || bind_mount_read_only(f); /* Propagate the read-only flag to the remaining entry */
f->path_malloc = mfree(f->path_malloc);
continue;
}
Expand Down Expand Up @@ -421,7 +440,7 @@ static void drop_outside_root(const char *root_directory, BindMount *m, unsigned
*n = t - m;
}

static int mount_dev(BindMount *m) {
static int mount_private_dev(BindMount *m) {
static const char devnodes[] =
"/dev/null\0"
"/dev/zero\0"
Expand Down Expand Up @@ -560,6 +579,62 @@ static int mount_dev(BindMount *m) {
return r;
}

/* The little brother of mount_private_dev(): simply bind mounts the host's /dev (recursively) onto the path of
 * 'm'. This is only used when RootDirectory= is set.
 *
 * Returns 1 if the bind mount was established, 0 if the path already is a mount point (nothing is done then),
 * and a negative errno-style value on failure (logged at debug level). */
static int mount_bind_dev(BindMount *m) {
        const char *target;
        int is_mounted;

        assert(m);

        target = bind_mount_path(m);

        is_mounted = path_is_mount_point(target, NULL, 0);
        if (is_mounted < 0)
                return log_debug_errno(is_mounted, "Unable to determine whether /dev is already mounted: %m");

        if (is_mounted == 0) {
                /* Nothing mounted there yet, hence bring in the host's /dev including its submounts. */
                if (mount("/dev", target, NULL, MS_BIND|MS_REC, NULL) < 0)
                        return log_debug_errno(errno, "Failed to bind mount %s: %m", target);

                return 1;
        }

        /* Already a mount point: make this a NOP. */
        return 0;
}

/* Makes /sys available at the path of 'm' by recursively bind mounting the host's /sys there, unless the path
 * already is a mount point.
 *
 * Returns 1 if the mount was established, 0 if the path already is a mount point (nothing is done then), and a
 * negative errno-style value on failure (logged at debug level). */
static int mount_sysfs(BindMount *m) {
        const char *target;
        int is_mounted;

        assert(m);

        target = bind_mount_path(m);

        is_mounted = path_is_mount_point(target, NULL, 0);
        if (is_mounted < 0)
                return log_debug_errno(is_mounted, "Unable to determine whether /sys is already mounted: %m");

        if (is_mounted == 0) {
                /* Bind mount the host's version so that we get all child mounts of it, too. */
                if (mount("/sys", target, NULL, MS_BIND|MS_REC, NULL) < 0)
                        return log_debug_errno(errno, "Failed to mount %s: %m", target);

                return 1;
        }

        /* Already a mount point: make this a NOP. */
        return 0;
}

/* Mounts a fresh "proc" file system instance (nosuid, noexec, nodev) at the path of 'm', unless the path already
 * is a mount point.
 *
 * Returns 1 if the mount was established, 0 if the path already is a mount point (nothing is done then), and a
 * negative errno-style value on failure (logged at debug level). */
static int mount_procfs(BindMount *m) {
        const char *target;
        int is_mounted;

        assert(m);

        target = bind_mount_path(m);

        is_mounted = path_is_mount_point(target, NULL, 0);
        if (is_mounted < 0)
                return log_debug_errno(is_mounted, "Unable to determine whether /proc is already mounted: %m");

        if (is_mounted == 0) {
                /* Mount a new instance, so that we get the one that matches our user namespace, if we are
                 * running in one */
                if (mount("proc", target, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
                        return log_debug_errno(errno, "Failed to mount %s: %m", target);

                return 1;
        }

        /* Already a mount point: make this a NOP. */
        return 0;
}

static int apply_mount(
BindMount *m,
const char *tmp_dir,
Expand Down Expand Up @@ -614,7 +689,16 @@ static int apply_mount(
break;

case PRIVATE_DEV:
return mount_dev(m);
return mount_private_dev(m);

case BIND_DEV:
return mount_bind_dev(m);

case SYSFS:
return mount_sysfs(m);

case PROCFS:
return mount_procfs(m);

default:
assert_not_reached("Unknown mode");
Expand All @@ -634,7 +718,7 @@ static int make_read_only(BindMount *m, char **blacklist) {

assert(m);

if (IN_SET(m->mode, INACCESSIBLE, READONLY))
if (bind_mount_read_only(m))
r = bind_remount_recursive(bind_mount_path(m), true, blacklist);
else if (m->mode == PRIVATE_DEV) { /* Can be readonly but the submounts can't*/
if (mount(NULL, bind_mount_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
Expand Down Expand Up @@ -694,6 +778,7 @@ static int chase_all_symlinks(const char *root_directory, BindMount *m, unsigned
}

static unsigned namespace_calculate_mounts(
const char *root_directory,
const NameSpaceInfo *ns_info,
char** read_write_paths,
char** read_only_paths,
Expand All @@ -718,7 +803,9 @@ static unsigned namespace_calculate_mounts(
((protect_home == PROTECT_HOME_READ_ONLY) ?
ELEMENTSOF(protect_home_read_only_table) : 0));

return !!tmp_dir + !!var_tmp_dir +
return (root_directory ? ELEMENTSOF(root_directory_table) : 0) +
!!tmp_dir +
!!var_tmp_dir +
strv_length(read_write_paths) +
strv_length(read_only_paths) +
strv_length(inaccessible_paths) +
Expand Down Expand Up @@ -749,12 +836,14 @@ int setup_namespace(
if (mount_flags == 0)
mount_flags = MS_SHARED;

n_mounts = namespace_calculate_mounts(ns_info,
read_write_paths,
read_only_paths,
inaccessible_paths,
tmp_dir, var_tmp_dir,
protect_home, protect_system);
n_mounts = namespace_calculate_mounts(
root_directory,
ns_info,
read_write_paths,
read_only_paths,
inaccessible_paths,
tmp_dir, var_tmp_dir,
protect_home, protect_system);

/* Set mount slave mode */
if (root_directory || n_mounts > 0)
Expand Down Expand Up @@ -795,6 +884,12 @@ int setup_namespace(
};
}

if (root_directory) {
r = append_static_mounts(&m, root_directory_table, ELEMENTSOF(root_directory_table), ns_info->ignore_protect_paths);
if (r < 0)
goto finish;
}

if (ns_info->protect_kernel_tunables) {
r = append_static_mounts(&m, protect_kernel_tunables_table, ELEMENTSOF(protect_kernel_tunables_table), ns_info->ignore_protect_paths);
if (r < 0)
Expand Down

0 comments on commit 01c33c1

Please sign in to comment.