From 638bd6594fa0ca974ef288f23c5bf400ee4cf7b6 Mon Sep 17 00:00:00 2001 From: William Manley Date: Sun, 5 Feb 2017 23:39:16 +0000 Subject: [PATCH] Add --overlay and --ro-overlay command line options These enable bubblewrap to create overlay mounts. This will be useful for an ostree-based build system we use where overlayfs ensures that none of the ostree hard-linked files I checkout get modified. Currently we use a maze of bash/unshare/mount/sudo/chroot where bubblewrap will be much nicer. This commit contains a bit of string manipulation, which isn't particularly fun to write in C. Hopefully I got it right. I've had to add some additional capabilities because otherwise overlayfs has some difficulty writing to directories that exist in the lower filesystem but are not yet in the upper. It's got something to do with the `work` directory that overlayfs uses. overlayfs will create a `work` directory with 0 permissions. Ordinarily root would be able to write to this directory because permissions checks don't apply to it, but that requires these additional capabilities: * `CAP_DAC_OVERRIDE` * Bypass file read, write, and execute permission checks. ... * `CAP_DAC_READ_SEARCH` * Bypass file read permission checks and directory read and execute permission checks; * `CAP_FOWNER` * Bypass permission checks on operations that normally require the filesystem UID of the process to match the UID of the file ... (see `man 7 capabilities`) No tests are written to exercise this new feature. 
--- bubblewrap.c | 162 ++++++++++++++++++++++++++++++++++++++++- bwrap.xml | 35 +++++++++ completions/bash/bwrap | 2 + 3 files changed, 198 insertions(+), 1 deletion(-) diff --git a/bubblewrap.c b/bubblewrap.c index 409d94c4..201372d6 100644 --- a/bubblewrap.c +++ b/bubblewrap.c @@ -77,6 +77,8 @@ typedef enum { SETUP_BIND_MOUNT, SETUP_RO_BIND_MOUNT, SETUP_DEV_BIND_MOUNT, + SETUP_OVERLAY_MOUNT, + SETUP_RO_OVERLAY_MOUNT, SETUP_MOUNT_PROC, SETUP_MOUNT_DEV, SETUP_MOUNT_TMPFS, @@ -101,6 +103,12 @@ struct _SetupOp SetupOpType type; const char *source; const char *dest; + + /* for overlayfs: */ + const char *layers; /* colon-separated layer directories, as given on the command line */ + const char *workdir; /* WORKDIR argument; set only by --overlay */ + const char *options; /* overlayfs mount option string, filled in by resolve_symlinks_in_ops () */ + int fd; SetupOpFlag flags; SetupOp *next; @@ -122,6 +130,7 @@ static LockFile *last_lock_file = NULL; enum { PRIV_SEP_OP_DONE, PRIV_SEP_OP_BIND_MOUNT, + PRIV_SEP_OP_OVERLAY_MOUNT, PRIV_SEP_OP_PROC_MOUNT, PRIV_SEP_OP_TMPFS_MOUNT, PRIV_SEP_OP_DEVPTS_MOUNT, @@ -202,6 +211,11 @@ usage (int ecode, FILE *out) " --dev-bind SRC DEST Bind mount the host path SRC on DEST, allowing device access\n" " --ro-bind SRC DEST Bind mount the host path SRC readonly on DEST\n" " --remount-ro DEST Remount DEST as readonly, it doesn't recursively remount\n" + " --overlay LAYERS DEST WORKDIR Mount overlayfs on DEST. LAYERS is a colon separated list of\n" + " directories. WORKDIR must be an empty directory on the same\n" + " filesystem as the last layer.\n" + " --ro-overlay LAYERS DEST Mount overlayfs read-only on DEST. 
LAYERS is a colon separated list\n" + " of directories\n" " --exec-label LABEL Exec Label for the sandbox\n" " --file-label LABEL File label for temporary sandbox content\n" " --proc DEST Mount procfs on DEST\n" @@ -434,7 +448,7 @@ do_init (int event_fd, pid_t initial_pid, struct sock_fprog *seccomp_prog) } /* low 32bit caps needed */ -#define REQUIRED_CAPS_0 (CAP_TO_MASK (CAP_SYS_ADMIN) | CAP_TO_MASK (CAP_SYS_CHROOT) | CAP_TO_MASK (CAP_NET_ADMIN) | CAP_TO_MASK (CAP_SETUID) | CAP_TO_MASK (CAP_SETGID)) +#define REQUIRED_CAPS_0 (CAP_TO_MASK (CAP_SYS_ADMIN) | CAP_TO_MASK (CAP_SYS_CHROOT) | CAP_TO_MASK (CAP_NET_ADMIN) | CAP_TO_MASK (CAP_SETUID) | CAP_TO_MASK (CAP_SETGID) | CAP_TO_MASK (CAP_DAC_OVERRIDE) | CAP_TO_MASK (CAP_DAC_READ_SEARCH) | CAP_TO_MASK (CAP_FOWNER)) /* high 32bit caps needed */ #define REQUIRED_CAPS_1 0 @@ -778,6 +792,12 @@ privileged_op (int privileged_op_socket, die_with_error ("Can't mount mqueue on %s", arg1); break; + case PRIV_SEP_OP_OVERLAY_MOUNT: /* arg1: overlayfs option string, arg2: mount point */ + if (mount ("overlay", arg2, "overlay", MS_MGC_VAL, arg1) != 0) + die_with_error ("Can't make overlay mount on %s with options %s", + arg2, arg1); + break; + case PRIV_SEP_OP_SET_HOSTNAME: /* This is checked at the start, but lets verify it here in case something manages to send hacked priv-sep operation requests. 
*/ @@ -792,6 +812,131 @@ privileged_op (int privileged_op_socket, } } +/* Growable heap string used to assemble overlayfs mount option strings. */ +struct _StringBuilder +{ + char * str; /* buffer; NULL until the first strappend () */ + size_t size; /* bytes allocated for str */ + size_t offset; /* length of the text written so far, excluding the NUL */ +}; + +/* Append src to dest, enlarging the buffer as needed. Dies on allocation failure. */ +static void +strappend(struct _StringBuilder *dest, const char *src) +{ + size_t len = strlen(src); + if (dest->offset + len >= dest->size) { + /* The extra byte guarantees room for the NUL even when size and len are both 0. */ + dest->size = (dest->size + len + 1) * 2; + dest->str = realloc(dest->str, dest->size); + if (dest->str == NULL) + die ("Out of memory"); + } + + strcpy(dest->str + dest->offset, src); + dest->offset += len; +} + +/* + * realpath() that dies instead of returning NULL. The result is spliced into an + * overlayfs option string, where ',' separates options, ':' separates lower layers + * and '\' is an escape character, so resolved paths containing any of those are + * rejected rather than being allowed to alter the mount options. + */ +static char * +realpathx(const char *path, char *resolved_path) +{ + char *rp; + rp = realpath(path, resolved_path); + if (rp == NULL) + die_with_error ("Failed to resolve path %s", path); + if (strpbrk (rp, ",:\\") != NULL) + die ("Path %s contains ',', ':' or '\\', which cannot be used in overlayfs mount options", rp); + return rp; +} + +/* + * Build the mount options for --ro-overlay: every entry of the colon-separated + * LAYERS list is resolved and becomes a lowerdir under /oldroot, e.g. + * "/hello:/goodbye" -> "lowerdir=/oldroot/hello:/oldroot/goodbye" + * The returned string is heap-allocated. + */ +static char * +ro_overlay_options(const char* layers) +{ + struct _StringBuilder sb = {0}; + cleanup_free char *layers_mut = strdup(layers); + char buf[PATH_MAX]; + char * token; + int first = 1; + + if (layers_mut == NULL) + die ("Out of memory"); + + strappend(&sb, "lowerdir="); + + token = strtok(layers_mut, ":"); + while (token != NULL) { + if (!first) + strappend(&sb, ":"); + strappend(&sb, "/oldroot"); + /* Resolve absolute symlinks before we remount under /oldroot: */ + strappend(&sb, realpathx (token, buf)); + + token = strtok(NULL, ":"); + first = 0; + } + if (first) + die ("--ro-overlay needs at least one layer"); + return sb.str; +} + +/* + * Build the mount options for --overlay: the last entry of the colon-separated + * LAYERS list becomes upperdir, the entries before it become lowerdir, and + * WORKDIR becomes workdir; all are resolved and prefixed with /oldroot, e.g. + * "/hello:/goodbye", "/moo" -> "lowerdir=/oldroot/hello,upperdir=/oldroot/goodbye,workdir=/oldroot/moo" + * "/hello:/3:/goodbye", "/moo" -> "lowerdir=/oldroot/hello:/oldroot/3,upperdir=/oldroot/goodbye,workdir=/oldroot/moo" + * The returned string is heap-allocated. + */ +static char * +rw_overlay_options(const char* layers, const char* workdir) +{ + struct _StringBuilder sb = {0}; + cleanup_free char *layers_mut = strdup(layers); + char buf[PATH_MAX]; + char *path, *next; + int first = 1; + + if (layers_mut == NULL) + die ("Out of memory"); + + strappend(&sb, "lowerdir="); + + next = strtok(layers_mut, ":"); + while (1) { + path = next; + next = strtok(NULL, ":"); + if (next == NULL) + break; + + if (!first) + strappend(&sb, ":"); + strappend(&sb, "/oldroot"); + 
/* Resolve absolute symlinks before we remount under /oldroot: */ + strappend(&sb, realpathx (path, buf)); + + first = 0; + } + if (first) + die ("--overlay needs at least two layers: one or more lower layers followed by the upper layer"); + strappend(&sb, ",upperdir=/oldroot"); + strappend(&sb, realpathx (path, buf)); + strappend(&sb, ",workdir=/oldroot"); + strappend(&sb, realpathx (workdir, buf)); + return sb.str; +} + /* This is run unprivileged in the child namespace but can request * some privileged operations (also in the child namespace) via the * privileged_op_socket. @@ -846,6 +991,18 @@ setup_newroot (bool unshare_pid, source, dest); break; + case SETUP_OVERLAY_MOUNT: + case SETUP_RO_OVERLAY_MOUNT: + { + /* op->options is filled in by resolve_symlinks_in_ops (). */ + if (mkdir (dest, 0755) != 0 && errno != EEXIST) + die_with_error ("Can't mkdir %s", op->dest); + + privileged_op (privileged_op_socket, + PRIV_SEP_OP_OVERLAY_MOUNT, 0, op->options, dest); + } + break; + case SETUP_REMOUNT_RO_NO_RECURSIVE: privileged_op (privileged_op_socket, PRIV_SEP_OP_REMOUNT_RO_NO_RECURSIVE, BIND_READONLY, NULL, dest); @@ -1063,6 +1220,12 @@ resolve_symlinks_in_ops (void) if (op->source == NULL) die_with_error ("Can't find source path %s", old_source); break; + case SETUP_RO_OVERLAY_MOUNT: + op->options = ro_overlay_options(op->layers); + break; + case SETUP_OVERLAY_MOUNT: + op->options = rw_overlay_options(op->layers, op->workdir); + break; default: break; } @@ -1315,6 +1478,31 @@ parse_args_recurse (int *argcp, op->source = argv[1]; op->dest = argv[2]; + argv += 2; + argc -= 2; + } + else if (strcmp (arg, "--overlay") == 0) + { + if (argc < 4) + die ("--overlay takes three arguments"); + + op = setup_op_new (SETUP_OVERLAY_MOUNT); + op->layers = argv[1]; + op->dest = argv[2]; + op->workdir = argv[3]; + + argv += 3; + argc -= 3; + } + else if (strcmp (arg, "--ro-overlay") == 0) + { + if (argc < 3) + die ("--ro-overlay takes two arguments"); + + op = setup_op_new (SETUP_RO_OVERLAY_MOUNT); + op->layers = argv[1]; + op->dest = argv[2]; + argv += 2; argc -= 2; } diff --git a/bwrap.xml b/bwrap.xml index 
b7a5c41c..6e48d65b 100644 --- a/bwrap.xml +++ b/bwrap.xml @@ -195,6 +195,41 @@ Remount the path DEST as readonly. It works only on the specified mount point, without changing any other mount point under the specified path + + + + + + + Use overlayfs to bind mount the host paths + LAYERS on DEST. + LAYERS is a colon separated list of paths. + DEST will contain the union of all the files + in all the LAYERS. The paths listed in + LAYERS may not contain a comma (,) or a colon (:). + + + With --overlay all writes will go to the + top layer which is the last layer in the list. + WORKDIR must be an empty directory on the + same filesystem as the top layer. + + + With --ro-overlay the filesystem will be + mounted read-only so a WORKDIR is not needed + and shouldn't be provided. + + + Using --ro-overlay or providing more than + one layer requires a Linux kernel version of 4.0 or later. + + + For more information see the Overlay Filesystem documentation in the + Linux kernel at + https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt + + + Mount procfs on DEST diff --git a/completions/bash/bwrap b/completions/bash/bwrap index 34780ba3..e669fd10 100644 --- a/completions/bash/bwrap +++ b/completions/bash/bwrap @@ -25,6 +25,8 @@ _bwrap() { --args --bind --bind-data + --overlay + --ro-overlay --block-fd --chdir --dev