Permalink
...
Checking mergeability…
Don’t worry, you can still create the pull request.
Comparing changes
Open a pull request
- 1 commit
- 7 files changed
- 0 commit comments
- 1 contributor
Unified
Split
Showing
with
250 additions
and 23 deletions.
- +82 −1 cmd/libsnap-confine-private/cgroup-freezer-support.c
- +9 −0 cmd/libsnap-confine-private/cgroup-freezer-support.h
- +44 −13 cmd/snap-confine/ns-support.c
- +6 −5 cmd/snap-confine/ns-support.h
- +18 −1 cmd/snap-confine/snap-confine.apparmor.in
- +15 −3 cmd/snap-confine/snap-confine.c
- +76 −0 tests/main/stale-base-snap/task.yaml
View
83
cmd/libsnap-confine-private/cgroup-freezer-support.c
| @@ -19,7 +19,7 @@ static const char *freezer_cgroup_dir = "/sys/fs/cgroup/freezer"; | ||
| void sc_cgroup_freezer_join(const char *snap_name, pid_t pid) | ||
| { | ||
| - // Format the name of the cgroup hierarchy. | ||
| + // Format the name of the cgroup hierarchy. | ||
| char buf[PATH_MAX] = { 0 }; | ||
| sc_must_snprintf(buf, sizeof buf, "snap.%s", snap_name); | ||
| @@ -65,3 +65,84 @@ void sc_cgroup_freezer_join(const char *snap_name, pid_t pid) | ||
| debug("moved process %ld to freezer cgroup hierarchy for snap %s", | ||
| (long)pid, snap_name); | ||
| } | ||
| + | ||
| +bool sc_cgroup_freezer_occupied(const char *snap_name, uid_t uid) | ||
| +{ | ||
| + // Format the name of the cgroup hierarchy. | ||
| + char buf[PATH_MAX] = { 0 }; | ||
| + sc_must_snprintf(buf, sizeof buf, "snap.%s", snap_name); | ||
| + | ||
| + // Open the freezer cgroup directory. | ||
| + int cgroup_fd SC_CLEANUP(sc_cleanup_close) = -1; | ||
| + cgroup_fd = open(freezer_cgroup_dir, | ||
| + O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC); | ||
| + if (cgroup_fd < 0) { | ||
| + die("cannot open freezer cgroup (%s)", freezer_cgroup_dir); | ||
| + } | ||
| + // Open the proc directory. | ||
| + int proc_fd SC_CLEANUP(sc_cleanup_close) = -1; | ||
| + proc_fd = open("/proc", O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC); | ||
| + if (proc_fd < 0) { | ||
| + die("cannot open /proc"); | ||
| + } | ||
| + // Open the hierarchy directory for the given snap. | ||
| + int hierarchy_fd SC_CLEANUP(sc_cleanup_close) = -1; | ||
| + hierarchy_fd = openat(cgroup_fd, buf, | ||
| + O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC); | ||
| + if (hierarchy_fd < 0) { | ||
| + if (errno == ENOENT) { | ||
| + return false; | ||
| + } | ||
| + die("cannot open freezer cgroup hierarchy for snap %s", | ||
| + snap_name); | ||
| + } | ||
| + // Open the "cgroup.procs" file. Alternatively we could open the "tasks" | ||
| + // file and see per-thread data but we don't need that. | ||
| + int cgroup_procs_fd SC_CLEANUP(sc_cleanup_close) = -1; | ||
| + cgroup_procs_fd = openat(hierarchy_fd, "cgroup.procs", | ||
| + O_RDONLY | O_NOFOLLOW | O_CLOEXEC); | ||
| + if (cgroup_procs_fd < 0) { | ||
| + die("cannot open cgroup.procs file for freezer cgroup hierarchy for snap %s", snap_name); | ||
| + } | ||
| + | ||
| + FILE *cgroup_procs SC_CLEANUP(sc_cleanup_file) = NULL; | ||
| + cgroup_procs = fdopen(cgroup_procs_fd, "r"); | ||
| + if (cgroup_procs == NULL) { | ||
| + die("cannot convert tasks file descriptor to FILE"); | ||
| + } | ||
| + cgroup_procs_fd = -1; // tasks_fd will now be closed by fclose. | ||
| + | ||
| + char *line_buf SC_CLEANUP(sc_cleanup_string) = NULL; | ||
| + size_t line_buf_size = 0; | ||
| + ssize_t num_read; | ||
| + struct stat statbuf; | ||
| + do { | ||
| + num_read = getline(&line_buf, &line_buf_size, cgroup_procs); | ||
| + if (num_read < 0 && errno != 0) { | ||
| + die("cannot read next PID belonging to snap %s", | ||
| + snap_name); | ||
| + } | ||
| + if (num_read <= 0) { | ||
| + break; | ||
| + } | ||
| + if (num_read > 0 && line_buf[num_read - 1] == '\n') { | ||
| + line_buf[num_read - 1] = '\0'; | ||
| + } | ||
| + debug("found process id: %s\n", line_buf); | ||
| + | ||
| + if (fstatat(proc_fd, line_buf, &statbuf, AT_SYMLINK_NOFOLLOW) < | ||
| + 0) { | ||
| + // The process may have died already. | ||
| + if (errno != ENOENT) { | ||
| + die("cannot stat /proc/%s", line_buf); | ||
| + } | ||
| + } | ||
| + if (statbuf.st_uid == uid || uid == (uid_t) - 1) { | ||
| + debug("found process %s belonging to user %d", | ||
| + line_buf, statbuf.st_uid); | ||
| + return true; | ||
| + } | ||
| + } while (num_read > 0); | ||
| + | ||
| + return false; | ||
| +} | ||
View
9
cmd/libsnap-confine-private/cgroup-freezer-support.h
| @@ -23,4 +23,13 @@ | ||
| **/ | ||
| void sc_cgroup_freezer_join(const char *snap_name, pid_t pid); | ||
| +/** | ||
| + * Check if a freezer cgroup for given snap has any processes belonging to a given user. | ||
| + * | ||
| + * This function examines the freezer cgroup called "snap.$snap_name" and looks | ||
| + * at each of its processes. If any process that belongs to the specified user, | ||
| + * or a wild-card value of -1, exists then the function returns true. | ||
| +**/ | ||
| +bool sc_cgroup_freezer_occupied(const char *snap_name, uid_t uid); | ||
| + | ||
| #endif | ||
View
57
cmd/snap-confine/ns-support.c
| @@ -38,11 +38,12 @@ | ||
| #include <sys/wait.h> | ||
| #include <unistd.h> | ||
| +#include "../libsnap-confine-private/cgroup-freezer-support.h" | ||
| #include "../libsnap-confine-private/cleanup-funcs.h" | ||
| +#include "../libsnap-confine-private/locking.h" | ||
| #include "../libsnap-confine-private/mountinfo.h" | ||
| #include "../libsnap-confine-private/string-utils.h" | ||
| #include "../libsnap-confine-private/utils.h" | ||
| -#include "../libsnap-confine-private/locking.h" | ||
| #include "user-support.h" | ||
| /*! | ||
| @@ -306,11 +307,12 @@ static bool should_discard_current_ns(dev_t base_snap_dev) | ||
| die("cannot find mount entry of the root filesystem inside snap namespace"); | ||
| } | ||
| -void sc_create_or_join_ns_group(struct sc_ns_group *group, | ||
| - struct sc_apparmor *apparmor, | ||
| - const char *base_snap_name, | ||
| - const char *snap_name) | ||
| +int sc_create_or_join_ns_group(struct sc_ns_group *group, | ||
| + struct sc_apparmor *apparmor, | ||
| + const char *base_snap_name, | ||
| + const char *snap_name) | ||
| { | ||
| + int retcode = 0; | ||
| // Open the mount namespace file. | ||
| char mnt_fname[PATH_MAX] = { 0 }; | ||
| sc_must_snprintf(mnt_fname, sizeof mnt_fname, "%s%s", group->name, | ||
| @@ -385,12 +387,40 @@ void sc_create_or_join_ns_group(struct sc_ns_group *group, | ||
| ("successfully re-associated the mount namespace with the namespace group %s", | ||
| group->name); | ||
| - bool should_discard_ns = | ||
| - should_discard_current_ns(base_snap_dev); | ||
| - | ||
| - if (should_discard_ns) { | ||
| - debug("discarding obsolete base filesystem namespace"); | ||
| - debug("(not yet implemented)"); | ||
| + if (should_discard_current_ns(base_snap_dev)) { | ||
| + debug("the namespace is stale and should be discarded"); | ||
| + if (sc_cgroup_freezer_occupied(snap_name, (uid_t) - 1)) { | ||
| + fprintf(stderr, | ||
| + "snap %s cannot use current base snap %s because " | ||
| + "existing process are still using the old revision\n", | ||
| + snap_name, base_snap_name); | ||
| + } else { | ||
| + debug("discarding stale mount namespace"); | ||
| + // This will move us back to the main mount namespace. We have | ||
| + // to do this as the directory with mount namespaces is | ||
| + // privately shared (so not shared) so from this perspective | ||
| + // (inside a snap mount namespace) we cannot unmount it - it's | ||
| + // not mounted. | ||
| + sc_reassociate_with_pid1_mount_ns(); | ||
| + // XXX: This doesn't work (apparmor is confused about | ||
| + // file descriptors and namespace transitions | ||
| + // apparently). Instead of doing that, just unmount the | ||
| + // thing manually with a full path. | ||
| + // | ||
| + // sc_discard_preserved_ns_group(group); | ||
| + sc_must_snprintf(mnt_fname, sizeof mnt_fname, | ||
| + "%s/%s%s", sc_ns_dir, | ||
| + group->name, SC_NS_MNT_FILE); | ||
| + // Use MNT_DETACH as otherwise we get EBUSY. | ||
| + if (umount2(mnt_fname, MNT_DETACH|UMOUNT_NOFOLLOW) < 0) { | ||
| + die("cannot umount stale mount namespace %s", mnt_fname); | ||
| + } | ||
| + debug("stale mount namespace discarded"); | ||
| + // Instead of returning, carry on with the code below to | ||
| + // restore the correct working directory for a subsequent | ||
| + // second invocation of this function. | ||
| + retcode = EAGAIN; | ||
| + } | ||
| } | ||
| // Try to re-locate back to vanilla working directory. This can fail | ||
| // because that directory is no longer present. | ||
| @@ -404,7 +434,7 @@ void sc_create_or_join_ns_group(struct sc_ns_group *group, | ||
| } | ||
| debug("successfully moved to %s", SC_VOID_DIR); | ||
| } | ||
| - return; | ||
| + return retcode; | ||
| } | ||
| debug("initializing new namespace group %s", group->name); | ||
| // Create a new namespace and ask the caller to populate it. | ||
| @@ -501,9 +531,10 @@ void sc_create_or_join_ns_group(struct sc_ns_group *group, | ||
| } | ||
| group->should_populate = true; | ||
| } | ||
| + return 0; | ||
| } | ||
| -bool sc_should_populate_ns_group(struct sc_ns_group *group) | ||
| +bool sc_should_populate_ns_group(struct sc_ns_group * group) | ||
| { | ||
| return group->should_populate; | ||
| } | ||
View
11
cmd/snap-confine/ns-support.h
| @@ -105,12 +105,13 @@ void sc_close_ns_group(struct sc_ns_group *group); | ||
| * the parent process unshares the mount namespace and sets a flag so that | ||
| * sc_should_populate_ns_group() returns true. | ||
| * | ||
| - * @returns true if the mount namespace needs to be populated | ||
| + * @returns 0 on success and EAGAIN if the namespace was stale and needs | ||
| + * to be re-made. | ||
| **/ | ||
| -void sc_create_or_join_ns_group(struct sc_ns_group *group, | ||
| - struct sc_apparmor *apparmor, | ||
| - const char *base_snap_name, | ||
| - const char *snap_name); | ||
| +int sc_create_or_join_ns_group(struct sc_ns_group *group, | ||
| + struct sc_apparmor *apparmor, | ||
| + const char *base_snap_name, | ||
| + const char *snap_name); | ||
| /** | ||
| * Check if the namespace needs to be populated. | ||
View
19
cmd/snap-confine/snap-confine.apparmor.in
| @@ -55,10 +55,12 @@ | ||
| # cgroup: freezer | ||
| # Allow creating per-snap cgroup freezers and adding snap command (task) | ||
| # invocations to the freezer. This allows for reliably enumerating all | ||
| - # running tasks for the snap. | ||
| + # running tasks for the snap. In addition, allow enumerating processes in | ||
| + # the cgroup to determine if it is occupied. | ||
| /sys/fs/cgroup/freezer/ r, | ||
| /sys/fs/cgroup/freezer/snap.*/ w, | ||
| /sys/fs/cgroup/freezer/snap.*/tasks w, | ||
| + /sys/fs/cgroup/freezer/snap.*/cgroup.procs r, | ||
| # querying udev | ||
| /etc/udev/udev.conf r, | ||
| @@ -303,6 +305,21 @@ | ||
| # Allow snap-confine to read snap contexts | ||
| /var/lib/snapd/context/snap.* r, | ||
| + # Allow snap-confine to unmount stale mount namespaces. | ||
| + # XXX: This rule is not necessary due to the bug mentioned below. I'm | ||
| + # including it in the hope that it can be used once the kernel is fixed. | ||
| + umount /run/snapd/ns/*.mnt, | ||
| + | ||
| + # XXX: This looks like a bug in apparmor where it becomes woefully confused | ||
| + # about where we are after setns calls that jump in and then out of a mount | ||
| + # namespace. Without the following rule I see this failure while executing | ||
| + # qemu:ubuntu-16.04-64:tests/main/stale-base-snap | ||
| + # | ||
| + # Nov 29 22:11:00 autopkgtest audit[21993]: AVC apparmor="DENIED" | ||
| + # operation="umount" profile="/snap/core/x1/usr/lib/snapd/snap-confine" | ||
| + # name="/" pid=21993 comm="snap-confine" | ||
| + umount /, | ||
| + | ||
| # Support for the quirk system | ||
| /var/ r, | ||
| /var/lib/ r, | ||
View
18
cmd/snap-confine/snap-confine.c
| @@ -18,12 +18,13 @@ | ||
| #include "config.h" | ||
| #endif | ||
| +#include <errno.h> | ||
| #include <glob.h> | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| -#include <sys/types.h> | ||
| #include <sys/stat.h> | ||
| +#include <sys/types.h> | ||
| #include <unistd.h> | ||
| #include "../libsnap-confine-private/cgroup-freezer-support.h" | ||
| @@ -222,8 +223,19 @@ int main(int argc, char **argv) | ||
| debug("initializing mount namespace: %s", snap_name); | ||
| struct sc_ns_group *group = NULL; | ||
| group = sc_open_ns_group(snap_name, 0); | ||
| - sc_create_or_join_ns_group(group, &apparmor, | ||
| - base_snap_name, snap_name); | ||
| + if (sc_create_or_join_ns_group(group, &apparmor, | ||
| + base_snap_name, | ||
| + snap_name) == EAGAIN) { | ||
| + // If the namespace was stale and was discarded we just need to | ||
| + // try again. Since this is done with the per-snap lock held | ||
| + // there are no races here. | ||
| + if (sc_create_or_join_ns_group(group, &apparmor, | ||
| + base_snap_name, | ||
| + snap_name) == | ||
| + EAGAIN) { | ||
| + die("unexpectedly the namespace needs to be discarded again"); | ||
| + } | ||
| + } | ||
| if (sc_should_populate_ns_group(group)) { | ||
| sc_populate_mount_ns(base_snap_name, snap_name); | ||
| sc_preserve_populated_ns_group(group); | ||
View
76
tests/main/stale-base-snap/task.yaml
| @@ -0,0 +1,76 @@ | ||
| +summary: when the base snap changes revision apps are not stuck on the stale one | ||
| +details: | | ||
| + When a snap application starts running the mount namespace it inhabits is | ||
| + preserved and cached across all the applications belonging to a given snap. | ||
| + This gives the system the feeling of integrity as all processes belonging | ||
| + to that snap have the same filesystem view. When the base snap revision | ||
| + changes (potentially bringing in important bug and security fixes) the | ||
| + mount namespace is not immediately re-created. As long as application | ||
| + process inhabit the old mount namespace it will stay as is. When the last | ||
| + process dies the subsequently started process will detect those two facts | ||
| + (stale and unused mount namespace) and discard it. | ||
| + | ||
| +# This test doesn't run on core because there snapd will reboot the machine as | ||
| +# soon as the new core snap is installed. A variant of this using a non-core | ||
| +# base snap is possible but that is a separate test. | ||
| +systems: [-ubuntu-core-*] | ||
| +prepare: | | ||
| + # We will need the Swiss-army-knife shell snap. We want to use it in | ||
| + # devmode so that we can look at /customized file which is non-standard. | ||
| + . $TESTSLIB/snaps.sh | ||
| + install_local_devmode test-snapd-sh | ||
| + | ||
| + # We will also need to prepare a hacked core snap that just differs from | ||
| + # our own in some detail that we can measure. | ||
| + unsquashfs $(ls /var/lib/snapd/snaps/core_*.snap | sort -r -n | head -n 1) | ||
| + touch squashfs-root/customized | ||
| + sed -i -e 's/version: .*/version: customized/' squashfs-root/meta/snap.yaml | ||
| + | ||
| + mksnap_fast squashfs-root core-customized.snap | ||
| +execute: | | ||
| + # Start a "sleep" process in the background | ||
| + test-snapd-sh -c 'touch $SNAP_DATA/stamp && exec sleep 1h' & | ||
| + pid=$! | ||
| + | ||
| + # Ensure that snap-confine has finished its task and that the snap process | ||
| + # is active. Note that we don't want to wait forever either. | ||
| + for i in $(seq 30); do | ||
| + test -e /var/snap/test-snapd-sh/current/stamp && break | ||
| + sleep 0.1 | ||
| + done | ||
| + | ||
| + # Create another stamp file in /tmp (which is private to the snap). We'll | ||
| + # need it in a moment. | ||
| + test-snapd-sh -c "touch /tmp/stamp" | ||
| + | ||
| + # Now, refresh core to our customized core snap. | ||
| + snap install --dangerous ./core-customized.snap | ||
| + snap list | MATCH customized | ||
| + | ||
| + # Now we are in a situation where the core snap is stale but our sleeper | ||
| + # application is still alive so we cannot use it. We can ensure that this | ||
| + # is the case by looking at our stamp file and seeing it being there. | ||
| + test-snapd-sh -c "test -e /tmp/stamp" | ||
| + test-snapd-sh -c "test ! -e /customized" | ||
| + | ||
| + # We are even very vocal about this and leave a message to the | ||
| + # administrator. | ||
| + if test-snapd-sh -c "true" | grep "snap test-snapd-sh cannot use current base snap core because existing process are still using the old revision"; then | ||
| + echo "Expected a warning, got none" | ||
| + kill "$pid" || true | ||
| + wait -n || true | ||
| + exit 1 | ||
| + fi | ||
| + | ||
| + # Kill our helper process. | ||
| + kill "$pid" || true | ||
| + wait -n || true | ||
| + | ||
| + # Now when the process terminates, we no longer need to hold back. | ||
| + # The next process will use a fresh namespace and will no longer see | ||
| + # the stamp file in /tmp because /tmp is now empty. | ||
| + test-snapd-sh -c "test ! -e /tmp/stamp" | ||
| + test-snapd-sh -c "test -e /customized" | ||
| +restore: | | ||
| + rm -rf squashfs-root | ||
| + rm -f core-customized.snap |