// Permalink
// 1e407d3 Nov 6, 2017
// @zyga @mvo5 @bboozzoo @jdstrand
// 716 lines (679 sloc) 27 KB
/*
* Copyright (C) 2015 Canonical Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "config.h"
#include "mount-support.h"
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <limits.h>
#include <mntent.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <libgen.h>
#include "../libsnap-confine-private/classic.h"
#include "../libsnap-confine-private/cleanup-funcs.h"
#include "../libsnap-confine-private/mount-opt.h"
#include "../libsnap-confine-private/mountinfo.h"
#include "../libsnap-confine-private/snap.h"
#include "../libsnap-confine-private/string-utils.h"
#include "../libsnap-confine-private/utils.h"
#include "mount-support-nvidia.h"
#include "quirks.h"
#define MAX_BUF 1000
/*!
* The void directory.
*
* Snap confine moves to that directory in case it cannot retain the current
* working directory across the pivot_root call.
**/
#define SC_VOID_DIR "/var/lib/snapd/void"
// TODO: simplify this, after all it is just a tmpfs
// TODO: fold this into bootstrap
/**
 * Give the snap a private /tmp directory.
 *
 * A fresh base directory is created with mkdtemp() under /tmp (protected from
 * other users), a world-writable sticky "tmp" directory is created inside it
 * and that inner directory is bind mounted over /tmp. The new /tmp mount is
 * then made private and handed over to the calling user and group.
 *
 * @snap_name: name of the snap, used as a part of the base directory name.
 **/
static void setup_private_mount(const char *snap_name)
{
	const uid_t user = getuid();
	const gid_t group = getgid();
	char private_tmp[MAX_BUF] = { 0 };

	// The base directory: only the template is prepared here, mkdtemp()
	// picks the final unique name and creates the directory.
	sc_must_snprintf(private_tmp, sizeof(private_tmp),
			 "/tmp/snap.%d_%s_XXXXXX", user, snap_name);
	if (mkdtemp(private_tmp) == NULL) {
		die("cannot create temporary directory essential for private /tmp");
	}

	// The inner directory must really end up as 1777, so the umask is
	// cleared for the duration of the mkdir() call.
	mode_t saved_umask = umask(0);
	char *base_dir = strdup(private_tmp);
	if (base_dir == NULL) {
		die("cannot allocate memory for string copy");
	}
	sc_must_snprintf(private_tmp, sizeof(private_tmp), "%s/tmp", base_dir);
	free(base_dir);
	if (mkdir(private_tmp, 01777) != 0) {
		die("cannot create temporary directory for private /tmp");
	}
	umask(saved_umask);

	// Step out to '/' while mounting, the new mount would not apply to the
	// current directory otherwise. The original location is remembered so
	// that we can return to it afterwards.
	char *original_cwd = get_current_dir_name();
	if (original_cwd == NULL) {
		die("cannot get current working directory");
	}
	if (chdir("/") != 0) {
		die("cannot change directory to '/'");
	}

	// MS_BIND is available since linux 2.4, MS_PRIVATE needs linux > 2.6.11
	sc_do_mount(private_tmp, "/tmp", NULL, MS_BIND, NULL);
	sc_do_mount("none", "/tmp", NULL, MS_PRIVATE, NULL);

	// The ownership change is done after the bind mount to avoid potential
	// shenanigans.
	if (chown("/tmp/", user, group) < 0) {
		die("cannot change ownership of /tmp");
	}

	// Go back to where we started.
	if (chdir(original_cwd) != 0) {
		die("cannot change current working directory to the original directory");
	}
	free(original_cwd);
}
// TODO: fold this into bootstrap
/**
 * Give the snap a private instance of devpts.
 *
 * See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
 *
 * Ubuntu by default uses devpts 'single-instance' mode where /dev/pts/ptmx is
 * mounted with ptmxmode=0000. We don't want to change the startup scripts
 * though, so we follow the instructions in point '4' of 'User-space changes'
 * in the above doc. In other words, after unshare(CLONE_NEWNS), we mount
 * devpts with -o newinstance,ptmxmode=0666 and then bind mount /dev/pts/ptmx
 * onto /dev/ptmx.
 **/
static void setup_private_pts(void)
{
	struct stat file_info;

	// Without /dev/pts/ptmx we are in legacy mode, which doesn't provide
	// the isolation we require.
	if (stat("/dev/pts/ptmx", &file_info) != 0) {
		die("cannot stat /dev/pts/ptmx");
	}
	// /dev/ptmx has to exist because it is the target of the bind mount
	// performed below.
	if (stat("/dev/ptmx", &file_info) != 0) {
		die("cannot stat /dev/ptmx");
	}

	// Since multi-instance, use ptmxmode=0666. The other options are
	// copied from /etc/default/devpts
	sc_do_mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
		    "newinstance,ptmxmode=0666,mode=0620,gid=5");
	sc_do_mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, NULL);
}
/**
* Setup mount profiles by running snap-update-ns.
*
* The first argument is an open file descriptor (though opened with O_PATH, so
* not as powerful), to a copy of snap-update-ns. The program is opened before
* the root filesystem is pivoted so that it is easier to pick the right copy.
**/
static void sc_setup_mount_profiles(int snap_update_ns_fd,
const char *snap_name)
{
debug("calling snap-update-ns to initialize mount namespace");
pid_t child = fork();
if (child < 0) {
die("cannot fork to run snap-update-ns");
}
if (child == 0) {
// We are the child, execute snap-update-ns
char *snap_name_copy SC_CLEANUP(sc_cleanup_string) = NULL;
snap_name_copy = strdup(snap_name);
if (snap_name_copy == NULL) {
die("cannot copy snap name");
}
char *argv[] = {
"snap-update-ns", "--from-snap-confine", snap_name_copy,
NULL
};
char *envp[3] = { NULL };
if (sc_is_debug_enabled()) {
envp[0] = "SNAPD_DEBUG=1";
}
debug("fexecv(%d (snap-update-ns), %s %s %s,)",
snap_update_ns_fd, argv[0], argv[1], argv[2]);
fexecve(snap_update_ns_fd, argv, envp);
die("cannot execute snap-update-ns");
}
// We are the parent, so wait for snap-update-ns to finish.
int status = 0;
debug("waiting for snap-update-ns to finish...");
if (waitpid(child, &status, 0) < 0) {
die("waitpid() failed for snap-update-ns process");
}
if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
die("snap-update-ns failed with code %i", WEXITSTATUS(status));
} else if (WIFSIGNALED(status)) {
die("snap-update-ns killed by signal %i", WTERMSIG(status));
}
debug("snap-update-ns finished successfully");
}
/**
 * Description of one directory that is bind mounted from the current root
 * filesystem into the scratch directory while bootstrapping the namespace.
 **/
struct sc_mount {
// Absolute path of the directory. The same path is used as the source of
// the bind mount and, relative to the scratch directory, as its target.
const char *path;
// When true the directory is created if it is missing and the mount is
// left as-is after the bind mount. When false (the default for entries
// that only give a path) the mount is made a recursive slave.
bool is_bidirectional;
};
/**
 * Configuration consumed by sc_bootstrap_mount_namespace().
 **/
struct sc_mount_config {
// Directory that is recursively bind mounted over the scratch directory
// and becomes the root filesystem after pivot_root.
const char *rootfs_dir;
// The struct is terminated with an entry with NULL path.
const struct sc_mount *mounts;
// Enables the steps guarded by this flag in the bootstrap code: mounting
// selected /etc entries from rootfs_dir and mounting the nvidia driver.
bool on_classic_distro;
// When true the directory containing the running snap-confine executable
// is bind mounted over /usr/lib/snapd inside the scratch directory.
bool uses_base_snap;
};
/**
* Bootstrap mount namespace.
*
* This is a chunk of tricky code that lets us have full control over the
* layout and direction of propagation of mount events. The documentation below
* assumes knowledge of the 'sharedsubtree.txt' document from the kernel source
* tree.
*
* As a reminder two definitions are quoted below:
*
* A 'propagation event' is defined as event generated on a vfsmount
* that leads to mount or unmount actions in other vfsmounts.
*
* A 'peer group' is defined as a group of vfsmounts that propagate
* events to each other.
*
* (end of quote).
*
* The main idea is to setup a mount namespace that has a root filesystem with
* vfsmounts and peer groups that, depending on the location, either isolate
* or share with the rest of the system.
*
* The vast majority of the filesystem is shared in one direction. Events from
* the outside (from the main mount namespace) propagate inside (to namespaces
* of particular snaps) so things like new snap revisions, mounted drives, etc,
* just show up as expected but even if a snap is exploited or malicious in
* nature it cannot affect anything in another namespace where it might cause
* security or stability issues.
*
* Selected directories (today just /media) can be shared in both directions.
* This allows snaps with sufficient privileges to either create, through the
* mount system call, additional mount points that are visible by the rest of
* the system (both the main mount namespace and namespaces of individual
* snaps) or remove them, through the unmount system call.
**/
static void sc_bootstrap_mount_namespace(const struct sc_mount_config *config)
{
	char scratch_dir[] = "/tmp/snap.rootfs_XXXXXX";
	char src[PATH_MAX] = { 0 };
	char dst[PATH_MAX] = { 0 };
	if (mkdtemp(scratch_dir) == NULL) {
		die("cannot create temporary directory for the root file system");
	}
	// NOTE: at this stage we just called unshare(CLONE_NEWNS). We are in a new
	// mount namespace and have a private list of mounts.
	debug("scratch directory for constructing namespace: %s", scratch_dir);
	// Make the root filesystem recursively shared. This way propagation events
	// will be shared with main mount namespace.
	sc_do_mount("none", "/", NULL, MS_REC | MS_SHARED, NULL);
	// Bind mount the temporary scratch directory for root filesystem over
	// itself so that it is a mount point. This is done so that it can become
	// unbindable as explained below.
	sc_do_mount(scratch_dir, scratch_dir, NULL, MS_BIND, NULL);
	// Make the scratch directory unbindable.
	//
	// This is necessary as otherwise a mount loop can occur and the kernel
	// would crash. The term unbindable simply states that it cannot be bind
	// mounted anywhere. When we construct recursive bind mounts below this
	// guarantees that this directory will not be replicated anywhere.
	sc_do_mount("none", scratch_dir, NULL, MS_UNBINDABLE, NULL);
	// Recursively bind mount desired root filesystem directory over the
	// scratch directory. This puts the initial content into the scratch space
	// and serves as a foundation for all subsequent operations below.
	//
	// The mount is recursive because it can either be applied to the root
	// filesystem of a core system (aka all-snap) or the core snap on a classic
	// system. In the former case we need recursive bind mounts to accurately
	// replicate the state of the root filesystem into the scratch directory.
	sc_do_mount(config->rootfs_dir, scratch_dir, NULL, MS_REC | MS_BIND,
		    NULL);
	// Make the scratch directory a recursive slave. Nothing done there will
	// propagate out to any peer group. This effectively detaches us from the
	// original namespace and coupled with pivot_root below serves as the
	// foundation of the mount sandbox.
	sc_do_mount("none", scratch_dir, NULL, MS_REC | MS_SLAVE, NULL);
	// Bind mount certain directories from the host filesystem to the scratch
	// directory. By default mount events will propagate both into and out
	// of the peer group. This way the running application can alter any global
	// state visible on the host and in other snaps. This can be restricted by
	// disabling the "is_bidirectional" flag as can be seen below.
	for (const struct sc_mount * mnt = config->mounts; mnt->path != NULL;
	     mnt++) {
		// Bidirectional mount points are created on demand, an
		// already existing directory is not an error.
		if (mnt->is_bidirectional && mkdir(mnt->path, 0755) < 0 &&
		    errno != EEXIST) {
			die("cannot create %s", mnt->path);
		}
		sc_must_snprintf(dst, sizeof dst, "%s/%s", scratch_dir,
				 mnt->path);
		sc_do_mount(mnt->path, dst, NULL, MS_REC | MS_BIND, NULL);
		if (!mnt->is_bidirectional) {
			// Mount events will only propagate inwards to the namespace. This
			// way the running application cannot alter any global state apart
			// from that of its own snap.
			sc_do_mount("none", dst, NULL, MS_REC | MS_SLAVE, NULL);
		}
	}
	if (config->on_classic_distro) {
		// Since we mounted /etc from the host filesystem to the scratch directory,
		// we may need to put certain directories from the desired root filesystem
		// (e.g. the core snap) back. This way the behavior of running snaps is not
		// affected by the alternatives directory from the host, if one exists.
		//
		// Fixes the following bugs:
		//  - https://bugs.launchpad.net/snap-confine/+bug/1580018
		//  - https://bugzilla.opensuse.org/show_bug.cgi?id=1028568
		const char *dirs_from_core[] =
		    { "/etc/alternatives", "/etc/ssl", "/etc/nsswitch.conf",
			NULL
		};
		for (const char **dirs = dirs_from_core; *dirs != NULL; dirs++) {
			const char *dir = *dirs;
			struct stat buf;
			if (access(dir, F_OK) == 0) {
				sc_must_snprintf(src, sizeof src, "%s%s",
						 config->rootfs_dir, dir);
				sc_must_snprintf(dst, sizeof dst, "%s%s",
						 scratch_dir, dir);
				// Only mount when both the source and the
				// target actually exist.
				if (lstat(src, &buf) == 0
				    && lstat(dst, &buf) == 0) {
					sc_do_mount(src, dst, NULL, MS_BIND,
						    NULL);
					sc_do_mount("none", dst, NULL, MS_SLAVE,
						    NULL);
				}
			}
		}
	}
	if (config->uses_base_snap) {
		// when bases are used we need to bind-mount the libexecdir
		// (that contains snap-exec) into /usr/lib/snapd of the
		// base snap so that snap-exec is available for the snaps
		// (base snaps do not ship snapd)
		// dst is always /usr/lib/snapd as this is where snapd
		// assumes to find snap-exec
		sc_must_snprintf(dst, sizeof dst, "%s/usr/lib/snapd",
				 scratch_dir);
		// bind mount the current $ROOT/usr/lib/snapd path,
		// where $ROOT is either "/" or the "/snap/core/current"
		// that we are re-execing from
		//
		// readlink does not NUL-terminate; the buffer is zeroed and one
		// byte larger than what readlink may fill, so it stays terminated.
		char self[PATH_MAX + 1] = { 0 };
		if (readlink("/proc/self/exe", self, sizeof(self) - 1) < 0) {
			die("cannot read /proc/self/exe");
		}
		// this cannot happen except when the kernel is buggy
		if (strstr(self, "/snap-confine") == NULL) {
			die("cannot use result from readlink: %s", self);
		}
		// dirname may modify its argument, self is not used afterwards.
		char *libexec_dir = dirname(self);
		// dirname(path) might return '.' depending on path.
		// /proc/self/exe should always point
		// to an absolute path, but let's guarantee that.
		if (libexec_dir[0] != '/') {
			die("cannot use the result of dirname(): %s",
			    libexec_dir);
		}
		sc_do_mount(libexec_dir, dst, NULL, MS_BIND | MS_RDONLY, NULL);
		sc_do_mount("none", dst, NULL, MS_SLAVE, NULL);
		// FIXME: snapctl tool - our apparmor policy wants it in
		//        /usr/bin/snapctl, we will need an empty file
		//        here from the base snap or we need to move it
		//        into a different location and just symlink it
		//        (/usr/lib/snapd/snapctl -> /usr/bin/snapctl)
		//        and in the base snap case adjust PATH
		//src = "/usr/bin/snapctl";
		//sc_must_snprintf(dst, sizeof dst, "%s%s", scratch_dir, src);
		//sc_do_mount(src, dst, NULL, MS_REC | MS_BIND, NULL);
		//sc_do_mount("none", dst, NULL, MS_REC | MS_SLAVE, NULL);
	}
	// Bind mount the directory where all snaps are mounted. The location of
	// this directory on the host filesystem may not match the location in
	// the desired root filesystem. In the "core" and "ubuntu-core" snaps the
	// directory is always /snap. On the host it is a build-time configuration
	// option stored in SNAP_MOUNT_DIR.
	sc_must_snprintf(dst, sizeof dst, "%s/snap", scratch_dir);
	sc_do_mount(SNAP_MOUNT_DIR, dst, NULL, MS_BIND | MS_REC | MS_SLAVE,
		    NULL);
	sc_do_mount("none", dst, NULL, MS_REC | MS_SLAVE, NULL);
	// Create the hostfs directory if one is missing. This directory is a part
	// of packaging now so perhaps this code can be removed later.
	if (access(SC_HOSTFS_DIR, F_OK) != 0) {
		debug("creating missing hostfs directory");
		if (mkdir(SC_HOSTFS_DIR, 0755) != 0) {
			die("cannot perform operation: mkdir %s",
			    SC_HOSTFS_DIR);
		}
	}
	// Ensure that hostfs is owned by the root user and group. We may have
	// (now or earlier) created the directory as the user who first ran a snap
	// on a given system and the group identity of that user is visible on
	// disk. This was LP:#1665004
	struct stat sb;
	if (stat(SC_HOSTFS_DIR, &sb) < 0) {
		die("cannot stat %s", SC_HOSTFS_DIR);
	}
	if (sb.st_uid != 0 || sb.st_gid != 0) {
		if (chown(SC_HOSTFS_DIR, 0, 0) < 0) {
			die("cannot change user/group owner of %s to root",
			    SC_HOSTFS_DIR);
		}
	}
	// Make the upcoming "put_old" directory for pivot_root private so that
	// mount events don't propagate to any peer group. In practice pivot root
	// has a number of undocumented requirements and one of them is that the
	// "put_old" directory (the second argument) cannot be shared in any way.
	sc_must_snprintf(dst, sizeof dst, "%s/%s", scratch_dir, SC_HOSTFS_DIR);
	sc_do_mount(dst, dst, NULL, MS_BIND, NULL);
	sc_do_mount("none", dst, NULL, MS_PRIVATE, NULL);
	// On classic mount the nvidia driver. Ideally this would be done in an
	// uniform way after pivot_root but this is good enough and requires less
	// code changes: the nvidia code assumes it has access to the existing
	// pre-pivot filesystem.
	if (config->on_classic_distro) {
		sc_mount_nvidia_driver(scratch_dir);
	}
	// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
	//                    pivot_root
	// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
	// Use pivot_root to "chroot" into the scratch directory.
	//
	// Q: Why are we using something as esoteric as pivot_root(2)?
	// A: Because this makes apparmor handling easy. Using a normal chroot
	// makes all apparmor rules conditional.  We are either running on an
	// all-snap system where this would-be chroot didn't happen and all the
	// rules see / as the root file system _OR_ we are running on top of a
	// classic distribution and this chroot has now moved all paths to
	// /tmp/snap.rootfs_*.
	//
	// Because we are using unshare(2) with CLONE_NEWNS we can essentially use
	// pivot_root just like chroot but this makes apparmor unaware of the old
	// root so everything works okay.
	//
	// HINT: If you are debugging this and are trying to see why pivot_root
	// happens to return EINVAL with any changes you may be making, please
	// consider applying
	// misc/0001-Add-printk-based-debugging-to-pivot_root.patch to your tree
	// kernel.
	debug("performing operation: pivot_root %s %s", scratch_dir, dst);
	if (syscall(SYS_pivot_root, scratch_dir, dst) < 0) {
		die("cannot perform operation: pivot_root %s %s", scratch_dir,
		    dst);
	}
	// Unmount the self-bind mount over the scratch directory created earlier
	// in the original root filesystem (which is now mounted on SC_HOSTFS_DIR).
	// This way we can remove the temporary directory we created and "clean up"
	// after ourselves nicely.
	sc_must_snprintf(dst, sizeof dst, "%s/%s", SC_HOSTFS_DIR, scratch_dir);
	sc_do_umount(dst, 0);
	// Remove the scratch directory. Note that we are using the path that is
	// based on the old root filesystem as after pivot_root we cannot guarantee
	// what is present at the same location normally. (It is probably an empty
	// /tmp directory that is populated in another place).
	debug("performing operation: rmdir %s", dst);
	if (rmdir(dst) < 0) {
		die("cannot perform operation: rmdir %s", dst);
	}
	// Make the old root filesystem recursively slave. This way operations
	// performed in this mount namespace will not propagate to the peer group.
	// This is another essential part of the confinement system.
	sc_do_mount("none", SC_HOSTFS_DIR, NULL, MS_REC | MS_SLAVE, NULL);
	// Detach the redundant hostfs version of sysfs since it shows up in the
	// mount table and software inspecting the mount table may become confused
	// (eg, docker and LP:# 162601).
	sc_must_snprintf(src, sizeof src, "%s/sys", SC_HOSTFS_DIR);
	sc_do_umount(src, UMOUNT_NOFOLLOW | MNT_DETACH);
	// Detach the redundant hostfs version of /dev since it shows up in the
	// mount table and software inspecting the mount table may become confused.
	sc_must_snprintf(src, sizeof src, "%s/dev", SC_HOSTFS_DIR);
	sc_do_umount(src, UMOUNT_NOFOLLOW | MNT_DETACH);
	// Detach the redundant hostfs version of /proc since it shows up in the
	// mount table and software inspecting the mount table may become confused.
	sc_must_snprintf(src, sizeof src, "%s/proc", SC_HOSTFS_DIR);
	sc_do_umount(src, UMOUNT_NOFOLLOW | MNT_DETACH);
}
/**
 * Advance to the next segment of a path that was split in place.
 *
 * @path: a pathname in which every '/' was replaced with '\0'.
 * @offsetp: pointer to the offset of the segment that was seen last.
 *           Updated on return to the offset of the next segment (or to
 *           @fulllen when there are no more segments).
 * @fulllen: length of the original, unsplit path.
 *
 * Returns a pointer to the next path segment, or NULL if done.
 */
static char * __attribute__ ((used))
get_nextpath(char *path, size_t * offsetp, size_t fulllen)
{
	size_t pos = *offsetp;

	if (pos >= fulllen) {
		return NULL;
	}
	// Skip over the rest of the current segment...
	for (; pos < fulllen && path[pos] != '\0'; pos++) {
	}
	// ...and over the separator(s) that follow it.
	for (; pos < fulllen && path[pos] == '\0'; pos++) {
	}
	*offsetp = pos;
	if (pos >= fulllen) {
		return NULL;
	}
	return path + pos;
}
/**
 * Check that @subdir is a subdir of @dir.
 *
 * A directory is considered to be a subdirectory of itself. The comparison
 * is purely textual, paths are not normalized or resolved in any way.
 **/
static bool __attribute__ ((used))
is_subdir(const char *subdir, const char *dir)
{
	const size_t dir_len = strlen(dir);
	const size_t subdir_len = strlen(subdir);

	// @subdir has to be at least as long as @dir and @dir has to be a
	// prefix of @subdir.
	if (subdir_len < dir_len || memcmp(subdir, dir, dir_len) != 0) {
		return false;
	}
	// With the prefix check done, equal lengths mean the two paths are
	// identical.
	if (subdir_len == dir_len) {
		return true;
	}
	// An empty @dir has no non-empty subdirectories.
	if (dir_len == 0) {
		return false;
	}
	// @subdir is strictly longer than @dir here. It is a real
	// subdirectory when the common prefix ends at a directory separator:
	// either @dir itself ends with one ("path/") or the separator sits in
	// @subdir exactly where @dir ends ("path" vs "path/stuff").
	return dir[dir_len - 1] == '/' || subdir[dir_len] == '/';
}
/**
 * Open the snap-update-ns executable that lives next to snap-confine.
 *
 * The directory is derived from /proc/self/exe, the path of the currently
 * running executable. Any failure along the way is fatal.
 *
 * Returns a file descriptor opened with O_PATH and O_CLOEXEC. The caller owns
 * the descriptor and is responsible for closing it.
 **/
static int sc_open_snap_update_ns(void)
{
	// +1 is for the case where the link is exactly PATH_MAX long but we also
	// want to store the terminating '\0'. The readlink system call doesn't add
	// terminating null, but our initialization of buf handles this for us as
	// long as readlink is never allowed to fill the last byte.
	char buf[PATH_MAX + 1] = { 0 };
	if (readlink("/proc/self/exe", buf, sizeof buf - 1) < 0) {
		die("cannot readlink /proc/self/exe");
	}
	if (buf[0] != '/') {	// this shouldn't happen, but make sure have absolute path
		die("readlink /proc/self/exe returned relative path");
	}
	// dirname may modify its argument so it operates on a copy.
	char *bufcopy SC_CLEANUP(sc_cleanup_string) = NULL;
	bufcopy = strdup(buf);
	if (bufcopy == NULL) {
		die("cannot copy buffer");
	}
	char *dname = dirname(bufcopy);
	sc_must_snprintf(buf, sizeof buf, "%s/%s", dname, "snap-update-ns");
	debug("snap-update-ns executable: %s", buf);
	// O_NOFOLLOW makes the open fail if the final path component is a
	// symbolic link.
	int fd = open(buf, O_PATH | O_RDONLY | O_NOFOLLOW | O_CLOEXEC);
	if (fd < 0) {
		die("cannot open snap-update-ns executable");
	}
	debug("opened snap-update-ns executable as file descriptor %d", fd);
	return fd;
}
/**
 * Populate the mount namespace of a snap.
 *
 * Bootstraps the root filesystem (based on the base snap on classic systems
 * or when an alternative base is used, based on the real root filesystem
 * otherwise), sets up private /tmp and /dev/pts, applies quirks on classic
 * and finally runs snap-update-ns to apply the mount profiles.
 *
 * @base_snap_name: name of the base snap; "core" may degrade to the legacy
 *                  "ubuntu-core" snap when "core" is not available.
 * @snap_name: name of the snap that is being started.
 *
 * Any failure is fatal. On return the working directory is either the
 * original one or, if that is not reachable anymore, SC_VOID_DIR.
 **/
void sc_populate_mount_ns(const char *base_snap_name, const char *snap_name)
{
	// Get the current working directory before we start fiddling with
	// mounts and possibly pivot_root.  At the end of the whole process, we
	// will try to re-locate to the same directory (if possible).
	char *vanilla_cwd SC_CLEANUP(sc_cleanup_string) = NULL;
	vanilla_cwd = get_current_dir_name();
	if (vanilla_cwd == NULL) {
		die("cannot get the current working directory");
	}
	// Find and open snap-update-ns from the same path as where we
	// (snap-confine) were called.
	int snap_update_ns_fd SC_CLEANUP(sc_cleanup_close) = -1;
	snap_update_ns_fd = sc_open_snap_update_ns();
	bool on_classic_distro = is_running_on_classic_distribution();
	// on classic or with alternative base snaps we need to setup
	// a different confinement
	if (on_classic_distro || !sc_streq(base_snap_name, "core")) {
		const struct sc_mount mounts[] = {
			{"/dev"},	// because it contains devices on host OS
			{"/etc"},	// because that's where /etc/resolv.conf lives, perhaps a bad idea
			{"/home"},	// to support /home/*/snap and home interface
			{"/root"},	// because that is $HOME for services
			{"/proc"},	// fundamental filesystem
			{"/sys"},	// fundamental filesystem
			{"/tmp"},	// to get writable tmp
			{"/var/snap"},	// to get access to global snap data
			{"/var/lib/snapd"},	// to get access to snapd state and seccomp profiles
			{"/var/tmp"},	// to get access to the other temporary directory
			{"/run"},	// to get /run with sockets and what not
			{"/lib/modules"},	// access to the modules of the running kernel
			{"/usr/src"},	// FIXME: move to SecurityMounts in system-trace interface
			{"/var/log"},	// FIXME: move to SecurityMounts in log-observe interface
#ifdef MERGED_USR
			{"/run/media", true},	// access to the users removable devices
#else
			{"/media", true},	// access to the users removable devices
#endif				// MERGED_USR
			{"/run/netns", true},	// access to the 'ip netns' network namespaces
			{},
		};
		char rootfs_dir[PATH_MAX] = { 0 };
		sc_must_snprintf(rootfs_dir, sizeof rootfs_dir,
				 "%s/%s/current/", SNAP_MOUNT_DIR,
				 base_snap_name);
		if (access(rootfs_dir, F_OK) != 0) {
			// Only the "core" snap has a fallback, any other
			// missing base snap is fatal right away.
			if (!sc_streq(base_snap_name, "core")) {
				die("cannot locate the base snap: %s",
				    base_snap_name);
			}
			// As a special fallback, allow the
			// base snap to degrade from "core" to
			// "ubuntu-core". This is needed for
			// the migration tests.
			base_snap_name = "ubuntu-core";
			sc_must_snprintf(rootfs_dir, sizeof rootfs_dir,
					 "%s/%s/current/",
					 SNAP_MOUNT_DIR, base_snap_name);
			if (access(rootfs_dir, F_OK) != 0) {
				die("cannot locate the core or legacy core snap (current symlink missing?)");
			}
			// The legacy core snap was found, carry on with it.
		}
		struct sc_mount_config classic_config = {
			.rootfs_dir = rootfs_dir,
			.mounts = mounts,
			.on_classic_distro = true,
			.uses_base_snap = !sc_streq(base_snap_name, "core"),
		};
		sc_bootstrap_mount_namespace(&classic_config);
	} else {
		// This is what happens on an all-snap system. The rootfs we start with
		// is the real outer rootfs.  There are no unidirectional bind mounts
		// needed because everything is already OK. We still keep the
		// bidirectional /media mount point so that snaps designed for mounting
		// filesystems can use that space for whatever they need.
		const struct sc_mount mounts[] = {
			{"/media", true},
			{"/run/netns", true},
			{},
		};
		struct sc_mount_config all_snap_config = {
			.rootfs_dir = "/",
			.mounts = mounts,
			.uses_base_snap = !sc_streq(base_snap_name, "core"),
		};
		sc_bootstrap_mount_namespace(&all_snap_config);
	}
	// set up private mounts
	// TODO: rename this and fold it into bootstrap
	setup_private_mount(snap_name);
	// set up private /dev/pts
	// TODO: fold this into bootstrap
	setup_private_pts();
	// setup quirks for specific snaps
	if (on_classic_distro) {
		sc_setup_quirks();
	}
	// setup the security backend bind mounts
	sc_setup_mount_profiles(snap_update_ns_fd, snap_name);
	// Try to re-locate back to vanilla working directory. This can fail
	// because that directory is no longer present.
	if (chdir(vanilla_cwd) != 0) {
		debug("cannot remain in %s, moving to the void directory",
		      vanilla_cwd);
		if (chdir(SC_VOID_DIR) != 0) {
			die("cannot change directory to %s", SC_VOID_DIR);
		}
		debug("successfully moved to %s", SC_VOID_DIR);
	}
}
static bool is_mounted_with_shared_option(const char *dir)
__attribute__ ((nonnull(1)));
static bool is_mounted_with_shared_option(const char *dir)
{
struct sc_mountinfo *sm SC_CLEANUP(sc_cleanup_mountinfo) = NULL;
sm = sc_parse_mountinfo(NULL);
if (sm == NULL) {
die("cannot parse /proc/self/mountinfo");
}
struct sc_mountinfo_entry *entry = sc_first_mountinfo_entry(sm);
while (entry != NULL) {
const char *mount_dir = entry->mount_dir;
if (sc_streq(mount_dir, dir)) {
const char *optional_fields = entry->optional_fields;
if (strstr(optional_fields, "shared:") != NULL) {
return true;
}
}
entry = sc_next_mountinfo_entry(entry);
}
return false;
}
/**
 * Ensure that SNAP_MOUNT_DIR is mounted with shared propagation.
 *
 * Nothing is done when either "/" or SNAP_MOUNT_DIR is already a shared
 * mount. Otherwise SNAP_MOUNT_DIR is recursively bind mounted over itself
 * and the resulting mount is made recursively shared.
 **/
void sc_ensure_shared_snap_mount(void)
{
	if (is_mounted_with_shared_option("/")
	    || is_mounted_with_shared_option(SNAP_MOUNT_DIR)) {
		// Sharing is already in place, there is nothing to do.
		return;
	}
	sc_do_mount(SNAP_MOUNT_DIR, SNAP_MOUNT_DIR, "none",
		    MS_BIND | MS_REC, 0);
	sc_do_mount("none", SNAP_MOUNT_DIR, NULL, MS_SHARED | MS_REC, NULL);
}