diff --git a/src/lxc/attach.c b/src/lxc/attach.c index aac38ffe1f..bfb2abf014 100644 --- a/src/lxc/attach.c +++ b/src/lxc/attach.c @@ -220,17 +220,7 @@ static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx) static int lxc_attach_to_ns(pid_t pid, int which) { - /* according to , - * the file for user namespaces in /proc/$pid/ns will be called - * 'user' once the kernel supports it - */ - static char *ns[] = { "user", "mnt", "pid", "uts", "ipc", "net", "cgroup" }; - static int flags[] = { - CLONE_NEWUSER, CLONE_NEWNS, CLONE_NEWPID, CLONE_NEWUTS, CLONE_NEWIPC, - CLONE_NEWNET, CLONE_NEWCGROUP - }; - static const int size = sizeof(ns) / sizeof(char *); - int fd[size]; + int fd[LXC_NS_MAX]; int i, j, saved_errno; @@ -239,16 +229,16 @@ static int lxc_attach_to_ns(pid_t pid, int which) return -1; } - for (i = 0; i < size; i++) { + for (i = 0; i < LXC_NS_MAX; i++) { /* ignore if we are not supposed to attach to that * namespace */ - if (which != -1 && !(which & flags[i])) { + if (which != -1 && !(which & ns_info[i].clone_flag)) { fd[i] = -1; continue; } - fd[i] = lxc_preserve_ns(pid, ns[i]); + fd[i] = lxc_preserve_ns(pid, ns_info[i].proc_name); if (fd[i] < 0) { saved_errno = errno; @@ -259,23 +249,28 @@ static int lxc_attach_to_ns(pid_t pid, int which) close(fd[j]); errno = saved_errno; - SYSERROR("failed to open namespace: '%s'.", ns[i]); + SYSERROR("failed to open namespace: '%s'.", ns_info[i].proc_name); return -1; } } - for (i = 0; i < size; i++) { - if (fd[i] >= 0 && setns(fd[i], 0) != 0) { + for (i = 0; i < LXC_NS_MAX; i++) { + if (fd[i] < 0) + continue; + + if (setns(fd[i], 0) < 0) { saved_errno = errno; - for (j = i; j < size; j++) + for (j = i; j < LXC_NS_MAX; j++) close(fd[j]); errno = saved_errno; - SYSERROR("failed to set namespace '%s'", ns[i]); + SYSERROR("Failed to attach to namespace \"%s\".", ns_info[i].proc_name); return -1; } + DEBUG("Attached to namespace \"%s\".", ns_info[i].proc_name); + close(fd[i]); } @@ -1245,11 +1240,9 @@ static 
int attach_child_main(void* data) flags = fcntl(fd, F_GETFL); if (flags < 0) continue; - if (flags & FD_CLOEXEC) { - if (fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC) < 0) { + if (flags & FD_CLOEXEC) + if (fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC) < 0) SYSERROR("Unable to clear CLOEXEC from fd"); - } - } } /* we don't need proc anymore */ diff --git a/src/lxc/namespace.c b/src/lxc/namespace.c index bb76f4c903..3a5b3bef6c 100644 --- a/src/lxc/namespace.c +++ b/src/lxc/namespace.c @@ -69,12 +69,28 @@ pid_t lxc_clone(int (*fn)(void *), void *arg, int flags) return ret; } +/* Leave the user namespace at the first position in the array of structs so + * that we always attach to it first when iterating over the array and using + * setns() to switch namespaces. This especially affects lxc_attach(): Suppose + * you cloned a new user namespace and mount namespace as an unprivileged user + * on the host and want to setns() to the mount namespace. This requires you to + * attach to the user namespace first; otherwise the kernel will fail this check: + * + * if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || + * !ns_capable(current_user_ns(), CAP_SYS_CHROOT) || + * !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) + * return -EPERM; + * + * in + * + * linux/fs/namespace.c:mntns_install(). 
+ */ const struct ns_info ns_info[LXC_NS_MAX] = { + [LXC_NS_USER] = {"user", CLONE_NEWUSER, "CLONE_NEWUSER"}, [LXC_NS_MNT] = {"mnt", CLONE_NEWNS, "CLONE_NEWNS"}, [LXC_NS_PID] = {"pid", CLONE_NEWPID, "CLONE_NEWPID"}, [LXC_NS_UTS] = {"uts", CLONE_NEWUTS, "CLONE_NEWUTS"}, [LXC_NS_IPC] = {"ipc", CLONE_NEWIPC, "CLONE_NEWIPC"}, - [LXC_NS_USER] = {"user", CLONE_NEWUSER, "CLONE_NEWUSER"}, [LXC_NS_NET] = {"net", CLONE_NEWNET, "CLONE_NEWNET"}, [LXC_NS_CGROUP] = {"cgroup", CLONE_NEWCGROUP, "CLONE_NEWCGROUP"} }; diff --git a/src/lxc/namespace.h b/src/lxc/namespace.h index 57167f4d04..4916950c15 100644 --- a/src/lxc/namespace.h +++ b/src/lxc/namespace.h @@ -54,11 +54,11 @@ #endif enum { + LXC_NS_USER, LXC_NS_MNT, LXC_NS_PID, LXC_NS_UTS, LXC_NS_IPC, - LXC_NS_USER, LXC_NS_NET, LXC_NS_CGROUP, LXC_NS_MAX