Skip to content

Commit

Permalink
start: handle setting pdeath signal in new pidns
Browse files Browse the repository at this point in the history
In the usual case the child runs in a separate pid namespace. So far we haven't
been able to reliably set the pdeath signal. When we set the pdeath signal we
need to verify that we haven't lost a race whereby we have been orphaned and
though we have set a pdeath signal it won't help us since, well, the parent is
dead.
We were able to correctly handle this case when we were in the same pidns since
getppid() will return a valid pid. When we are in a separate pidns 0 will be
returned since the parent doesn't exist in our pidns.
A while back, while Jann and I were discussing other things he came up with a
nifty idea: simply pass an fd for the parent's status file and check the
"State:" field. This is the implementation of that idea.

Suggested-by: Jann Horn <jann@thejh.net>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
  • Loading branch information
Christian Brauner committed Apr 14, 2019
1 parent 9810d19 commit cabdf90
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 10 deletions.
30 changes: 27 additions & 3 deletions src/lxc/start.c
Expand Up @@ -681,6 +681,9 @@ void lxc_free_handler(struct lxc_handler *handler)
if (handler->conf->maincmd_fd >= 0)
lxc_abstract_unix_close(handler->conf->maincmd_fd);

if (handler->monitor_status_fd >= 0)
close(handler->monitor_status_fd);

if (handler->state_socket_pair[0] >= 0)
close(handler->state_socket_pair[0]);

Expand Down Expand Up @@ -715,6 +718,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
handler->data_sock[0] = handler->data_sock[1] = -1;
handler->conf = conf;
handler->lxcpath = lxcpath;
handler->monitor_status_fd = -EBADF;
handler->pinfd = -1;
handler->sigfd = -EBADF;
handler->init_died = false;
Expand Down Expand Up @@ -765,11 +769,17 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,

int lxc_init(const char *name, struct lxc_handler *handler)
{
__do_close_prot_errno int status_fd = -EBADF;
int ret;
const char *loglevel;
struct lxc_conf *conf = handler->conf;

handler->monitor_pid = lxc_raw_getpid();
status_fd = open("/proc/self/status", O_RDONLY | O_CLOEXEC);
if (status_fd < 0) {
SYSERROR("Failed to open monitor status fd");
goto out_close_maincmd_fd;
}

lsm_init();
TRACE("Initialized LSM");
Expand Down Expand Up @@ -900,6 +910,7 @@ int lxc_init(const char *name, struct lxc_handler *handler)
TRACE("Initialized LSM");

INFO("Container \"%s\" is initialized", name);
handler->monitor_status_fd = move_fd(status_fd);
return 0;

out_destroy_cgroups:
Expand Down Expand Up @@ -1094,6 +1105,7 @@ void lxc_abort(const char *name, struct lxc_handler *handler)

static int do_start(void *data)
{
__do_close_prot_errno int status_fd = -EBADF;
int ret;
char path[PATH_MAX];
uid_t new_uid;
Expand All @@ -1106,13 +1118,18 @@ static int do_start(void *data)

lxc_sync_fini_parent(handler);

if (lxc_abstract_unix_recv_fds(handler->data_sock[1], &status_fd, 1, NULL, 0) < 0) {
ERROR("Failed to receive status file descriptor to child process");
goto out_warn_father;
}

/* This prctl must be before the synchro, so if the parent dies before
* we set the parent death signal, we will detect its death with the
* synchro right after, otherwise we have a window where the parent can
* exit before we set the pdeath signal leading to a unsupervized
* container.
*/
ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid);
ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
goto out_warn_father;
Expand Down Expand Up @@ -1190,7 +1207,7 @@ static int do_start(void *data)
goto out_warn_father;

/* set{g,u}id() clears deathsignal */
ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid);
ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
goto out_warn_father;
Expand Down Expand Up @@ -1438,7 +1455,8 @@ static int do_start(void *data)
}

if (handler->conf->monitor_signal_pdeath != SIGKILL) {
ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath, handler->monitor_pid);
ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath,
handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to %d",
handler->conf->monitor_signal_pdeath);
Expand Down Expand Up @@ -1718,6 +1736,12 @@ static int lxc_spawn(struct lxc_handler *handler)

lxc_sync_fini_child(handler);

if (lxc_abstract_unix_send_fds(handler->data_sock[0], &handler->monitor_status_fd, 1, NULL, 0) < 0) {
ERROR("Failed to send status file descriptor to child process");
goto out_delete_net;
}
close_prot_errno_disarm(handler->monitor_status_fd);

/* Map the container uids. The container became an invalid userid the
* moment it was cloned with CLONE_NEWUSER. This call doesn't change
* anything immediately, but allows the container to setuid(0) (0 being
Expand Down
2 changes: 2 additions & 0 deletions src/lxc/start.h
Expand Up @@ -105,6 +105,8 @@ struct lxc_handler {
/* The monitor's pid. */
pid_t monitor_pid;

int monitor_status_fd;

/* Whether the child has already exited. */
bool init_died;

Expand Down
52 changes: 46 additions & 6 deletions src/lxc/utils.c
Expand Up @@ -1711,20 +1711,60 @@ uint64_t lxc_find_next_power2(uint64_t n)
return n;
}

int lxc_set_death_signal(int signal, pid_t parent)
static int process_dead(/* takes */ int status_fd)
{
__do_close_prot_errno int dupfd = -EBADF;
__do_free char *line = NULL;
__do_fclose FILE *f = NULL;
int ret = 0;
size_t n = 0;

dupfd = dup(status_fd);
if (dupfd < 0)
return -1;

if (fd_cloexec(dupfd, true) < 0)
return -1;

/* transfer ownership of fd */
f = fdopen(move_fd(dupfd), "re");
if (!f)
return -1;

ret = 0;
while (getline(&line, &n, f) != -1) {
char *state;

if (strncmp(line, "State:", 6))
continue;

state = lxc_trim_whitespace_in_place(line + 6);
/* only check whether process is dead or zombie for now */
if (*state == 'X' || *state == 'Z')
ret = 1;
}

return ret;
}

int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd)
{
int ret;
pid_t ppid;

ret = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0),
prctl_arg(0), prctl_arg(0));

/* If not in a PID namespace, check whether we have been orphaned. */
/* verify that we haven't been orphaned in the meantime */
ppid = (pid_t)syscall(SYS_getppid);
if (ppid && ppid != parent) {
ret = raise(SIGKILL);
if (ret < 0)
return -1;
if (ppid == 0) { /* parent outside our pidns */
if (parent_status_fd < 0)
return 0;

if (process_dead(parent_status_fd) == 1)
return raise(SIGKILL);
} else if (ppid != parent) {
return raise(SIGKILL);
}

if (ret < 0)
Expand Down
2 changes: 1 addition & 1 deletion src/lxc/utils.h
Expand Up @@ -253,7 +253,7 @@ static inline uint64_t lxc_getpagesize(void)
extern uint64_t lxc_find_next_power2(uint64_t n);

/* Set a signal the child process will receive after the parent has died. */
extern int lxc_set_death_signal(int signal, pid_t parent);
extern int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd);
extern int fd_cloexec(int fd, bool cloexec);
extern int recursive_destroy(char *dirname);
extern int lxc_setup_keyring(void);
Expand Down

0 comments on commit cabdf90

Please sign in to comment.