Skip to content
This repository has been archived by the owner on Apr 15, 2020. It is now read-only.

Commit

Permalink
get_tasks: don't hang proxy if tasks are exiting
Browse files Browse the repository at this point in the history
In both GetTasks and GetTasksRecursive in the proxy, switch to a 1-second
select plus recvmsg(MSG_DONTWAIT) when waiting for pids from the cgmanager.
If the cgmanager sees that a credential send failed, then it will resend the
first pid which succeeded, so that the proxy just gets a dup.  If no previous
pid has succeeded, or if that pid has now also exited, then we'll simply exit,
and the proxy will now fail gracefully.

Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
  • Loading branch information
hallyn committed Nov 27, 2014
1 parent 2ac9c62 commit 9cae5aa
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 15 deletions.
18 changes: 14 additions & 4 deletions access_checks.c
Expand Up @@ -107,8 +107,11 @@ int send_creds(int sock, struct ucred *cred)
msg.msg_iovlen = 1;

if (sendmsg(sock, &msg, 0) < 0) {
int saved_errno = errno;
nih_error("%s: failed at sendmsg: %s", __func__,
strerror(errno));
if (saved_errno == 3)
return -3;
return -1;
}
return 0;
Expand All @@ -124,6 +127,8 @@ int send_creds(int sock, struct ucred *cred)
void get_scm_creds_sync(int sock, struct ucred *cred)
{
struct msghdr msg = { 0 };
struct timeval tv;
fd_set rfds;
struct iovec iov;
struct cmsghdr *cmsg;
char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
Expand Down Expand Up @@ -155,10 +160,15 @@ void get_scm_creds_sync(int sock, struct ucred *cred)
msg.msg_iov = &iov;
msg.msg_iovlen = 1;

// retry logic is not ideal, especially as we are not
// threaded. Sleep at most 1 second waiting for the client
// to send us the scm_cred
ret = recvmsg(sock, &msg, 0);
FD_ZERO(&rfds);
FD_SET(sock, &rfds);

tv.tv_sec = 1;
tv.tv_usec = 0;
if (select(sock+1, &rfds, NULL, NULL, &tv) < 0) {
return;
}
ret = recvmsg(sock, &msg, MSG_DONTWAIT);
if (ret < 0) {
nih_error("Failed to receive scm_cred: %s",
strerror(errno));
Expand Down
4 changes: 2 additions & 2 deletions cgmanager-proxy.c
Expand Up @@ -867,7 +867,7 @@ int get_tasks_main (void *parent, const char *controller, const char *cgroup,
for (i=0; i<nrpids; i++) {
get_scm_creds_sync(sv[0], &tcred);
if (tcred.pid == -1) {
nih_error("%s: Failed getting pid from server",
nih_warn("%s: Failed getting pid from server",
__func__);
goto out;
}
Expand Down Expand Up @@ -943,7 +943,7 @@ int get_tasks_recursive_main (void *parent, const char *controller,
for (i=0; i<nrpids; i++) {
get_scm_creds_sync(sv[0], &tcred);
if (tcred.pid == -1) {
nih_error("%s: Failed getting pid from server",
nih_warn("%s: Failed getting pid from server",
__func__);
goto out;
}
Expand Down
48 changes: 39 additions & 9 deletions frontend.c
Expand Up @@ -1168,11 +1168,15 @@ int cgmanager_remove (void *data, NihDBusMessage *message, const char *controlle
return ret;
}

/* get_tasks - list tasks for a single cgroup */
/*
* get_tasks - list tasks for a single cgroup
* returns -1 on error, 0 on success.
*/
void get_tasks_scm_complete(struct scm_sock_data *data)
{
struct ucred pcred;
int i, ret;
pid_t firstvalid = -1;
int32_t *pids, nrpids;
ret = get_tasks_main(data, data->controller, data->cgroup,
data->pcred, data->rcred, &pids);
Expand All @@ -1187,12 +1191,25 @@ void get_tasks_scm_complete(struct scm_sock_data *data)
return;
}
pcred.uid = 0; pcred.gid = 0;
for (i=0; i<ret; i++) {
for (i=0; i<nrpids; i++) {
pcred.pid = pids[i];
if (send_creds(data->fd, &pcred)) {
nih_error("get_tasks_scm: error writing pids back to client");
again:
ret = send_creds(data->fd, &pcred);
if (ret == -3) {
if (firstvalid == -1 || firstvalid == pcred.pid) {
nih_error("gettasks: too much pid churn. Last valid pid was %d\n",
firstvalid);
return;
}
nih_info("gettasks: sending dup pid %d in place of exited pid %d\n",
firstvalid, pcred.pid);
pcred.pid = firstvalid;
goto again;

} else if (ret < 0)
return;
}
if (firstvalid == -1)
firstvalid = pids[i];
}
}

Expand Down Expand Up @@ -1275,6 +1292,7 @@ void get_tasks_recursive_scm_complete(struct scm_sock_data *data)
{
struct ucred pcred;
int i, ret;
pid_t firstvalid = -1;
int32_t *pids, nrpids;

ret = get_tasks_recursive_main(data, data->controller, data->cgroup,
Expand All @@ -1290,12 +1308,24 @@ void get_tasks_recursive_scm_complete(struct scm_sock_data *data)
return;
}
pcred.uid = 0; pcred.gid = 0;
for (i=0; i<ret; i++) {
for (i=0; i<nrpids; i++) {
pcred.pid = pids[i];
if (send_creds(data->fd, &pcred)) {
nih_error("get_tasks_recursive_scm: error writing pids back to client");
again:
ret = send_creds(data->fd, &pcred);
if (ret == -3) {
if (firstvalid == -1 || firstvalid == pcred.pid) {
nih_error("gettasks: too much pid churn. Last valid pid was %d\n",
firstvalid);
return;
}
nih_info("gettasks: sending dup pid %d in place of exited pid %d\n",
firstvalid, pcred.pid);
pcred.pid = firstvalid;
goto again;
} else if (ret < 0)
return;
}
if (firstvalid == -1)
firstvalid = pids[i];
}
}

Expand Down

0 comments on commit 9cae5aa

Please sign in to comment.