Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 1 addition & 65 deletions orte/mca/odls/alps/odls_alps_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,70 +164,6 @@ orte_odls_base_module_t orte_odls_alps_module = {
};


static bool odls_alps_child_died(orte_proc_t *child)
{
time_t end;
pid_t ret;

/* Because of rounding in time (which returns whole seconds) we
* have to add 1 to our wait number: this means that we wait
* somewhere between (target) and (target)+1 seconds. Otherwise,
* the default 1s actually means 'somwhere between 0 and 1s'. */
end = time(NULL) + orte_odls_globals.timeout_before_sigkill + 1;
do {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps:WAITPID CHECKING PID %d WITH TIMEOUT %d SECONDS",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid),
orte_odls_globals.timeout_before_sigkill + 1));
ret = waitpid(child->pid, &child->exit_code, WNOHANG);
if (child->pid == ret) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps:WAITPID INDICATES PROC %d IS DEAD",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid)));
/* It died -- return success */
return true;
} else if (0 == ret) {
/* with NOHANG specified, if a process has already exited
* while waitpid was registered, then waitpid returns 0
* as there is no error - this is a race condition problem
* that occasionally causes us to incorrectly report a proc
* as refusing to die. Unfortunately, errno may not be reset
* by waitpid in this case, so we cannot check it.
*
* (note the previous fix to this, to return 'process dead'
* here, fixes the race condition at the cost of reporting
* all live processes have immediately died! Better to
* occasionally report a dead process as still living -
* which will occasionally trip the timeout for cases that
* are right on the edge.)
*/
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps:WAITPID INDICATES PID %d MAY HAVE ALREADY EXITED",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid)));
/* Do nothing, process still alive */
} else if (-1 == ret && ECHILD == errno) {
/* The pid no longer exists, so we'll call this "good
enough for government work" */
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps:WAITPID INDICATES PID %d NO LONGER EXISTS",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid)));
return true;
}

/* Bogus delay for 1 msec - let's actually give the CPU some time
* to quit the other process (sched_yield() -- even if we have it
* -- changed behavior in 2.6.3x Linux flavors to be undesirable)
* Don't use select on a bogus file descriptor here as it has proven
* unreliable and sometimes immediately returns - we really, really
* -do- want to wait a bit!
*/
usleep(1000);
} while (time(NULL) < end);

/* The child didn't die, so return false */
return false;
}

static int odls_alps_kill_local(pid_t pid, int signum)
{
pid_t pgrp;
Expand Down Expand Up @@ -264,7 +200,7 @@ int orte_odls_alps_kill_local_procs(opal_pointer_array_t *procs)
int rc;

if (ORTE_SUCCESS != (rc = orte_odls_base_default_kill_local_procs(procs,
odls_alps_kill_local, odls_alps_child_died))) {
odls_alps_kill_local))) {
ORTE_ERROR_LOG(rc);
return rc;
}
Expand Down
89 changes: 73 additions & 16 deletions orte/mca/odls/base/odls_base_default_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -1366,16 +1366,65 @@ void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata)
ORTE_ACTIVATE_PROC_STATE(&proc->name, state);
}

typedef struct {
opal_object_t super;
orte_proc_t *child;
orte_odls_base_kill_local_fn_t kill_local;
} orte_odls_quick_caddy_t;
static void qcdcon(orte_odls_quick_caddy_t *p)
{
p->child = NULL;
p->kill_local = NULL;
}
static void qcddes(orte_odls_quick_caddy_t *p)
{
if (NULL != p->child) {
OBJ_RELEASE(p->child);
}
}
OBJ_CLASS_INSTANCE(orte_odls_quick_caddy_t,
opal_object_t,
qcdcon, qcddes);

static void send_kill(int sd, short args, void *cbdata)
{
orte_timer_t *tm = (orte_timer_t*)cbdata;
orte_odls_quick_caddy_t *cd = (orte_odls_quick_caddy_t*)tm->payload;

OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s SENDING FORCE SIGKILL TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&cd->child->name)));

cd->kill_local(cd->child->pid, SIGKILL);
/* indicate the waitpid fired as this is effectively what
* has happened
*/
ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_WAITPID);
cd->child->pid = 0;

/* ensure the child's session directory is cleaned up */
orte_session_dir_finalize(&cd->child->name);
/* check for everything complete - this will remove
* the child object from our local list
*/
if (ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_WAITPID)) {
ORTE_ACTIVATE_PROC_STATE(&cd->child->name, cd->child->state);
}
OBJ_RELEASE(cd);
}

int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
orte_odls_base_kill_local_fn_t kill_local,
orte_odls_base_child_died_fn_t child_died)
orte_odls_base_kill_local_fn_t kill_local)
{
orte_proc_t *child;
opal_list_t procs_killed;
orte_proc_t *proc, proctmp;
int i, j;
opal_pointer_array_t procarray, *procptr;
bool do_cleanup;
orte_odls_quick_caddy_t *cd;

OBJ_CONSTRUCT(&procs_killed, opal_list_t);

Expand Down Expand Up @@ -1492,22 +1541,30 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
*/
orte_wait_cb_cancel(child);

/* Use SIGKILL just to make sure things are dead
* This fixes an issue that, if the application is masking
* SIGTERM, then the child_died() may return 'true' even
* though waipid returns with 0. It does this to avoid a
* race condition, per documentation in odls_default_module.c.
*/
/* First send a SIGCONT in case the process is in stopped state.
If it is in a stopped state and we do not first change it to
running, then SIGTERM will not get delivered. Ignore return
value. */
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s SENDING FORCE SIGKILL TO %s pid %lu",
"%s SENDING SIGCONT TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&child->name), (unsigned long)child->pid));
kill_local(child->pid, SIGKILL);
/* indicate the waitpid fired as this is effectively what
* has happened
*/
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_WAITPID);
child->pid = 0;
ORTE_NAME_PRINT(&child->name)));
kill_local(child->pid, SIGCONT);

/* Send a sigterm to the process before sigkill to be nice */
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s SENDING SIGTERM TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&child->name)));
kill_local(child->pid, SIGTERM);

cd = OBJ_NEW(orte_odls_quick_caddy_t);
OBJ_RETAIN(child);
cd->child = child;
cd->kill_local = kill_local;
ORTE_DETECT_TIMEOUT(1, orte_odls_globals.timeout_before_sigkill,
10000000, send_kill, cd);
continue;

CLEANUP:
/* ensure the child's session directory is cleaned up */
Expand Down
3 changes: 1 addition & 2 deletions orte/mca/odls/base/odls_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ typedef bool (*orte_odls_base_child_died_fn_t)(orte_proc_t *child);

ORTE_DECLSPEC int
orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
orte_odls_base_kill_local_fn_t kill_local,
orte_odls_base_child_died_fn_t child_died);
orte_odls_base_kill_local_fn_t kill_local);

ORTE_DECLSPEC int orte_odls_base_default_restart_proc(orte_proc_t *child,
orte_odls_base_fork_local_proc_fn_t fork_local);
Expand Down
67 changes: 1 addition & 66 deletions orte/mca/odls/default/odls_default_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,71 +164,6 @@ orte_odls_base_module_t orte_odls_default_module = {
};


static bool odls_default_child_died(orte_proc_t *child)
{
time_t end;
pid_t ret;

/* Because of rounding in time (which returns whole seconds) we
* have to add 1 to our wait number: this means that we wait
* somewhere between (target) and (target)+1 seconds. Otherwise,
* the default 1s actually means 'somwhere between 0 and 1s'. */
end = time(NULL) + orte_odls_globals.timeout_before_sigkill + 1;
do {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:default:WAITPID CHECKING PID %d WITH TIMEOUT %d SECONDS",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid),
orte_odls_globals.timeout_before_sigkill + 1));
ret = waitpid(child->pid, &child->exit_code, WNOHANG);
if (child->pid == ret) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:default:WAITPID INDICATES PROC %d IS DEAD",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid)));
/* It died -- return success */
return true;
} else if (0 == ret) {
/* with NOHANG specified, if a process has already exited
* while waitpid was registered, then waitpid returns 0
* as there is no error - this is a race condition problem
* that occasionally causes us to incorrectly report a proc
* as refusing to die. Unfortunately, errno may not be reset
* by waitpid in this case, so we cannot check it.
*
* (note the previous fix to this, to return 'process dead'
* here, fixes the race condition at the cost of reporting
* all live processes have immediately died! Better to
* occasionally report a dead process as still living -
* which will occasionally trip the timeout for cases that
* are right on the edge.)
*/
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:default:WAITPID INDICATES PID %d MAY HAVE ALREADY EXITED",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid)));
/* Do nothing, process still alive */
} else if (-1 == ret && ECHILD == errno) {
/* The pid no longer exists, so we'll call this "good
enough for government work" */
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:default:WAITPID INDICATES PID %d NO LONGER EXISTS",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid)));
return true;
}

/* Bogus delay for 1 msec - let's actually give the CPU some time
* to quit the other process (sched_yield() -- even if we have it
* -- changed behavior in 2.6.3x Linux flavors to be undesirable)
* Don't use select on a bogus file descriptor here as it has proven
* unreliable and sometimes immediately returns - we really, really
* -do- want to wait a bit!
*/
usleep(1000);
} while (time(NULL) < end);

/* The child didn't die, so return false */
return false;
}


/* deliver a signal to a specified pid. */
static int odls_default_kill_local(pid_t pid, int signum)
{
Expand All @@ -251,7 +186,7 @@ int orte_odls_default_kill_local_procs(opal_pointer_array_t *procs)
int rc;

if (ORTE_SUCCESS != (rc = orte_odls_base_default_kill_local_procs(procs,
odls_default_kill_local, odls_default_child_died))) {
odls_default_kill_local))) {
ORTE_ERROR_LOG(rc);
return rc;
}
Expand Down