Skip to content

Commit

Permalink
Merge pull request #93 from rhc54/topic/status
Browse files (browse the repository at this point in the history)
Preserve and return the app exit status
  • Loading branch information
Ralph Castain committed Aug 8, 2018
2 parents 093174a + b726e16 commit ffe3dd3
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 13 deletions.
22 changes: 13 additions & 9 deletions orte/mca/errmgr/dvm/errmgr_dvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,10 +337,12 @@ static void proc_errors(int fd, short args, void *cbdata)
OBJ_RETAIN(pptr);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED);
/* update our exit code */
ORTE_UPDATE_EXIT_STATUS(pptr->exit_code);
jdata->exit_code = pptr->exit_code;
/* just in case the exit code hadn't been set, do it here - this
* won't override any reported exit code */
ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_COMM_FAILURE);
if (0 == jdata->exit_code) {
jdata->exit_code = ORTE_ERR_COMM_FAILURE;
}
}
goto cleanup;
}
Expand Down Expand Up @@ -405,7 +407,7 @@ static void proc_errors(int fd, short args, void *cbdata)
/* retain the object so it doesn't get free'd */
OBJ_RETAIN(pptr);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED);
ORTE_UPDATE_EXIT_STATUS(pptr->exit_code);
jdata->exit_code = pptr->exit_code;
/* kill the job */
_terminate_job(jdata->jobid);
}
Expand All @@ -423,7 +425,7 @@ static void proc_errors(int fd, short args, void *cbdata)
/* retain the object so it doesn't get free'd */
OBJ_RETAIN(pptr);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED);
ORTE_UPDATE_EXIT_STATUS(pptr->exit_code);
jdata->exit_code = pptr->exit_code;
/* kill the job */
_terminate_job(jdata->jobid);
}
Expand All @@ -441,13 +443,15 @@ static void proc_errors(int fd, short args, void *cbdata)
/* retain the object so it doesn't get free'd */
OBJ_RETAIN(pptr);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED);
ORTE_UPDATE_EXIT_STATUS(pptr->exit_code);
jdata->exit_code = pptr->exit_code;
/* now treat a special case - if the proc exit'd without a required
* sync, it may have done so with a zero exit code. We want to ensure
* that the user realizes there was an error, so in this -one- case,
* we overwrite the process' exit code with the default error code
*/
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
if (0 == jdata->exit_code) {
jdata->exit_code = ORTE_ERROR_DEFAULT_EXIT_CODE;
}
/* kill the job */
_terminate_job(jdata->jobid);
}
Expand Down Expand Up @@ -537,7 +541,7 @@ static void proc_errors(int fd, short args, void *cbdata)
/* retain the object so it doesn't get free'd */
OBJ_RETAIN(pptr);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED);
ORTE_UPDATE_EXIT_STATUS(pptr->exit_code);
jdata->exit_code = pptr->exit_code;
/* kill the job */
_terminate_job(jdata->jobid);
}
Expand All @@ -549,7 +553,7 @@ static void proc_errors(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
pptr->exit_code));
ORTE_UPDATE_EXIT_STATUS(pptr->exit_code);
jdata->exit_code = pptr->exit_code;
/* track the number of non-zero exits */
i32 = 0;
i32ptr = &i32;
Expand Down Expand Up @@ -588,7 +592,7 @@ static void proc_errors(int fd, short args, void *cbdata)
/* retain the object so it doesn't get free'd */
OBJ_RETAIN(pptr);
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED);
ORTE_UPDATE_EXIT_STATUS(pptr->exit_code);
jdata->exit_code = pptr->exit_code;
/* kill the job */
_terminate_job(jdata->jobid);
}
Expand Down
4 changes: 2 additions & 2 deletions orte/mca/state/dvm/state_dvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -629,12 +629,12 @@ static void dvm_notify(int sd, short args, void *cbdata)

/* see if there was any problem */
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_ABORTED_PROC, (void**)&pptr, OPAL_PTR) && NULL != pptr) {
ret = opal_pmix_convert_rc(pptr->exit_code);
ret = opal_pmix_convert_rc(jdata->exit_code);
/* or whether we got cancelled by the user */
} else if (orte_get_attribute(&jdata->attributes, ORTE_JOB_CANCELLED, NULL, OPAL_BOOL)) {
ret = opal_pmix_convert_rc(ORTE_ERR_JOB_CANCELLED);
} else {
ret = PMIX_SUCCESS;
ret = opal_pmix_convert_rc(jdata->exit_code);
}

if (0 == ret && orte_get_attribute(&jdata->attributes, ORTE_JOB_SILENT_TERMINATION, NULL, OPAL_BOOL)) {
Expand Down
1 change: 1 addition & 0 deletions orte/runtime/orte_globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,7 @@ OBJ_CLASS_INSTANCE(orte_app_context_t,

static void orte_job_construct(orte_job_t* job)
{
job->exit_code = 0;
job->personality = NULL;
job->jobid = ORTE_JOBID_INVALID;
job->offset = 0;
Expand Down
2 changes: 2 additions & 0 deletions orte/runtime/orte_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,8 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_node_t);
typedef struct {
/** Base object so this can be put on a list */
opal_list_item_t super;
/* record the exit status for this job */
int exit_code;
/* personality for this job */
char **personality;
/* jobid for this job */
Expand Down
8 changes: 6 additions & 2 deletions orte/tools/prun/prun.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ static void evhandler(size_t evhdlr_registration_id,
ORTE_JOBID_PRINT(jobid), jobstatus);
}
}
/* save the status */
lock->status = jobstatus;
/* release the lock */
OPAL_PMIX_WAKEUP_THREAD(lock);

Expand Down Expand Up @@ -390,7 +392,7 @@ static void clean_abort(int fd, short flags, void *arg);

int prun(int argc, char *argv[])
{
int rc, i;
int rc=1, i;
char *param, *ptr;
opal_pmix_lock_t lock, rellock;
opal_list_t apps;
Expand Down Expand Up @@ -1078,6 +1080,8 @@ int prun(int argc, char *argv[])

OPAL_PMIX_WAIT_THREAD(&rellock);
OPAL_PMIX_DESTRUCT_LOCK(&rellock);
/* save the status */
rc = rellock.status;

OPAL_PMIX_CONSTRUCT_LOCK(&lock);
PMIx_Deregister_event_handler(evid, opcbfunc, &lock);
Expand All @@ -1089,7 +1093,7 @@ int prun(int argc, char *argv[])
PMIx_tool_finalize();
opal_progress_thread_finalize(NULL);
opal_finalize();
return 0;
return rc;
}

static int parse_locals(opal_list_t *jdata, int argc, char* argv[])
Expand Down

0 comments on commit ffe3dd3

Please sign in to comment.