Skip to content

Commit

Permalink
Merge pull request #307 from rhc54/topic/err
Browse files Browse the repository at this point in the history
Cleanup error returns
  • Loading branch information
rhc54 committed Jan 22, 2020
2 parents 652b1d2 + ece86e5 commit d31f0db
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/mca/errmgr/dvm/errmgr_dvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
* Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -184,7 +184,7 @@ static void job_errors(int fd, short args, void *cbdata)
* we only inform the submitter of the problem, but do NOT terminate
* the DVM itself */

rc = jobstate;
rc = prrte_pmix_convert_job_state_to_error(jobstate);
answer = PRRTE_NEW(prrte_buffer_t);
if (PRRTE_SUCCESS != (ret = prrte_dss.pack(answer, &rc, 1, PRRTE_INT32))) {
PRRTE_ERROR_LOG(ret);
Expand Down
3 changes: 2 additions & 1 deletion src/pmix/pmix-internal.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2019 Research Organization for Information Science
Expand Down Expand Up @@ -436,6 +436,7 @@ PRRTE_EXPORT pmix_proc_state_t prrte_pmix_convert_state(int state);
PRRTE_EXPORT int prrte_pmix_convert_pstate(pmix_proc_state_t);
PRRTE_EXPORT pmix_status_t prrte_pmix_convert_rc(int rc);
PRRTE_EXPORT int prrte_pmix_convert_status(pmix_status_t status);
PRRTE_EXPORT pmix_status_t prrte_pmix_convert_job_state_to_error(int state);

#define PRRTE_PMIX_CONVERT_JOBID(n, j) \
(void)prrte_snprintf_jobid((n), PMIX_MAX_NSLEN, (j))
Expand Down
66 changes: 64 additions & 2 deletions src/pmix/pmix.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
/*
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Mellanox Technologies, Inc.
Expand Down Expand Up @@ -36,6 +36,25 @@
pmix_status_t prrte_pmix_convert_rc(int rc)
{
switch (rc) {
case PRRTE_ERR_FAILED_TO_START:
return PMIX_ERR_JOB_FAILED_TO_START;

case PRRTE_ERR_HEARTBEAT_ALERT:
case PRRTE_ERR_FILE_ALERT:
case PRRTE_ERR_HEARTBEAT_LOST:
case PRRTE_ERR_SENSOR_LIMIT_EXCEEDED:
return PMIX_ERR_JOB_SENSOR_BOUND_EXCEEDED;

case PRRTE_ERR_NO_EXE_SPECIFIED:
case PRRTE_ERR_NO_APP_SPECIFIED:
return PMIX_ERR_JOB_NO_EXE_SPECIFIED;

case PRRTE_ERR_FAILED_TO_MAP:
return PMIX_ERR_JOB_FAILED_TO_MAP;

case PRRTE_ERR_JOB_CANCELLED:
return PMIX_ERR_JOB_CANCELLED;

case PRRTE_ERR_DEBUGGER_RELEASE:
return PMIX_ERR_DEBUGGER_RELEASE;

Expand Down Expand Up @@ -113,7 +132,7 @@ pmix_status_t prrte_pmix_convert_rc(int rc)
case PRRTE_SUCCESS:
return PMIX_SUCCESS;
default:
return rc;
return PMIX_ERROR;
}
}

Expand Down Expand Up @@ -299,6 +318,49 @@ int prrte_pmix_convert_pstate(pmix_proc_state_t state)
}
}

/*
 * Translate a PRRTE job state into the PMIx error status reported for it.
 *
 * @param state  PRRTE_JOB_STATE_* value to translate
 * @return the PMIx status paired with that state in the table below, or
 *         PMIX_ERROR when the state has no entry
 */
pmix_status_t prrte_pmix_convert_job_state_to_error(int state)
{
    /* one row per recognized job state; the three abort flavors
     * (ABORTED, CALLED_ABORT, SILENT_ABORT) all share PMIX_ERR_JOB_ABORTED */
    static const struct job_state_xlate {
        int job_state;
        pmix_status_t status;
    } xlate[] = {
        { PRRTE_JOB_STATE_ALLOC_FAILED,     PMIX_ERR_JOB_ALLOC_FAILED },
        { PRRTE_JOB_STATE_MAP_FAILED,       PMIX_ERR_JOB_FAILED_TO_MAP },
        { PRRTE_JOB_STATE_NEVER_LAUNCHED,   PMIX_ERR_JOB_NEVER_LAUNCHED },
        { PRRTE_JOB_STATE_FAILED_TO_LAUNCH, PMIX_ERR_JOB_FAILED_TO_LAUNCH },
        { PRRTE_JOB_STATE_FAILED_TO_START,  PMIX_ERR_JOB_FAILED_TO_START },
        { PRRTE_JOB_STATE_CANNOT_LAUNCH,    PMIX_ERR_JOB_CANNOT_LAUNCH },
        { PRRTE_JOB_STATE_TERMINATED,       PMIX_ERR_JOB_TERMINATED },
        { PRRTE_JOB_STATE_KILLED_BY_CMD,    PMIX_ERR_JOB_CANCELLED },
        { PRRTE_JOB_STATE_ABORTED,          PMIX_ERR_JOB_ABORTED },
        { PRRTE_JOB_STATE_CALLED_ABORT,     PMIX_ERR_JOB_ABORTED },
        { PRRTE_JOB_STATE_SILENT_ABORT,     PMIX_ERR_JOB_ABORTED },
        { PRRTE_JOB_STATE_ABORTED_BY_SIG,   PMIX_ERR_JOB_ABORTED_BY_SIG },
        { PRRTE_JOB_STATE_ABORTED_WO_SYNC,  PMIX_ERR_JOB_TERM_WO_SYNC }
    };
    const struct job_state_xlate *row;
    const struct job_state_xlate *const end = xlate + (sizeof(xlate) / sizeof(xlate[0]));

    for (row = xlate; row != end; ++row) {
        if (row->job_state == state) {
            return row->status;
        }
    }

    /* anything not listed above is reported as a generic error */
    return PMIX_ERROR;
}

void prrte_pmix_value_load(pmix_value_t *v,
prrte_value_t *kv)
{
Expand Down
23 changes: 14 additions & 9 deletions src/prted/pmix/pmix_server_dyn.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014-2019 Research Organization for Information Science
Expand Down Expand Up @@ -64,26 +64,28 @@ void pmix_server_launch_resp(int status, prrte_process_name_t* sender,
prrte_job_t *jdata;
char nspace[PMIX_MAX_NSLEN+1];
pmix_proc_t proc;
pmix_status_t xrc;

/* unpack the status */
/* unpack the status - this is already a PMIx value */
cnt = 1;
if (PRRTE_SUCCESS != (rc = prrte_dss.unpack(buffer, &ret, &cnt, PRRTE_INT32))) {
PRRTE_ERROR_LOG(rc);
return;
ret = prrte_pmix_convert_rc(rc);
}

/* unpack the jobid */
cnt = 1;
if (PRRTE_SUCCESS != (rc = prrte_dss.unpack(buffer, &jobid, &cnt, PRRTE_JOBID))) {
PRRTE_ERROR_LOG(rc);
return;
ret = prrte_pmix_convert_rc(rc);
}
/* we let the above errors fall thru in the vain hope that the room number can
* be successfully unpacked, thus allowing us to respond to the requestor */

/* unpack our tracking room number */
cnt = 1;
if (PRRTE_SUCCESS != (rc = prrte_dss.unpack(buffer, &room, &cnt, PRRTE_INT))) {
PRRTE_ERROR_LOG(rc);
/* we are hosed */
return;
}

Expand All @@ -100,7 +102,6 @@ void pmix_server_launch_resp(int status, prrte_process_name_t* sender,
PRRTE_PMIX_CONVERT_JOBID(nspace, jobid);
req->spcbfunc(ret, nspace, req->cbdata);
} else if (NULL != req->toolcbfunc) {
xrc = prrte_pmix_convert_rc(ret);
/* if success, then add to our job info */
if (PRRTE_SUCCESS == ret) {
jdata = PRRTE_NEW(prrte_job_t);
Expand All @@ -110,7 +111,7 @@ void pmix_server_launch_resp(int status, prrte_process_name_t* sender,
prrte_pmix_server_tool_conn_complete(jdata, req);
PRRTE_PMIX_CONVERT_NAME(&proc, &req->target);
}
req->toolcbfunc(xrc, &proc, req->cbdata);
req->toolcbfunc(ret, &proc, req->cbdata);
}
/* cleanup */
PRRTE_RELEASE(req);
Expand All @@ -123,6 +124,7 @@ static void spawn(int sd, short args, void *cbdata)
prrte_buffer_t *buf;
prrte_plm_cmd_flag_t command;
char nspace[PMIX_MAX_NSLEN+1];
pmix_status_t prc;

PRRTE_ACQUIRE_OBJECT(req);

Expand Down Expand Up @@ -169,8 +171,9 @@ static void spawn(int sd, short args, void *cbdata)
callback:
/* this section gets executed solely upon an error */
if (NULL != req->spcbfunc) {
prc = prrte_pmix_convert_rc(rc);
PRRTE_PMIX_CONVERT_JOBID(nspace, PRRTE_JOBID_INVALID);
req->spcbfunc(rc, nspace, req->cbdata);
req->spcbfunc(prc, nspace, req->cbdata);
}
PRRTE_RELEASE(req);
}
Expand Down Expand Up @@ -572,8 +575,10 @@ static void interim(int sd, short args, void *cbdata)
complete:
if (NULL != cd->spcbfunc) {
pmix_proc_t pproc;
pmix_status_t prc;
PRRTE_PMIX_CONVERT_JOBID(pproc.nspace, PRRTE_JOBID_INVALID);
cd->spcbfunc(rc, pproc.nspace, cd->cbdata);
prc = prrte_pmix_convert_rc(rc);
cd->spcbfunc(prc, pproc.nspace, cd->cbdata);
}
PRRTE_RELEASE(cd);
}
Expand Down

0 comments on commit d31f0db

Please sign in to comment.