From 7434c476262f11f8d033d0101187a4ba253f2a17 Mon Sep 17 00:00:00 2001 From: annu13 Date: Fri, 2 Oct 2015 17:17:48 -0700 Subject: [PATCH 1/5] sync with PMIX master --- opal/mca/pmix/pmix1xx/pmix/Makefile.am | 1 + opal/mca/pmix/pmix1xx/pmix/VERSION | 4 +- opal/mca/pmix/pmix1xx/pmix/examples/fault.c | 18 +- opal/mca/pmix/pmix1xx/pmix/examples/server.c | 25 +- .../pmix/include/pmix/pmix_common.h.in | 72 +++- .../pmix/pmix1xx/pmix/include/pmix_server.h | 82 ++-- opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c | 56 ++- .../pmix1xx/pmix/src/server/pmix_server.c | 35 +- .../pmix1xx/pmix/src/server/pmix_server_ops.h | 16 +- opal/mca/pmix/pmix1xx/pmix/test/cli_stages.c | 14 + opal/mca/pmix/pmix1xx/pmix/test/cli_stages.h | 8 + opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c | 398 +++++++++++++++++- opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c | 4 +- .../pmix/pmix1xx/pmix/test/simple/simpft.c | 28 +- .../pmix/pmix1xx/pmix/test/simple/simptest.c | 24 +- opal/mca/pmix/pmix1xx/pmix1_client.c | 20 +- opal/mca/pmix/pmix1xx/pmix1_server_north.c | 8 + opal/mca/pmix/pmix1xx/pmix1_server_south.c | 17 +- 18 files changed, 691 insertions(+), 139 deletions(-) diff --git a/opal/mca/pmix/pmix1xx/pmix/Makefile.am b/opal/mca/pmix/pmix1xx/pmix/Makefile.am index 3f2f13a8d30..3cc7e270c86 100644 --- a/opal/mca/pmix/pmix1xx/pmix/Makefile.am +++ b/opal/mca/pmix/pmix1xx/pmix/Makefile.am @@ -50,6 +50,7 @@ include src/usock/Makefile.am include src/client/Makefile.am include src/server/Makefile.am include src/sec/Makefile.am +include src/common/Makefile.am lib_LTLIBRARIES = libpmix.la diff --git a/opal/mca/pmix/pmix1xx/pmix/VERSION b/opal/mca/pmix/pmix1xx/pmix/VERSION index 567748b1b02..3952e1f675c 100644 --- a/opal/mca/pmix/pmix1xx/pmix/VERSION +++ b/opal/mca/pmix/pmix1xx/pmix/VERSION @@ -30,7 +30,7 @@ greek=a1 # command, or with the date (if "git describe" fails) in the form of # "date". 
-repo_rev=git0a8e0d9 +repo_rev=git3c37421 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Sep 23, 2015" +date="Oct 02, 2015" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/fault.c b/opal/mca/pmix/pmix1xx/pmix/examples/fault.c index 59b813e5653..4735118366e 100644 --- a/opal/mca/pmix/pmix1xx/pmix/examples/fault.c +++ b/opal/mca/pmix/pmix1xx/pmix/examples/fault.c @@ -43,6 +43,20 @@ static void notification_fn(pmix_status_t status, completed = true; } +static void op_callbk(pmix_status_t status, + void *cbdata) +{ + fprintf(stderr, "client: OP CALLBACK CALLED WITH STATUS %d", status); +} + +static void errhandler_reg_callbk (pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + fprintf(stderr, "cleint: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%d", + status, errhandler_ref); +} + int main(int argc, char **argv) { int rc; @@ -69,7 +83,7 @@ int main(int argc, char **argv) completed = false; /* register our errhandler */ - PMIx_Register_errhandler(NULL, 0, notification_fn); + PMIx_Register_errhandler(NULL, 0, notification_fn, errhandler_reg_callbk, NULL); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); @@ -97,7 +111,7 @@ int main(int argc, char **argv) done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - PMIx_Deregister_errhandler(); + PMIx_Deregister_errhandler(0, op_callbk, NULL); if (PMIX_SUCCESS != (rc = PMIx_Finalize())) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/server.c b/opal/mca/pmix/pmix1xx/pmix/examples/server.c index d10946fd6e8..e80ce99e53f 100644 --- 
a/opal/mca/pmix/pmix1xx/pmix/examples/server.c +++ b/opal/mca/pmix/pmix1xx/pmix/examples/server.c @@ -140,7 +140,10 @@ static void set_namespace(int nprocs, char *ranks, char *nspace, static void errhandler(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo); - +static void op_callbk(pmix_status_t status, void *cbdata); +static void errhandler_reg_callbk (pmix_status_t status, + int errhandler_ref, + void *cbdata); static void opcbfunc(pmix_status_t status, void *cbdata) { myxfer_t *x = (myxfer_t*)cbdata; @@ -175,7 +178,7 @@ int main(int argc, char **argv) return rc; } /* register the errhandler */ - PMIx_Register_errhandler(NULL, 0, errhandler); + PMIx_Register_errhandler(NULL, 0, errhandler, errhandler_reg_callbk, NULL); /* setup the pub data, in case it is used */ PMIX_CONSTRUCT(&pubdata, pmix_list_t); @@ -282,7 +285,7 @@ int main(int argc, char **argv) pmix_argv_free(client_env); /* deregister the errhandler */ - PMIx_Deregister_errhandler(); + PMIx_Deregister_errhandler(0, op_callbk, NULL); /* release any pub data */ PMIX_LIST_DESTRUCT(&pubdata); @@ -343,6 +346,20 @@ static void errhandler(pmix_status_t status, pmix_output(0, "SERVER: ERRHANDLER CALLED WITH STATUS %d", status); } +static void op_callbk(pmix_status_t status, + void *cbdata) +{ + pmix_output(0, "SERVER: OP CALLBACK CALLED WITH STATUS %d", status); +} + +static void errhandler_reg_callbk (pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + pmix_output(0, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%d", + status, errhandler_ref); +} + static int connected(const pmix_proc_t *proc, void *server_object) { pmix_output(0, "SERVER: CONNECTED %s:%d", proc->nspace, proc->rank); @@ -403,7 +420,7 @@ static pmix_status_t abort_fn(const pmix_proc_t *proc, x->cbfunc = cbfunc; x->cbdata = cbdata; - if (PMIX_SUCCESS != (rc = PMIx_server_notify_error(status, procs, nprocs, + if (PMIX_SUCCESS != (rc = PMIx_Notify_error(status, procs, 
nprocs, &x->caller, 1, x->info, 2, abcbfunc, x))) { pmix_output(0, "SERVER: FAILED NOTIFY ERROR %d", (int)rc); diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in b/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in index 58d321cc58e..48f3fbf8098 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in +++ b/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in @@ -728,6 +728,15 @@ typedef void (*pmix_notification_fn_t)(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo); +/* define a callback function for calls to PMIx_Register_errhandler. The + * status indicates if the request was successful or not, errhandler_ref is + * an integer reference assigned to the errhandler by PMIX, this reference + * must be used to deregister the err handler. A ptr to the original + * cbdata is returned. */ +typedef void (*pmix_errhandler_reg_cbfunc_t) (pmix_status_t status, + int errhandler_ref, + void *cbdata); + /* define a callback function for calls to PMIx_Get_nb. The status * indicates if the requested data was found or not - a pointer to the * pmix_value_t structure containing the found data is returned. The @@ -756,16 +765,71 @@ typedef void (*pmix_value_cbfunc_t)(pmix_status_t status, * will be reported. Options to modify that behavior can be provided * in the info array * + * Both the client application and the resource manager can register + * err handlers for specific errors. PMIx client/server calls the registered + * err handler upon receiving error notify notification (via PMIx_Notify_error) + * from the other end (Resource Manager/Client application). + * + * Multiple err handlers can be registered for different errors. PMIX returns + * an integer reference to each register handler in the callback fn. The caller + * must retain the reference in order to deregister the errhandler. 
* Modification of the notification behavior can be accomplished by * deregistering the current errhandler, and then registering it * using a new set of info values. * * See pmix_common.h for a description of the notification function */ void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo, - pmix_notification_fn_t errhandler); - -/* deregister the errhandler */ -void PMIx_Deregister_errhandler(void); + pmix_notification_fn_t errhandler, + pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata); + +/* deregister the errhandler + * errhandler_ref is the reference returned by PMIx for the errhandler + * to pmix_errhandler_reg_cbfunc_t */ +void PMIx_Deregister_errhandler(int errhandler_ref, + pmix_op_cbfunc_t cbfunc, + void *cbdata); +/* Report an error to a process for notification via any + * registered errhandler. The errhandler registration can be + * called by both the server and the client application. On the + * server side, the errhandler is used to report errors detected + * by PMIx to the host server for handling. On the client side, + * the errhandler is used to notify the process of errors + * reported by the server - e.g., the failure of another process. + * + * This function allows the host server to direct the server + * convenience library to notify all indicated local procs of + * an error. The error can be local, or anywhere in the cluster. + * The status indicates the error being reported. + * + * The client application can also call this function to notify the + * resource manager of an error it encountered. It can request the host + * server to notify the indicated processes about the error. + * + * The first array of procs informs the server library as to which + * processes should be alerted - e.g., the processes that are in + * a directly-affected job or are connected to one that is affected. + * Passing a NULL for this array will indicate that all local procs + * are to be notified. 
+ * + * The second array identifies the processes that will be impacted + * by the error. This could consist of a single process, or a number + * of processes. + * + * The info array contains any further info the RM can and/or chooses + * to provide. + * + * The callback function will be called upon completion of the + * notify_error function's actions. Note that any messages will + * have been queued, but may not have been transmitted by this + * time. Note that the caller is required to maintain the input + * data until the callback function has been executed! +*/ +pmix_status_t PMIx_Notify_error(pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_proc_t error_procs[], size_t error_nprocs, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); /* Provide a string representation of a pmix_status_t value. Note * that the provided string is statically defined and must NOT be diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h b/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h index c8304ec0b24..6c8a1d61b33 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h +++ b/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h @@ -254,13 +254,22 @@ typedef pmix_status_t (*pmix_server_disconnect_fn_t)(const pmix_proc_t procs[], pmix_op_cbfunc_t cbfunc, void *cbdata); /* Register to receive notifications for the specified events. The resource - * manager may have access to events beyond process failure. In cases where - * the client application requests to be notified of such events, the request - * will be passed to the PMIx server, which in turn shall pass the request to - * the resource manager. */ + * manager may have access to events beyond process failure. The client + * application requests to be notified of such events by registering a + * err handler(s) for the events. 
The PMIx client shall pass the request + * to the PMIx server, which in turn shall pass the request to + * the resource manager by calling the register events function. */ typedef pmix_status_t (*pmix_server_register_events_fn_t)(const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Deregister to receive notifications for the specified events that + * the client application has registered for previously. When the client + * application deregisters the err handler forevents, PMIX client passes the + * deregister request to PMIx server which in turn passes the request to the + * resource manager by calling deregister events function.*/ + typedef pmix_status_t (*pmix_server_deregister_events_fn_t)(const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); + /* Callback function for incoming connection requests from * local clients */ typedef void (*pmix_connection_cbfunc_t)(int incoming_sd); @@ -279,19 +288,20 @@ typedef pmix_status_t (*pmix_server_listener_fn_t)(int listening_sd, pmix_connection_cbfunc_t cbfunc); typedef struct pmix_server_module_1_0_0_t { - pmix_server_client_connected_fn_t client_connected; - pmix_server_client_finalized_fn_t client_finalized; - pmix_server_abort_fn_t abort; - pmix_server_fencenb_fn_t fence_nb; - pmix_server_dmodex_req_fn_t direct_modex; - pmix_server_publish_fn_t publish; - pmix_server_lookup_fn_t lookup; - pmix_server_unpublish_fn_t unpublish; - pmix_server_spawn_fn_t spawn; - pmix_server_connect_fn_t connect; - pmix_server_disconnect_fn_t disconnect; - pmix_server_register_events_fn_t register_events; - pmix_server_listener_fn_t listener; + pmix_server_client_connected_fn_t client_connected; + pmix_server_client_finalized_fn_t client_finalized; + pmix_server_abort_fn_t abort; + pmix_server_fencenb_fn_t fence_nb; + pmix_server_dmodex_req_fn_t direct_modex; + pmix_server_publish_fn_t publish; + pmix_server_lookup_fn_t lookup; + pmix_server_unpublish_fn_t unpublish; + 
pmix_server_spawn_fn_t spawn; + pmix_server_connect_fn_t connect; + pmix_server_disconnect_fn_t disconnect; + pmix_server_register_events_fn_t register_events; + pmix_server_deregister_events_fn_t deregister_events; + pmix_server_listener_fn_t listener; } pmix_server_module_t; /**** SERVER SUPPORT INIT/FINALIZE FUNCTIONS ****/ @@ -411,44 +421,6 @@ pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, pmix_dmodex_response_fn_t cbfunc, void *cbdata); -/* Report an error to a process for notification via any - * registered errhandler. The errhandler registration can be - * called by both the server and the client application. On the - * server side, the errhandler is used to report errors detected - * by PMIx to the host server for handling. On the client side, - * the errhandler is used to notify the process of errors - * reported by the server - e.g., the failure of another process. - * - * This function allows the host server to direct the server - * convenience library to notify all indicated local procs of - * an error. The error can be local, or anywhere in the cluster. - * The status indicates the error being reported. - * - * The first array of procs informs the server library as to which - * processes should be alerted - e.g., the processes that are in - * a directly-affected job or are connected to one that is affected. - * Passing a NULL for this array will indicate that all local procs - * are to be notified. - * - * The second array identifies the processes that will be impacted - * by the error. This could consist of a single process, or a number - * of processes. - * - * The info array contains any further info the RM can and/or chooses - * to provide. - * - * The callback function will be called upon completion of the - * notify_error function's actions. Note that any messages will - * have been queued, but may not have been transmitted by this - * time. 
Note that the caller is required to maintain the input - * data until the callback function has been executed! */ -pmix_status_t PMIx_server_notify_error(pmix_status_t status, - pmix_proc_t procs[], size_t nprocs, - pmix_proc_t error_procs[], size_t error_nprocs, - pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - - END_C_DECLS #endif diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c index 4bb369d7931..964bc43c76c 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c @@ -49,6 +49,9 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) return PMI2_ERR_INIT; } + /* get the rank */ + *rank = myproc.rank; + if (NULL != size) { /* get the universe size - this will likely pull * down all attributes assigned to the job, thus @@ -57,7 +60,9 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &kv)) { rc = convert_int(size, kv); PMIX_VALUE_RELEASE(kv); - return convert_err(rc); + if (PMIX_SUCCESS != rc) { + goto error; + } } else { /* cannot continue without this info */ return PMI2_ERR_INIT; @@ -69,7 +74,9 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_SPAWNED, NULL, 0, &kv)) { rc = convert_int(spawned, kv); PMIX_VALUE_RELEASE(kv); - return convert_err(rc); + if (PMIX_SUCCESS != rc) { + goto error; + } } else { /* if not found, default to not spawned */ *spawned = 0; @@ -81,7 +88,9 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_APPNUM, NULL, 0, &kv)) { rc = convert_int(appnum, kv); PMIX_VALUE_RELEASE(kv); - return convert_err(rc); + if (PMIX_SUCCESS != rc) { + goto error; + } } else { /* if not found, default to 0 */ *appnum = 0; @@ -89,6 +98,9 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) } return 
PMI2_SUCCESS; + +error: + return convert_err(rc); } int PMI2_Initialized(void) @@ -153,8 +165,8 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id, pmix_value_t *val; pmix_proc_t proc; - (void)strncpy(proc.nspace, jobid, PMIX_MAX_NSLEN); - proc.rank = src_pmi_id; + (void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), sizeof(myproc.nspace)); + proc.rank = (src_pmi_id == PMI2_ID_NULL ? myproc.rank : src_pmi_id); rc = PMIx_Get(&proc, key, NULL, 0, &val); if (PMIX_SUCCESS == rc && NULL != val) { if (PMIX_STRING != val->type) { @@ -173,9 +185,26 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id, int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *found, int waitfor) { - /* translate the provided name to the equivalent PMIx - * attribute name */ - return PMI2_FAIL; + pmix_status_t rc; + pmix_value_t *val; + + *found = 0; + rc = PMIx_Get(&myproc, name, NULL, 0, &val); + if (PMIX_SUCCESS == rc && NULL != val) { + if (PMIX_STRING != val->type) { + /* this is an error */ + PMIX_VALUE_RELEASE(val); + return PMI2_FAIL; + } + if (NULL != val->data.string) { + (void)strncpy(value, val->data.string, valuelen); + *found = 1; + } + PMIX_VALUE_RELEASE(val); + } else if (PMIX_ERR_NOT_FOUND == rc) { + rc = PMIX_SUCCESS; + } + return convert_err(rc); } /* push info at the PMIX_LOCAL scope */ @@ -208,6 +237,8 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou *found = 1; } PMIX_VALUE_RELEASE(val); + } else if (PMIX_ERR_NOT_FOUND == rc) { + rc = PMIX_SUCCESS; } return convert_err(rc); } @@ -323,7 +354,7 @@ int PMI2_Job_GetId(char jobid[], int jobid_size) if (NULL == jobid) { return PMI2_ERR_INVALID_ARGS; } - (void)strncpy(jobid, pmix_globals.myid.nspace, jobid_size); + (void)strncpy(jobid, myproc.nspace, jobid_size); return PMI2_SUCCESS; } @@ -332,7 +363,7 @@ int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn) pmix_status_t rc; pmix_proc_t proc; - (void)strncpy(proc.nspace, jobid, PMIX_MAX_NSLEN); + 
(void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), sizeof(myproc.nspace)); proc.rank = PMIX_RANK_WILDCARD; rc = PMIx_Connect(&proc, 1, NULL, 0); return convert_err(rc); @@ -343,7 +374,7 @@ int PMI2_Job_Disconnect(const char jobid[]) pmix_status_t rc; pmix_proc_t proc; - (void)strncpy(proc.nspace, jobid, PMIX_MAX_NSLEN); + (void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), sizeof(myproc.nspace)); proc.rank = PMIX_RANK_WILDCARD; rc = PMIx_Disconnect(&proc, 1, NULL, 0); return convert_err(rc); @@ -442,6 +473,9 @@ static pmix_status_t convert_int(int *value, pmix_value_t *kv) case PMIX_SIZE: *value = kv->data.size; break; + case PMIX_BOOL: + *value = kv->data.flag; + break; default: /* not an integer type */ return PMIX_ERR_BAD_PARAM; diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c index 62cf3ebc6b6..8eb69668495 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c @@ -19,7 +19,7 @@ #include #include - +#include #include "src/include/pmix_globals.h" #ifdef HAVE_STRING_H @@ -860,11 +860,11 @@ static void _dmodex_req(int sd, short args, void *cbdata) * may not be a contribution */ if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val)) && NULL != val) { - data = val->data.bo.bytes; - sz = val->data.bo.size; - /* protect the data */ - val->data.bo.bytes = NULL; - val->data.bo.size = 0; + data = val->data.bo.bytes; + sz = val->data.bo.size; + /* protect the data */ + val->data.bo.bytes = NULL; + val->data.bo.size = 0; PMIX_VALUE_RELEASE(val); } @@ -979,11 +979,11 @@ static void _notify_error(int sd, short args, void *cbdata) } -pmix_status_t PMIx_server_notify_error(pmix_status_t status, - pmix_proc_t procs[], size_t nprocs, - pmix_proc_t error_procs[], size_t error_nprocs, - pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata) +pmix_status_t 
pmix_server_notify_error(pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_proc_t error_procs[], size_t error_nprocs, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_notify_caddy_t *cd; size_t n; @@ -1030,8 +1030,10 @@ static void reg_errhandler(int sd, short args, void *cbdata) cd->active = false; } -void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo, - pmix_notification_fn_t err) +void pmix_server_register_errhandler(pmix_info_t info[], size_t ninfo, + pmix_notification_fn_t errhandler, + pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata) { pmix_shift_caddy_t *cd; @@ -1039,7 +1041,7 @@ void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo, cd = PMIX_NEW(pmix_shift_caddy_t); cd->info = info; cd->ninfo = ninfo; - cd->err = err; + cd->err = errhandler; PMIX_THREADSHIFT(cd, reg_errhandler); PMIX_WAIT_FOR_COMPLETION(cd->active); PMIX_RELEASE(cd); @@ -1052,10 +1054,11 @@ static void dereg_errhandler(int sd, short args, void *cbdata) cd->active = false; } -void PMIx_Deregister_errhandler(void) +void pmix_server_deregister_errhandler(int errhandler_ref, + pmix_op_cbfunc_t cbfunc, + void *cbdata) { pmix_shift_caddy_t *cd; - /* need to thread shift this request */ cd = PMIX_NEW(pmix_shift_caddy_t); PMIX_THREADSHIFT(cd, dereg_errhandler); diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h index d6594766bff..21091b6783a 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h @@ -11,7 +11,7 @@ #include #include - +#include #include #include "src/usock/usock.h" #include "src/util/hash.h" @@ -232,6 +232,20 @@ void pmix_pack_proc_map(pmix_buffer_t *buf, pmix_status_t pmix_regex_parse_nodes(const char *regexp, char ***names); pmix_status_t pmix_regex_parse_procs(const char *regexp, char ***procs); +void pmix_server_register_errhandler(pmix_info_t info[], 
size_t ninfo, + pmix_notification_fn_t errhandler, + pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata); + +void pmix_server_deregister_errhandler(int errhandler_ref, + pmix_op_cbfunc_t cbfunc, + void *cbdata); + +pmix_status_t pmix_server_notify_error(pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_proc_t error_procs[], size_t error_nprocs, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); extern pmix_server_module_t pmix_host_server; extern pmix_server_globals_t pmix_server_globals; diff --git a/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.c b/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.c index bd384a2d9ea..d8342890efb 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.c @@ -253,3 +253,17 @@ void errhandler(pmix_status_t status, test_abort = true; } +void op_callbk(pmix_status_t status, + void *cbdata) +{ + TEST_VERBOSE(( "OP CALLBACK CALLED WITH STATUS %d", status)); +} + +void errhandler_reg_callbk (pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + TEST_VERBOSE(("ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%d", + status, errhandler_ref)); +} + diff --git a/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.h b/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.h index 6426a142e37..ae9e0c6453a 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.h +++ b/opal/mca/pmix/pmix1xx/pmix/test/cli_stages.h @@ -65,3 +65,11 @@ void errhandler(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo); +void op_callbk(pmix_status_t status, + void *cbdata); + +void errhandler_reg_callbk (pmix_status_t status, + int errhandler_ref, + void *cbdata); + + diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c b/opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c index a22e9ec994c..759be74e825 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c @@ -9,7 +9,7 @@ * University 
of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. @@ -24,48 +24,400 @@ #include #include +#include +#include #include "pmi2.h" +/* Target is legacy SLURM pmi2 library implementation */ +static int _legacy = 0; +/* Verbose level 0-silent, 1-fatal, 2-error, 3+ debug*/ +static int _verbose = 1; + +#define log_fatal(fmt, ...) \ + do { \ + if (_verbose > 0) \ + fprintf(stderr, "FATAL " fmt, ##__VA_ARGS__); \ + exit(rc); \ + } while (0) + +#define log_error(fmt, ...) \ + do { \ + if (_verbose > 1) \ + fprintf(stderr, "ERROR " fmt, ##__VA_ARGS__); \ + } while (0) + +#define log_info(fmt, ...) \ + do { \ + if (_verbose > 2) \ + fprintf(stderr, "INFO " fmt, ##__VA_ARGS__); \ + } while (0) + +#define log_assert(e, msg) \ + do { \ + if (!(e)) { \ + log_fatal("%s at %s:%d\n", msg, __FUNCTION__, __LINE__); \ + rc = -1; \ + } \ + } while (0) + +static inline long random_value(long min_value, long max_value) +{ + return ((min_value >= max_value) ? min_value : min_value + (rand() % (max_value - min_value + 1))); +} + +static int test_item1(void); +static int test_item2(void); +static int test_item3(void); +static int test_item4(void); +static int test_item5(void); +static int test_item6(void); +static int test_item7(void); +static int test_item8(void); + +static int spawned, size, rank, appnum; +static char jobid[100]; + int main(int argc, char **argv) { - int spawned, size, rank, appnum; + int ret = 0; int rc; - char *key; - - /* init us */ + char *str = NULL; + int ti = (argc > 1 ? atoi(argv[1]) : 0); + + srand(time(NULL)); + str = getenv("VERBOSE"); + _verbose = (str ? 
atoi(str) : _verbose); + str = getenv("LEGACY"); + _legacy = (str ? atoi(str) : _legacy); + + spawned = random_value(10, 20); + size = random_value(10, 20); + rank = random_value(10, 20); + appnum = random_value(10, 20); if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) { - fprintf(stderr, "PMI2_Init failed: %d\n", rc); + log_fatal("PMI2_Init failed: %d\n", rc); return rc; } - key = "local-key"; - if (PMI2_SUCCESS != (rc = PMI2_KVS_Put(key, "my-local-value"))) { - fprintf(stderr, "PMI2_Put failed: %d\n", rc); + if (!ti || 1 == ti) { + rc = test_item1(); + ret += (rc ? 1 : 0); + log_info("TI1 : %s\n", (rc ? "FAIL" : "PASS")); } - key = "remote-key"; - if (PMI2_SUCCESS != (rc = PMI2_KVS_Put(key, "remote-value"))) { - fprintf(stderr, "PMI2_Put failed: %d\n", rc); + if (!ti || 2 == ti) { + rc = test_item2(); + ret += (rc ? 1 : 0); + log_info("TI2 : %s\n", (rc ? "FAIL" : "PASS")); } - key = "global-key"; - if (PMI2_SUCCESS != (rc = PMI2_KVS_Put(key, "global-value"))) { - fprintf(stderr, "PMI2_Put failed: %d\n", rc); + if (!ti || 3 == ti) { + rc = test_item3(); + ret += (rc ? 1 : 0); + log_info("TI3 : %s\n", (rc ? "FAIL" : "PASS")); } - /* Submit the data */ - if (PMI2_SUCCESS != (rc = PMI2_KVS_Fence())) { - fprintf(stderr, "PMI2_Fence failed: %d\n", rc); - return rc; + if (!ti || 4 == ti) { + rc = test_item4(); + ret += (rc ? 1 : 0); + log_info("TI4 : %s\n", (rc ? "FAIL" : "PASS")); + } + + if (!ti || 5 == ti) { + rc = test_item5(); + ret += (rc ? 1 : 0); + log_info("TI5 : %s\n", (rc ? "FAIL" : "PASS")); + } + + if (!ti || 6 == ti) { + rc = test_item6(); + ret += (rc ? 1 : 0); + log_info("TI6 : %s\n", (rc ? "FAIL" : "PASS")); + } + + if (!ti || 7 == ti) { + rc = test_item7(); + ret += (rc ? 1 : 0); + log_info("TI7 : %s\n", (rc ? "FAIL" : "PASS")); + } + + if (!ti || 8 == ti) { + rc = test_item8(); + ret += (rc ? 1 : 0); + log_info("TI8 : %s\n", (rc ? 
"FAIL" : "PASS")); } - /* finalize us */ if (PMI2_SUCCESS != (rc = PMI2_Finalize())) { - fprintf(stderr, "PMI2_Finalize failed: %d\n", rc); + log_fatal("PMI2_Finalize failed: %d\n", rc); + return rc; + } + + return ret; +} + +static int test_item1(void) +{ + int rc = 0; + + log_info("spawned=%d size=%d rank=%d appnum=%d\n", spawned, size, rank, appnum); + + log_assert(spawned == 0 || spawned == 1, ""); + log_assert(size >= 0, ""); + log_assert(rank >= 0, ""); + log_assert(rank < size, ""); + + sprintf(jobid, "%s", __FUNCTION__); + if (PMI2_SUCCESS != (rc = PMI2_Job_GetId(jobid, sizeof(jobid)))) { + log_fatal("PMI2_Job_GetId failed: %d\n", rc); + return rc; + } + + log_info("jobid=%s\n", jobid); + + log_assert(memcmp(jobid, __FUNCTION__, sizeof(__FUNCTION__)), ""); + + return rc; +} + +static int test_item2(void) +{ + int rc = 0; + + log_assert(PMI2_Initialized(), ""); + + return rc; +} + +static int test_item3(void) +{ + int rc = 0; + char val[PMI2_MAX_VALLEN]; + int found = 0; + /* Predefined Job attributes */ + const char *tkeys[] = { + "universeSize", + "hasNameServ", + "physTopology", + "physTopologyLevels", + "cartDims", + "isHeterogeneous", + NULL + }; + const char **ptr = tkeys; + + while (*ptr) { + if (PMI2_SUCCESS != (rc = PMI2_Info_GetJobAttr(*ptr, val, sizeof(val), &found))) { + log_fatal("PMI2_Info_GetJobAttr: [%s] %d\n", *ptr, rc); + return rc; + } + log_info("key=%s value=%s found=%d\n", *ptr, (found ? val : "N/A"), found); + if (!_legacy) { + log_assert(!found, "Check test. 
Probably PMIx has a new functionality"); + } + ptr++; + } + + return rc; +} + +static int test_item4(void) +{ + int rc = 0; + char val[PMI2_MAX_VALLEN]; + int found = 0; + /* Predefined Node attributes */ + const char *tkeys[] = { + "memPoolType", + "memSYSVid", + "memAnonMMAPfd", + "memNTName", + NULL + }; + const char **ptr = tkeys; + + if (_legacy) { + return rc; + } + + while (*ptr) { + if (PMI2_SUCCESS != (rc = PMI2_Info_GetNodeAttr(*ptr, val, sizeof(val), &found, 1))) { + log_fatal("PMI2_Info_GetNodeAttr: [%s] %d\n", *ptr, rc); + return rc; + } + log_info("key=%s value=%s found=%d\n", *ptr, (found ? val : "N/A"), found); + if (!_legacy) { + log_assert(!found, "Check test. Probably PMIx has a new functionality"); + } + ptr++; + } + + return rc; +} + +static int test_item5(void) +{ + int rc = 0; + char val[PMI2_MAX_VALLEN]; + int found = 0; + const char *tkey = "sharedFilename"; + const char *tval = "pmix-pmi2-check"; + + if (PMI2_SUCCESS != (rc = PMI2_Info_PutNodeAttr(tkey, tval))) { + log_fatal("PMI2_Info_PutNodeAttr %d\n", rc); + return rc; + } + + if (PMI2_SUCCESS != (rc = PMI2_Info_GetNodeAttr(tkey, val, sizeof(val), &found, 1))) { + log_fatal("PMI2_Info_GetNodeAttr %d\n", rc); + return rc; + } + + log_info("tkey=%s tval=%s val=%s found=%d\n", tkey, tval, val, found); + + log_assert(found, "PMI2_Info_GetNodeAttr does not find expected key"); + log_assert(strlen(tval) == strlen(val), "value does not meet expectation"); + log_assert(!strcmp(tval, val), "value does not meet expectation"); + + return rc; +} + +static int test_item6(void) +{ + int rc = 0; + char val[PMI2_MAX_VALLEN]; + int len; + const char *tkey = __FUNCTION__; + const char *tval = __FILE__; + + if (PMI2_SUCCESS != (rc = PMI2_KVS_Put(tkey, tval))) { + log_fatal("PMI2_KVS_Put %d\n", rc); + return rc; + } + + if (PMI2_SUCCESS != (rc = PMI2_KVS_Get(NULL, PMI2_ID_NULL, tkey, val, sizeof(val), &len))) { + log_fatal("PMI2_KVS_Get %d\n", rc); return rc; } - - return 0; + + log_info("tkey=%s tval=%s 
val=%s len=%d\n", tkey, tval, val, len); + + log_assert((int)strlen(tval) == len, "value does not meet expectation"); + log_assert(!strcmp(tval, val), "value does not meet expectation"); + + return rc; +} + +static int test_item7(void) +{ + int rc = 0; + int len; + char tkey[PMI2_MAX_VALLEN]; + char tval[PMI2_MAX_VALLEN]; + char val[PMI2_MAX_VALLEN]; + int i = 0; + + for (i = 0; i < size; i++) { + sprintf(tkey, "KEY-%d", i); + sprintf(tval, "VALUE-%d", i); + if (i == rank) { + if (PMI2_SUCCESS != (rc = PMI2_KVS_Put(tkey, tval))) { + log_fatal("PMI2_KVS_Put [%s=%s] %d\n", tkey, tval, rc); + return rc; + } + } + + if (PMI2_SUCCESS != (rc = PMI2_KVS_Fence())) { + log_fatal("PMI2_KVS_Fence %d\n", rc); + return rc; + } + + if (PMI2_SUCCESS != (rc = PMI2_KVS_Get(jobid, PMI2_ID_NULL, tkey, val, sizeof(val), &len))) { + log_fatal("PMI2_KVS_Get %d\n", rc); + return rc; + } + + log_info("tkey=%s tval=%s val=%s len=%d\n", tkey, tval, val, len); + + log_assert((int)strlen(tval) == len, "value does not meet expectation"); + log_assert(!strcmp(tval, val), "value does not meet expectation"); + } + + return rc; +} + +static int test_item8(void) +{ + int rc = 0; + int i, j, r; + char symb, symb_start = 'a'; + int fence_cnt; + int fence_num = random_value(2, 10); + int keys_per_fence = random_value(10, 1000); + int val_size = random_value(10, PMI2_MAX_VALLEN); + int keys_total = 0; + + fence_cnt = 0; + while (fence_cnt < fence_num) { + symb = symb_start; + for (i = 0; i < keys_per_fence; i++) { + char key[PMI2_MAX_KEYLEN]; + char val[PMI2_MAX_VALLEN] = ""; + sprintf(key, "RANK%d-key-%d", rank, i + keys_total); + for (j = 0; j < val_size; j++) { + val[j] = symb; + } + symb++; + if (symb > 'z') { + symb = 'a'; + } + if (PMI2_SUCCESS != (rc = PMI2_KVS_Put(key, val))) { + log_fatal("PMI2_KVS_Put [%s=%s] %d\n", key, val, rc); + return rc; + } + } + symb_start = symb; + keys_total += keys_per_fence; + + if (PMI2_SUCCESS != (rc = PMI2_KVS_Fence())) { + log_fatal("PMI2_KVS_Fence %d\n", rc); 
+ return rc; + } + + for (r = 0; r < size; r++) { + int len; + symb = 'a'; + for (i = 0; i < keys_total; i++) { + char key[PMI2_MAX_KEYLEN]; + char val[PMI2_MAX_VALLEN] = ""; + sprintf(key, "RANK%d-key-%d", r, i); + + if (PMI2_SUCCESS != (rc = PMI2_KVS_Get(jobid, PMI2_ID_NULL, key, val, sizeof(val), &len))) { + log_fatal("PMI2_KVS_Get %d\n", rc); + return rc; + } + + if (len != val_size) { + log_fatal("%d: failure on rank %d, key #%d: len mismatch:" + " %d instead of %d\n", rank, r, i, len, val_size); + } + + for (j = 0; j < val_size; j++) { + if (val[j] != symb) { + log_fatal("%d: failure on rank %d, key #%d: value mismatch" + " at symb %d: \'%c\' instead of \'%c\'\n", rank, + r, i, j, val[j], symb); + } + } + symb++; + if (symb > 'z') { + symb = 'a'; + } + } + } + fence_cnt++; + } + + return rc; } diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c b/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c index 3432324a7f3..7e686131305 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c @@ -85,7 +85,7 @@ int main(int argc, char **argv) return rc; } /* register the errhandler */ - PMIx_Register_errhandler(NULL, 0, errhandler); + PMIx_Register_errhandler(NULL, 0, errhandler, errhandler_reg_callbk, NULL); order[CLI_UNINIT] = CLI_FORKED; order[CLI_FORKED] = CLI_FIN; @@ -171,7 +171,7 @@ int main(int argc, char **argv) pmix_argv_free(client_env); /* deregister the errhandler */ - PMIx_Deregister_errhandler(); + PMIx_Deregister_errhandler(0, op_callbk, NULL); cli_wait_all(1.0); diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/simpft.c b/opal/mca/pmix/pmix1xx/pmix/test/simple/simpft.c index a09e0ea8247..1b405fad859 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/simple/simpft.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/simple/simpft.c @@ -48,6 +48,20 @@ static void notification_fn(pmix_status_t status, completed = true; } +static void op_callbk(pmix_status_t status, + void *cbdata) +{ + pmix_output(0, "CLIENT: OP CALLBACK 
CALLED WITH STATUS %d", status); +} + +static void errhandler_reg_callbk (pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + pmix_output(0, "Client: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%d", + status, errhandler_ref); +} + int main(int argc, char **argv) { int rc; @@ -55,7 +69,7 @@ int main(int argc, char **argv) pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; - + /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); @@ -72,10 +86,10 @@ int main(int argc, char **argv) PMIX_VALUE_RELEASE(val); pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); completed = false; - + /* register our errhandler */ - PMIx_Register_errhandler(NULL, 0, notification_fn); - + PMIx_Register_errhandler(NULL, 0, notification_fn, errhandler_reg_callbk, NULL); + /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); @@ -84,7 +98,7 @@ int main(int argc, char **argv) pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); goto done; } - + /* rank=0 calls abort */ if (0 == myproc.rank) { PMIx_Abort(PMIX_ERR_OUT_OF_RESOURCE, "Eat rocks", @@ -102,8 +116,8 @@ int main(int argc, char **argv) done: /* finalize us */ pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); - PMIx_Deregister_errhandler(); - + PMIx_Deregister_errhandler(0, op_callbk, NULL); + if (PMIX_SUCCESS != (rc = PMIx_Finalize())) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c index 1cfc94c8a28..2cb82419c06 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c @@ -153,6 +153,10 @@ static void 
errhandler(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo); static void wait_signal_callback(int fd, short event, void *arg); +static void op_callbk(pmix_status_t status, void *cbdata); +static void errhandler_reg_callbk (pmix_status_t status, + int errhandler_ref, + void *cbdata); static void opcbfunc(pmix_status_t status, void *cbdata) { @@ -192,7 +196,7 @@ int main(int argc, char **argv) return rc; } /* register the errhandler */ - PMIx_Register_errhandler(NULL, 0, errhandler); + PMIx_Register_errhandler(NULL, 0, errhandler, errhandler_reg_callbk, NULL); /* setup the pub data, in case it is used */ PMIX_CONSTRUCT(&pubdata, pmix_list_t); @@ -293,7 +297,7 @@ int main(int argc, char **argv) pmix_argv_free(client_env); /* deregister the errhandler */ - PMIx_Deregister_errhandler(); + PMIx_Deregister_errhandler(0, op_callbk, NULL); /* release any pub data */ PMIX_LIST_DESTRUCT(&pubdata); @@ -355,6 +359,20 @@ static void errhandler(pmix_status_t status, pmix_output(0, "SERVER: ERRHANDLER CALLED WITH STATUS %d", status); } +static void op_callbk(pmix_status_t status, + void *cbdata) +{ + pmix_output(0, "SERVER: OP CALLBACK CALLED WITH STATUS %d", status); +} + +static void errhandler_reg_callbk (pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + pmix_output(0, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%d", + status, errhandler_ref); +} + static pmix_status_t connected(const pmix_proc_t *proc, void *server_object) { return PMIX_SUCCESS; @@ -413,7 +431,7 @@ static int abort_fn(const pmix_proc_t *proc, x->cbfunc = cbfunc; x->cbdata = cbdata; - if (PMIX_SUCCESS != (rc = PMIx_server_notify_error(status, procs, nprocs, + if (PMIX_SUCCESS != (rc = PMIx_Notify_error(status, procs, nprocs, &x->caller, 1, x->info, 2, abcbfunc, x))) { pmix_output(0, "SERVER: FAILED NOTIFY ERROR %d", (int)rc); diff --git a/opal/mca/pmix/pmix1xx/pmix1_client.c b/opal/mca/pmix/pmix1xx/pmix1_client.c index 
a54f4e581b6..6411264400e 100644 --- a/opal/mca/pmix/pmix1xx/pmix1_client.c +++ b/opal/mca/pmix/pmix1xx/pmix1_client.c @@ -33,6 +33,7 @@ static pmix_proc_t my_proc; static char *dbgvalue=NULL; +static int errhdler_ref = 0; static void myerr(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, @@ -71,6 +72,16 @@ static void myerr(pmix_status_t status, OPAL_LIST_DESTRUCT(&ilist); } +static void errreg_cbfunc (pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + errhdler_ref = errhandler_ref; + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%d", + status, errhandler_ref); +} + int pmix1_client_init(void) { opal_process_name_t pname; @@ -112,7 +123,7 @@ int pmix1_client_init(void) opal_proc_set_name(&pname); /* register the errhandler */ - PMIx_Register_errhandler(NULL, 0, myerr); + PMIx_Register_errhandler(NULL, 0, myerr, errreg_cbfunc, NULL ); return OPAL_SUCCESS; } @@ -125,7 +136,7 @@ int pmix1_client_finalize(void) "PMIx_client finalize"); /* deregister the errhandler */ - PMIx_Deregister_errhandler(); + PMIx_Deregister_errhandler(errhdler_ref, NULL, NULL); rc = PMIx_Finalize(); @@ -711,7 +722,7 @@ static void lk_cbfunc(pmix_status_t status, { pmix1_opcaddy_t *op = (pmix1_opcaddy_t*)cbdata; opal_pmix_pdata_t *d; - opal_list_t results, *r; + opal_list_t results, *r = NULL; int rc; size_t n; opal_pmix1_jobid_trkr_t *job, *jptr; @@ -764,10 +775,7 @@ static void lk_cbfunc(pmix_status_t status, } } r = &results; - } else { - r = NULL; } - release: /* execute the callback */ op->lkcbfunc(rc, r, op->cbdata); diff --git a/opal/mca/pmix/pmix1xx/pmix1_server_north.c b/opal/mca/pmix/pmix1xx/pmix1_server_north.c index a1d38cd4826..8ccaab7ec4f 100644 --- a/opal/mca/pmix/pmix1xx/pmix1_server_north.c +++ b/opal/mca/pmix/pmix1xx/pmix1_server_north.c @@ -79,6 +79,8 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro pmix_op_cbfunc_t cbfunc, 
void *cbdata); static pmix_status_t server_register_events(const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_deregister_events(const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); static pmix_status_t server_listener_fn(int listening_sd, pmix_connection_cbfunc_t cbfunc); @@ -95,6 +97,7 @@ pmix_server_module_t mymodule = { server_connect_fn, server_disconnect_fn, server_register_events, + server_deregister_events, server_listener_fn }; @@ -758,6 +761,11 @@ static pmix_status_t server_register_events(const pmix_info_t info[], size_t nin return pmix1_convert_opalrc(rc); } +static pmix_status_t server_deregister_events(const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + return PMIX_ERR_NOT_IMPLEMENTED; +} static pmix_status_t server_listener_fn(int listening_sd, pmix_connection_cbfunc_t cbfunc) { diff --git a/opal/mca/pmix/pmix1xx/pmix1_server_south.c b/opal/mca/pmix/pmix1xx/pmix1_server_south.c index ebaf00500a6..6b42afbf12c 100644 --- a/opal/mca/pmix/pmix1xx/pmix1_server_south.c +++ b/opal/mca/pmix/pmix1xx/pmix1_server_south.c @@ -47,6 +47,7 @@ extern pmix_server_module_t mymodule; extern opal_pmix_server_module_t *host_module; static char *dbgvalue=NULL; +static int errhdler_ref = 0; static void myerr(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, @@ -85,6 +86,16 @@ static void myerr(pmix_status_t status, OPAL_LIST_DESTRUCT(&ilist); } +static void errreg_cbfunc(pmix_status_t status, + int errhandler_ref, + void *cbdata) +{ + errhdler_ref = errhandler_ref; + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%d", + status, errhandler_ref); +} + int pmix1_server_init(opal_pmix_server_module_t *module) { pmix_status_t rc; @@ -102,7 +113,7 @@ int pmix1_server_init(opal_pmix_server_module_t *module) host_module = module; /* register the errhandler */ 
- PMIx_Register_errhandler(NULL, 0, myerr); + PMIx_Register_errhandler(NULL, 0, myerr, errreg_cbfunc, NULL); return OPAL_SUCCESS; } @@ -111,7 +122,7 @@ int pmix1_server_finalize(void) pmix_status_t rc; /* deregister the errhandler */ - PMIx_Deregister_errhandler(); + PMIx_Deregister_errhandler(errhdler_ref, NULL, NULL); rc = PMIx_server_finalize(); return pmix1_convert_rc(rc); @@ -354,7 +365,7 @@ int pmix1_server_notify_error(int status, op->cbdata = cbdata; rc = pmix1_convert_opalrc(status); - rc = PMIx_server_notify_error(rc, ps, psz, eps, esz, + rc = PMIx_Notify_error(rc, ps, psz, eps, esz, pinfo, sz, opcbfunc, op); if (PMIX_SUCCESS != rc) { OBJ_RELEASE(op); From 6f37c0e3e86ac9c5416d786775cce30c95e7e330 Mon Sep 17 00:00:00 2001 From: annu13 Date: Fri, 2 Oct 2015 17:25:48 -0700 Subject: [PATCH 2/5] sync with PMIX master --- .../pmix/pmix1xx/pmix/src/common/Makefile.am | 11 +++++ .../pmix1xx/pmix/src/common/pmix_common.c | 44 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/common/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/common/Makefile.am b/opal/mca/pmix/pmix1xx/pmix/src/common/Makefile.am new file mode 100644 index 00000000000..099a99903e4 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/src/common/Makefile.am @@ -0,0 +1,11 @@ +# +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources += \ + src/common/pmix_common.c diff --git a/opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c b/opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c new file mode 100644 index 00000000000..43a872d85b3 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c @@ -0,0 +1,44 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include "src/include/pmix_globals.h" + +void PMIx_Register_errhandler(pmix_info_t info[], size_t ninfo, + pmix_notification_fn_t errhandler, + pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata) +{ + /* common err handler registration to be added */ +} + +void PMIx_Deregister_errhandler(int errhandler_ref, + pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + /* common err handler deregistration goes here */ +} + +pmix_status_t PMIx_Notify_error(pmix_status_t status, + pmix_proc_t procs[], size_t nprocs, + pmix_proc_t error_procs[], size_t error_nprocs, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + /* common err notify goes here */ + return PMIX_SUCCESS; +} From 30ba00e05d9fd3a2bef0f9669e64ad19ff64782e Mon Sep 17 00:00:00 2001 From: annu13 Date: Tue, 6 Oct 2015 06:04:54 -0700 Subject: [PATCH 3/5] sync with master --- opal/mca/pmix/pmix1xx/pmix1_client.c | 2 +- opal/mca/pmix/pmix1xx/pmix1_server_south.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/opal/mca/pmix/pmix1xx/pmix1_client.c b/opal/mca/pmix/pmix1xx/pmix1_client.c index 6411264400e..f1ba0d58916 100644 --- a/opal/mca/pmix/pmix1xx/pmix1_client.c +++ b/opal/mca/pmix/pmix1xx/pmix1_client.c @@ -77,7 +77,7 @@ static void errreg_cbfunc (pmix_status_t status, void *cbdata) { errhdler_ref = errhandler_ref; - opal_output_verbose(1, opal_pmix_base_framework.framework_output, + opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%d", status, errhandler_ref); } diff --git a/opal/mca/pmix/pmix1xx/pmix1_server_south.c b/opal/mca/pmix/pmix1xx/pmix1_server_south.c index 6b42afbf12c..ae42de0082d 100644 --- a/opal/mca/pmix/pmix1xx/pmix1_server_south.c +++ b/opal/mca/pmix/pmix1xx/pmix1_server_south.c @@ -91,7 +91,7 @@ static 
void errreg_cbfunc(pmix_status_t status, void *cbdata) { errhdler_ref = errhandler_ref; - opal_output_verbose(1, opal_pmix_base_framework.framework_output, + opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%d", status, errhandler_ref); } From 5787e9248f3c720acd5487912936524f16b37db9 Mon Sep 17 00:00:00 2001 From: annu13 Date: Tue, 6 Oct 2015 06:25:36 -0700 Subject: [PATCH 4/5] cleaned up debug stmts --- opal/mca/pmix/pmix1xx/pmix/examples/server.c | 32 ++++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/server.c b/opal/mca/pmix/pmix1xx/pmix/examples/server.c index e80ce99e53f..1822852bd0c 100644 --- a/opal/mca/pmix/pmix1xx/pmix/examples/server.c +++ b/opal/mca/pmix/pmix1xx/pmix/examples/server.c @@ -343,26 +343,26 @@ static void errhandler(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo) { - pmix_output(0, "SERVER: ERRHANDLER CALLED WITH STATUS %d", status); + pmix_output_verbose(2, "SERVER: ERRHANDLER CALLED WITH STATUS %d", status); } static void op_callbk(pmix_status_t status, void *cbdata) { - pmix_output(0, "SERVER: OP CALLBACK CALLED WITH STATUS %d", status); + pmix_output_verbose(2, "SERVER: OP CALLBACK CALLED WITH STATUS %d", status); } static void errhandler_reg_callbk (pmix_status_t status, int errhandler_ref, void *cbdata) { - pmix_output(0, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%d", + pmix_output_verbose(2, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%d", status, errhandler_ref); } static int connected(const pmix_proc_t *proc, void *server_object) { - pmix_output(0, "SERVER: CONNECTED %s:%d", proc->nspace, proc->rank); + pmix_output_verbose(2, "SERVER: CONNECTED %s:%d", proc->nspace, proc->rank); return PMIX_SUCCESS; } @@ -370,7 +370,7 @@ static int connected(const pmix_proc_t *proc, void 
*server_object) static int finalized(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: FINALIZED %s:%d", proc->nspace, proc->rank); + pmix_output_verbose(2, "SERVER: FINALIZED %s:%d", proc->nspace, proc->rank); --wakeup; /* ensure we call the cbfunc so the proc can exit! */ if (NULL != cbfunc) { @@ -399,7 +399,7 @@ static pmix_status_t abort_fn(const pmix_proc_t *proc, pmix_status_t rc; myxfer_t *x; - pmix_output(0, "SERVER: ABORT on %s:%d", procs[0].nspace, procs[0].rank); + pmix_output_verbose(2, "SERVER: ABORT on %s:%d", procs[0].nspace, procs[0].rank); /* instead of aborting the specified procs, notify them * (if they have registered their errhandler) */ @@ -423,7 +423,7 @@ static pmix_status_t abort_fn(const pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = PMIx_Notify_error(status, procs, nprocs, &x->caller, 1, x->info, 2, abcbfunc, x))) { - pmix_output(0, "SERVER: FAILED NOTIFY ERROR %d", (int)rc); + pmix_output_verbose(1, "SERVER: FAILED NOTIFY ERROR %d", (int)rc); } return PMIX_SUCCESS; @@ -435,7 +435,7 @@ static int fencenb_fn(const pmix_proc_t procs[], size_t nprocs, char *data, size_t ndata, pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: FENCENB"); + pmix_output_verbose(2, "SERVER: FENCENB"); /* pass the provided data back to each participating proc */ if (NULL != cbfunc) { cbfunc(PMIX_SUCCESS, data, ndata, cbdata, NULL, NULL); @@ -448,7 +448,7 @@ static int dmodex_fn(const pmix_proc_t *proc, const pmix_info_t info[], size_t ninfo, pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: DMODEX"); + pmix_output_verbose(2, "SERVER: DMODEX"); /* we don't have any data for remote procs as this * test only runs one server - so report accordingly */ @@ -466,7 +466,7 @@ static int publish_fn(const pmix_proc_t *proc, pmix_locdat_t *p; size_t n; - pmix_output(0, "SERVER: PUBLISH"); + pmix_output_verbose(2, "SERVER: PUBLISH"); for (n=0; n < ninfo; n++) { p = 
PMIX_NEW(pmix_locdat_t); @@ -493,7 +493,7 @@ static int lookup_fn(const pmix_proc_t *proc, char **keys, pmix_pdata_t *pd; pmix_status_t ret = PMIX_ERR_NOT_FOUND; - pmix_output(0, "SERVER: LOOKUP"); + pmix_output_verbose(2, "SERVER: LOOKUP"); PMIX_CONSTRUCT(&results, pmix_list_t); @@ -539,7 +539,7 @@ static int unpublish_fn(const pmix_proc_t *proc, char **keys, pmix_locdat_t *p, *p2; size_t n; - pmix_output(0, "SERVER: UNPUBLISH"); + pmix_output_verbose(2, "SERVER: UNPUBLISH"); for (n=0; NULL != keys[n]; n++) { PMIX_LIST_FOREACH_SAFE(p, p2, &pubdata, pmix_locdat_t) { @@ -572,7 +572,7 @@ static int spawn_fn(const pmix_proc_t *proc, { myxfer_t *x; - pmix_output(0, "SERVER: SPAWN"); + pmix_output_verbose(2, "SERVER: SPAWN"); /* in practice, we would pass this request to the local * resource manager for launch, and then have that server @@ -595,7 +595,7 @@ static int connect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: CONNECT"); + pmix_output_verbose(2, "SERVER: CONNECT"); /* in practice, we would pass this request to the local * resource manager for handling */ @@ -612,7 +612,7 @@ static int disconnect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: DISCONNECT"); + pmix_output_verbose(2, "SERVER: DISCONNECT"); /* in practice, we would pass this request to the local * resource manager for handling */ @@ -627,7 +627,7 @@ static int disconnect_fn(const pmix_proc_t procs[], size_t nprocs, static int register_events_fn(const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: REGISTER EVENTS"); + pmix_output_verbose(2, "SERVER: REGISTER EVENTS"); /* in practice, we would pass this request to the local * resource manager for handling */ From cc5e1e26a529c492b3f476fe5cd4b2ce542c3caf Mon Sep 17 00:00:00 2001 From: annu13 
Date: Fri, 9 Oct 2015 15:17:43 -0700 Subject: [PATCH 5/5] sync with pmix master (repo_rev git69c398e) --- opal/mca/pmix/pmix1xx/pmix/VERSION | 4 +- opal/mca/pmix/pmix1xx/pmix/examples/server.c | 32 +- opal/mca/pmix/pmix1xx/pmix/include/pmi2.h | 108 +++-- .../pmix/include/pmix/pmix_common.h.in | 4 +- .../pmix/pmix1xx/pmix/include/private/types.h | 2 + .../pmix/pmix1xx/pmix/src/buffer_ops/pack.c | 2 + .../pmix/pmix1xx/pmix/src/buffer_ops/unpack.c | 2 + opal/mca/pmix/pmix1xx/pmix/src/client/pmi1.c | 335 +++++++++----- opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c | 242 ++++++++-- .../pmix1xx/pmix/src/client/pmix_client.c | 2 + .../pmix/src/client/pmix_client_fence.c | 2 + .../pmix1xx/pmix/src/client/pmix_client_get.c | 2 + .../pmix1xx/pmix/src/server/pmix_server.c | 2 + .../pmix1xx/pmix/src/server/pmix_server_ops.c | 2 + .../pmix1xx/pmix/src/server/pmix_server_ops.h | 2 + opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c | 2 + opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c | 95 +++- opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c | 430 ++++++++++++++++-- opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c | 2 + .../pmix/pmix1xx/pmix/test/server_callbacks.c | 2 + .../pmix/pmix1xx/pmix/test/server_callbacks.h | 2 + opal/mca/pmix/pmix1xx/pmix/test/test_common.c | 2 + opal/mca/pmix/pmix1xx/pmix/test/test_common.h | 2 + opal/mca/pmix/pmix1xx/pmix/test/test_fence.c | 2 + opal/mca/pmix/pmix1xx/pmix/test/test_fence.h | 2 + .../mca/pmix/pmix1xx/pmix/test/test_publish.c | 2 + .../mca/pmix/pmix1xx/pmix/test/test_publish.h | 2 + .../pmix1xx/pmix/test/test_resolve_peers.c | 2 + .../pmix1xx/pmix/test/test_resolve_peers.h | 2 + opal/mca/pmix/pmix1xx/pmix/test/test_spawn.c | 2 + opal/mca/pmix/pmix1xx/pmix/test/test_spawn.h | 2 + opal/mca/pmix/pmix1xx/pmix/test/utils.c | 2 + opal/mca/pmix/pmix1xx/pmix/test/utils.h | 2 + 33 files changed, 1023 insertions(+), 277 deletions(-) diff --git a/opal/mca/pmix/pmix1xx/pmix/VERSION b/opal/mca/pmix/pmix1xx/pmix/VERSION index 3952e1f675c..5fac8bdaf60 100644 --- 
a/opal/mca/pmix/pmix1xx/pmix/VERSION +++ b/opal/mca/pmix/pmix1xx/pmix/VERSION @@ -30,7 +30,7 @@ greek=a1 # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git3c37421 +repo_rev=git69c398e # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Oct 02, 2015" +date="Oct 09, 2015" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/server.c b/opal/mca/pmix/pmix1xx/pmix/examples/server.c index 1822852bd0c..12b7bcaa8e7 100644 --- a/opal/mca/pmix/pmix1xx/pmix/examples/server.c +++ b/opal/mca/pmix/pmix1xx/pmix/examples/server.c @@ -343,26 +343,26 @@ static void errhandler(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_info_t info[], size_t ninfo) { - pmix_output_verbose(2, "SERVER: ERRHANDLER CALLED WITH STATUS %d", status); + pmix_output_verbose(0, pmix_globals.debug_output, "SERVER: ERRHANDLER CALLED WITH STATUS %d", status); } static void op_callbk(pmix_status_t status, void *cbdata) { - pmix_output_verbose(2, "SERVER: OP CALLBACK CALLED WITH STATUS %d", status); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: OP CALLBACK CALLED WITH STATUS %d", status); } static void errhandler_reg_callbk (pmix_status_t status, int errhandler_ref, void *cbdata) { - pmix_output_verbose(2, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%d", + pmix_output_verbose(1, pmix_globals.debug_output, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%d", status, errhandler_ref); } static int connected(const pmix_proc_t *proc, void *server_object) { - pmix_output_verbose(2, "SERVER: CONNECTED %s:%d", proc->nspace, proc->rank); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: CONNECTED %s:%d", 
proc->nspace, proc->rank); return PMIX_SUCCESS; } @@ -370,7 +370,7 @@ static int connected(const pmix_proc_t *proc, void *server_object) static int finalized(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output_verbose(2, "SERVER: FINALIZED %s:%d", proc->nspace, proc->rank); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: FINALIZED %s:%d", proc->nspace, proc->rank); --wakeup; /* ensure we call the cbfunc so the proc can exit! */ if (NULL != cbfunc) { @@ -399,7 +399,7 @@ static pmix_status_t abort_fn(const pmix_proc_t *proc, pmix_status_t rc; myxfer_t *x; - pmix_output_verbose(2, "SERVER: ABORT on %s:%d", procs[0].nspace, procs[0].rank); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: ABORT on %s:%d", procs[0].nspace, procs[0].rank); /* instead of aborting the specified procs, notify them * (if they have registered their errhandler) */ @@ -423,7 +423,7 @@ static pmix_status_t abort_fn(const pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = PMIx_Notify_error(status, procs, nprocs, &x->caller, 1, x->info, 2, abcbfunc, x))) { - pmix_output_verbose(1, "SERVER: FAILED NOTIFY ERROR %d", (int)rc); + pmix_output_verbose(0, pmix_globals.debug_output, "SERVER: FAILED NOTIFY ERROR %d", (int)rc); } return PMIX_SUCCESS; @@ -435,7 +435,7 @@ static int fencenb_fn(const pmix_proc_t procs[], size_t nprocs, char *data, size_t ndata, pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix_output_verbose(2, "SERVER: FENCENB"); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: FENCENB"); /* pass the provided data back to each participating proc */ if (NULL != cbfunc) { cbfunc(PMIX_SUCCESS, data, ndata, cbdata, NULL, NULL); @@ -448,7 +448,7 @@ static int dmodex_fn(const pmix_proc_t *proc, const pmix_info_t info[], size_t ninfo, pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix_output_verbose(2, "SERVER: DMODEX"); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: DMODEX"); /* we don't have any data for 
remote procs as this * test only runs one server - so report accordingly */ @@ -466,7 +466,7 @@ static int publish_fn(const pmix_proc_t *proc, pmix_locdat_t *p; size_t n; - pmix_output_verbose(2, "SERVER: PUBLISH"); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: PUBLISH"); for (n=0; n < ninfo; n++) { p = PMIX_NEW(pmix_locdat_t); @@ -493,7 +493,7 @@ static int lookup_fn(const pmix_proc_t *proc, char **keys, pmix_pdata_t *pd; pmix_status_t ret = PMIX_ERR_NOT_FOUND; - pmix_output_verbose(2, "SERVER: LOOKUP"); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: LOOKUP"); PMIX_CONSTRUCT(&results, pmix_list_t); @@ -539,7 +539,7 @@ static int unpublish_fn(const pmix_proc_t *proc, char **keys, pmix_locdat_t *p, *p2; size_t n; - pmix_output_verbose(2, "SERVER: UNPUBLISH"); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: UNPUBLISH"); for (n=0; NULL != keys[n]; n++) { PMIX_LIST_FOREACH_SAFE(p, p2, &pubdata, pmix_locdat_t) { @@ -572,7 +572,7 @@ static int spawn_fn(const pmix_proc_t *proc, { myxfer_t *x; - pmix_output_verbose(2, "SERVER: SPAWN"); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: SPAWN"); /* in practice, we would pass this request to the local * resource manager for launch, and then have that server @@ -595,7 +595,7 @@ static int connect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output_verbose(2, "SERVER: CONNECT"); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: CONNECT"); /* in practice, we would pass this request to the local * resource manager for handling */ @@ -612,7 +612,7 @@ static int disconnect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output_verbose(2, "SERVER: DISCONNECT"); + pmix_output_verbose(2, pmix_globals.debug_output,"SERVER: DISCONNECT"); /* in practice, we would pass this request to the local * resource 
manager for handling */ @@ -627,7 +627,7 @@ static int disconnect_fn(const pmix_proc_t procs[], size_t nprocs, static int register_events_fn(const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output_verbose(2, "SERVER: REGISTER EVENTS"); + pmix_output_verbose(2, pmix_globals.debug_output, "SERVER: REGISTER EVENTS"); /* in practice, we would pass this request to the local * resource manager for handling */ diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmi2.h b/opal/mca/pmix/pmix1xx/pmix/include/pmi2.h index a01de5f30d9..77052916040 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmi2.h +++ b/opal/mca/pmix/pmix1xx/pmix/include/pmi2.h @@ -89,7 +89,7 @@ typedef struct PMI_keyval_t cannot access the KVS spaces of another job (this may happen, for example, if each mpiexec creates the KVS spaces for the processes that it manages). - + @*/ typedef struct PMI2_Connect_comm { int (*read)( void *buf, int maxlen, void *ctx ); @@ -107,10 +107,10 @@ typedef struct PMI2_Connect_comm { . size - number of processes in the job . rank - rank of this process in the job - appnum - which executable is this on the mpiexec commandline - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. - + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. + Notes: Initialize PMI for this process group. The value of spawned indicates whether this process was created by 'PMI2_Spawn_multiple'. 'spawned' will be non-zero @@ -121,13 +121,13 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum); /*@ PMI2_Finalize - finalize the Process Manager Interface - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. - + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. + Notes: Finalize PMI for this job. 
- + @*/ int PMI2_Finalize(void); @@ -136,17 +136,17 @@ int PMI2_Finalize(void); Return values: Non-zero if PMI2_Initialize has been called successfully, zero otherwise. - + @*/ int PMI2_Initialized(void); /*@ PMI2_Abort - abort the process group associated with this process - + Input Parameters: + flag - non-zero if all processes in this job should abort, zero otherwise - error_msg - error message to be printed - + Return values: If the abort succeeds this function will not return. Returns an MPI error code otherwise. @@ -163,7 +163,7 @@ int PMI2_Abort(int flag, const char msg[]); . argcs - size of argv arrays for each command string . argvs - array of argv arrays for each command string . maxprocs - array of maximum processes to spawn for each command string - . info_keyval_sizes - array giving the number of elements in each of the + . info_keyval_sizes - array giving the number of elements in each of the 'info_keyval_vectors' . info_keyval_vectors - array of keyval vector arrays . preput_keyval_size - Number of elements in 'preput_keyval_vector' @@ -175,7 +175,7 @@ int PMI2_Abort(int flag, const char msg[]); - errors - array of errors for each command Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. Notes: This function spawns a set of processes into a new job. The 'count' @@ -184,7 +184,7 @@ int PMI2_Abort(int flag, const char msg[]); to the size of the 'preput_keyval_vector' array. The 'preput_keyval_vector' contains keyval pairs that will be put in the keyval space of the newly created job before the processes are started. The 'maxprocs' array - specifies the desired number of processes to create for each 'cmd' string. + specifies the desired number of processes to create for each 'cmd' string. The actual number of processes may be less than the numbers specified in maxprocs. 
The acceptable number of processes spawned may be controlled by ``soft'' keyvals in the info arrays. The ``soft'' option is specified by @@ -202,20 +202,38 @@ int PMI2_Job_Spawn(int count, const char * cmds[], int errors[]); /*@ - PMI2_Job_GetId - get job id of this job + PMI2_Job_GetId - get job id of this job Input parameters: . jobid_size - size of buffer provided in jobid Output parameters: . jobid - the job id of this job - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. @*/ int PMI2_Job_GetId(char jobid[], int jobid_size); +/*@ + PMI2_Job_GetRank - get rank of this process in the job + Output parameters: + . rank - the rank of this process in the job + Return values: + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. +@*/ +int PMI2_Job_GetRank(int* rank); + +/*@ + PMI2_Info_GetSize - get the number of processes on the node + Output parameters: + . size - the number of processes on the node + Return values: + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. +@*/ +int PMI2_Info_GetSize(int* size); + /*@ PMI2_Job_Connect - connect to the parallel job with ID jobid @@ -225,9 +243,9 @@ int PMI2_Job_GetId(char jobid[], int jobid_size); Output parameters: . conn - connection structure used to exteblish communication with the remote job - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. Notes: This just "registers" the other parallel job as part of a parallel @@ -247,7 +265,7 @@ int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn); . jobid - job id of the job to connect to Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure.
@*/ int PMI2_Job_Disconnect(const char jobid[]); @@ -258,9 +276,9 @@ int PMI2_Job_Disconnect(const char jobid[]); Input Parameters: + key - key - value - value - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. Notes: If multiple PMI2_KVS_Put calls are made with the same key between @@ -274,7 +292,7 @@ int PMI2_KVS_Put(const char key[], const char value[]); PMI2_KVS_Fence - commit all PMI2_KVS_Put calls made before this fence Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. Notes: This is a collective call across the job. It has semantics that are @@ -287,7 +305,7 @@ int PMI2_KVS_Put(const char key[], const char value[]); their corresponding PMI2_KVS_Fence until some process issues a PMI2_KVS_Get. This might be appropriate for some wide-area implementations. - + @*/ int PMI2_KVS_Fence(void); @@ -308,9 +326,9 @@ int PMI2_KVS_Fence(void); + value - value associated with key - vallen - length of the returned value, or, if the length is longer than maxvalue, the negative of the required length is returned - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. @*/ int PMI2_KVS_Get(const char *jobid, int src_pmi_id, const char key[], char value [], int maxvalue, int *vallen); @@ -328,9 +346,9 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id, const char key[], char value Output Parameters: + value - value of the attribute - found - non-zero indicates that the attribute was found - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 
Notes: This provides a way, when combined with PMI2_Info_PutNodeAttr, for @@ -367,9 +385,9 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo + array - value of attribute . outlen - number of elements returned - found - non-zero if attribute was found - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. Notes: Notice that, unlike PMI2_Info_GetNodeAttr, this function does not @@ -398,12 +416,12 @@ int PMI2_Info_GetNodeAttrIntArray(const char name[], int array[], int arraylen, - value - the value of the attribute Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. Notes: For example, it might be used to share segment ids with other processes on the same SMP node. - + @*/ int PMI2_Info_PutNodeAttr(const char name[], const char value[]); @@ -418,9 +436,9 @@ int PMI2_Info_PutNodeAttr(const char name[], const char value[]); Output Parameters: + value - value of the attribute - found - non-zero indicates that the attribute was found - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. @*/ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *found); @@ -437,9 +455,9 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou + array - value of attribute . outlen - number of elements returned - found - non-zero if attribute was found - + Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. Predefined array attribute names: @@ -449,7 +467,7 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou . 
hasNameServ - The value hasNameServ is true if the PMI2 environment supports the name service operations (publish, lookup, and unpublish). - + . physTopology - Return the topology of the underlying network. The valid topology types include cartesian, hierarchical, complete, kautz, hypercube; additional types may be added as necessary. If @@ -471,7 +489,7 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou is cartesian,complete. All processes are connected by the cartesian part of this, but for each complete network, only the processes on the same node are connected. - + . cartDims - Return a string of comma-separated values describing the dimensions of the Cartesian topology. This must be consistent with the value of cartCoords that may be returned by @@ -482,7 +500,7 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou PMI interface and how extensions can be added within the same API and wire protocol. For example, adding more complex network topologies requires only adding new keys, not new routines. - + . isHeterogeneous - The value isHeterogeneous is true if the processes belonging to the job are running on nodes with different underlying data models. @@ -491,7 +509,7 @@ int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *fou int PMI2_Info_GetJobAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *found); /*@ - PMI2_Nameserv_publish - publish a name + PMI2_Nameserv_publish - publish a name Input parameters: + service_name - string representing the service being published @@ -499,7 +517,7 @@ int PMI2_Info_GetJobAttrIntArray(const char name[], int array[], int arraylen, i - port - string representing the port on which to contact the service Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 
@*/ int PMI2_Nameserv_publish(const char service_name[], const PMI_keyval_t *info_ptr, const char port[]); @@ -511,12 +529,12 @@ int PMI2_Nameserv_publish(const char service_name[], const PMI_keyval_t *info_pt + service_name - string representing the service being published . info_ptr - - portLen - size of buffer provided in port - + Output parameters: . port - string representing the port on which to contact the service Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. @*/ int PMI2_Nameserv_lookup(const char service_name[], const PMI_keyval_t *info_ptr, @@ -529,10 +547,10 @@ int PMI2_Nameserv_lookup(const char service_name[], const PMI_keyval_t *info_ptr - info_ptr - Return values: - Returns 'MPI_SUCCESS' on success and an MPI error code on failure. + Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. @*/ -int PMI2_Nameserv_unpublish(const char service_name[], +int PMI2_Nameserv_unpublish(const char service_name[], const PMI_keyval_t *info_ptr); diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in b/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in index 48f3fbf8098..0216e34aa24 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in +++ b/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in @@ -470,10 +470,10 @@ typedef struct { } while(0); #define PMIX_INFO_LOAD(m, k, v, t) \ - if (NULL != (m)) { \ + do { \ (void)strncpy((m)->key, (k), PMIX_MAX_KEYLEN); \ pmix_value_load(&((m)->value), (v), (t)); \ - } + } while(0); /**** PMIX LOOKUP RETURN STRUCT ****/ diff --git a/opal/mca/pmix/pmix1xx/pmix/include/private/types.h b/opal/mca/pmix/pmix1xx/pmix/include/private/types.h index 9b45e1d9b96..e40c3687b3a 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/private/types.h +++ b/opal/mca/pmix/pmix1xx/pmix/include/private/types.h @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of 
California. * All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c b/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c index cf453eeaf77..be3a894111b 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c @@ -13,6 +13,8 @@ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/unpack.c b/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/unpack.c index 45c8b201b73..2f7f8a4cb28 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/unpack.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/unpack.c @@ -13,6 +13,8 @@ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi1.c b/opal/mca/pmix/pmix1xx/pmix/src/client/pmi1.c index 9119e97be2c..808f9e39849 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi1.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/client/pmi1.c @@ -3,6 +3,8 @@ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -37,53 +39,79 @@ #include "src/util/error.h" #include "src/util/output.h" +#define PMI_MAX_ID_LEN PMIX_MAX_NSLEN /* Maximim size of PMI process group ID */ +#define PMI_MAX_KEY_LEN PMIX_MAX_KEYLEN /* Maximum size of a PMI key */ +#define PMI_MAX_KVSNAME_LEN PMIX_MAX_NSLEN /* Maximum size of KVS name */ +#define PMI_MAX_VAL_LEN 4096 /* Maximum size of a PMI value */ + +#define PMI_CHECK() \ + do { \ + if (!pmi_init) { \ + return PMI_FAIL; \ + } \ + } while (0) + /* local functions */ static pmix_status_t convert_int(int *value, pmix_value_t *kv); static int convert_err(pmix_status_t rc); static pmix_proc_t myproc; static bool data_commited = false; +static int pmi_init = 0; -int PMI_Init( int *spawned ) +int PMI_Init(int *spawned) { - pmix_value_t *kv; + pmix_value_t *val; pmix_status_t rc; if (PMIX_SUCCESS != PMIx_Init(&myproc)) { return PMI_ERR_INIT; } - if (NULL == spawned) { - return PMI_SUCCESS; + if (NULL != spawned) { + /* get the spawned flag */ + if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_SPAWNED, NULL, 0, &val)) { + rc = convert_int(spawned, val); + PMIX_VALUE_RELEASE(val); + if (PMIX_SUCCESS != rc) { + return convert_err(rc); + } + } else { + /* if not found, default to not spawned */ + *spawned = 0; + } } + pmi_init = 1; - /* get the spawned flag - this will likely pull - * down all attributes assigned to the job, thus - * making all subsequent "get" operations purely - * local */ - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_SPAWNED, NULL, 0, &kv)) { - rc = convert_int(spawned, kv); - PMIX_VALUE_RELEASE(kv); - return convert_err(rc); - } - /* if it wasn't found, then default to "not spawned" */ - *spawned = 0; return PMI_SUCCESS; } int PMI_Initialized(PMI_BOOL *initialized) { - *initialized = (PMI_BOOL)PMIx_Initialized(); + if (NULL == initialized) { + return PMI_ERR_INVALID_ARG; + } + + *initialized = (PMIx_Initialized() ? 
PMI_TRUE : PMI_FALSE); + return PMI_SUCCESS; } int PMI_Finalize(void) { - return PMIx_Finalize(); + pmix_status_t rc = PMIX_SUCCESS; + + PMI_CHECK(); + + pmi_init = 0; + rc = PMIx_Finalize(); + return convert_err(rc); } int PMI_Abort(int flag, const char msg[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; + + PMI_CHECK(); rc = PMIx_Abort(flag, msg, NULL, 0); return convert_err(rc); @@ -93,12 +121,23 @@ int PMI_Abort(int flag, const char msg[]) * provided kvsname as we only put into our own nspace */ int PMI_KVS_Put(const char kvsname[], const char key[], const char value[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_value_t val; + PMI_CHECK(); + + if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN)) { + return PMI_ERR_INVALID_KVS; + } + if ((key == NULL) || (strlen(key) >PMI_MAX_KEY_LEN)) { + return PMI_ERR_INVALID_KEY; + } + if ((value == NULL) || (strlen(value) > PMI_MAX_VAL_LEN)) { + return PMI_ERR_INVALID_VAL; + } + pmix_output_verbose(2, pmix_globals.debug_output, - "PMI_KVS_Put: KVS=%s, key=%s value=%s", - kvsname, key, value); + "PMI_KVS_Put: KVS=%s, key=%s value=%s", kvsname, key, value); val.type = PMIX_STRING; val.data.string = (char*)value; @@ -109,10 +148,16 @@ int PMI_KVS_Put(const char kvsname[], const char key[], const char value[]) /* KVS_Commit */ int PMI_KVS_Commit(const char kvsname[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; - pmix_output_verbose(2, pmix_globals.debug_output, - "PMI_KVS_Commit: KVS=%s", kvsname); + PMI_CHECK(); + + if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN)) { + return PMI_ERR_INVALID_KVS; + } + + pmix_output_verbose(2, pmix_globals.debug_output, "PMI_KVS_Commit: KVS=%s", + kvsname); rc = PMIx_Commit(); /* PMIx permits only one data commit! 
*/ @@ -122,27 +167,38 @@ int PMI_KVS_Commit(const char kvsname[]) int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int length) { + pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; uint32_t i; static pmix_proc_t proc; uint32_t procnum; proc = myproc; - int rc; + + PMI_CHECK(); + + if ((kvsname == NULL) || (strlen(kvsname) > PMI_MAX_KVSNAME_LEN)) { + return PMI_ERR_INVALID_KVS; + } + if ((key == NULL) || (strlen(key) >PMI_MAX_KEY_LEN)) { + return PMI_ERR_INVALID_KEY; + } + if (value == NULL) { + return PMI_ERR_INVALID_VAL; + } pmix_output_verbose(2, pmix_globals.debug_output, - "PMI_KVS_Get: KVS=%s, key=%s value=%s", - kvsname, key, value); + "PMI_KVS_Get: KVS=%s, key=%s value=%s", kvsname, key, value); /* PMI-1 expects resource manager to set * process mapping in ANL notation. */ - if( !strcmp(key, ANL_MAPPING) ) { + if (!strcmp(key, ANL_MAPPING)) { /* we are looking in the job-data. If there is nothing there * we don't want to look in rank's data, thus set rank to widcard */ proc.rank = PMIX_RANK_WILDCARD; - if( PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) && - (NULL != val) && (PMIX_STRING == val->type) ){ - strncpy(value,val->data.string,length); - PMIX_VALUE_FREE(val,1); + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_ANL_MAP, NULL, 0, &val) && + (NULL != val) && (PMIX_STRING == val->type)) { + strncpy(value, val->data.string, length); + PMIX_VALUE_FREE(val, 1); return PMI_SUCCESS; } else { /* artpol: @@ -167,47 +223,49 @@ int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int lengt * an error and don't try to use direct modex. 
*/ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc,PMIX_JOB_SIZE, NULL, 0,&val))) { + if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val))) { pmix_output_verbose(2, pmix_globals.debug_output, - "pmi1: executing put for KVS %s, key %s value %s", - kvsname, key, value); - return convert_err(rc); + "pmi1: executing put for KVS %s, key %s value %s", kvsname, key, + value); + return convert_err(rc); } procnum = val->data.uint32; - PMIX_VALUE_FREE(val,1); + PMIX_VALUE_FREE(val, 1); - for( i=0; i < procnum; i++){ + for (i = 0; i < procnum; i++) { proc.rank = i; - if( PMIX_SUCCESS == PMIx_Get(&proc, key, NULL, 0, &val) && - (NULL != val) && (PMIX_STRING == val->type) ){ - strncpy(value,val->data.string,length); - PMIX_VALUE_FREE(val,1); + if (PMIX_SUCCESS == PMIx_Get(&proc, key, NULL, 0, &val) && (NULL != val) + && (PMIX_STRING == val->type)) { + strncpy(value, val->data.string, length); + PMIX_VALUE_FREE(val, 1); return PMI_SUCCESS; } - PMIX_VALUE_FREE(val,1); + PMIX_VALUE_FREE(val, 1); } return PMI_FAIL; } - /* Barrier only applies to our own nspace, and we want all * data to be collected upon completion */ int PMI_Barrier(void) { + pmix_status_t rc = PMIX_SUCCESS; pmix_info_t buf; - int rc, ninfo = 0; + int ninfo = 0; pmix_info_t *info = NULL; - if( data_commited ){ - bool val = 1; + PMI_CHECK(); + + if (data_commited) { + bool val = 1; info = &buf; PMIX_INFO_CONSTRUCT(info); - PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &val, PMIX_BOOL ); + PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &val, PMIX_BOOL); ninfo = 1; } rc = PMIx_Fence(NULL, 0, info, ninfo); - if( NULL != info ){ + if (NULL != info) { PMIX_INFO_DESTRUCT(info); } return rc; @@ -215,16 +273,18 @@ int PMI_Barrier(void) int PMI_Get_size(int *size) { - pmix_value_t *kv; - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; + pmix_value_t *val; + + PMI_CHECK(); if (NULL == size) { - return PMI_FAIL; + return PMI_ERR_INVALID_ARG; } - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &kv)) { 
- rc = convert_int(size, kv); - PMIX_VALUE_RELEASE(kv); + if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val)) { + rc = convert_int(size, val); + PMIX_VALUE_RELEASE(val); return convert_err(rc); } @@ -233,26 +293,30 @@ int PMI_Get_size(int *size) int PMI_Get_rank(int *rk) { + PMI_CHECK(); + if (NULL == rk) { - return PMI_FAIL; + return PMI_ERR_INVALID_ARG; } - *rk = pmix_globals.myid.rank; + *rk = myproc.rank; return PMI_SUCCESS; } int PMI_Get_universe_size(int *size) { - pmix_value_t *kv; - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; + pmix_value_t *val; + + PMI_CHECK(); if (NULL == size) { - return PMI_FAIL; + return PMI_ERR_INVALID_ARG; } - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &kv)) { - rc = convert_int(size, kv); - PMIX_VALUE_RELEASE(kv); + if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val)) { + rc = convert_int(size, val); + PMIX_VALUE_RELEASE(val); return convert_err(rc); } return PMI_FAIL; @@ -260,13 +324,15 @@ int PMI_Get_universe_size(int *size) int PMI_Get_appnum(int *appnum) { - pmix_value_t *kv; - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; + pmix_value_t *val; + + PMI_CHECK(); if (NULL != appnum && - PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_APPNUM, NULL, 0, &kv)) { - rc = convert_int(appnum, kv); - PMIX_VALUE_RELEASE(kv); + PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_APPNUM, NULL, 0, &val)) { + rc = convert_int(appnum, val); + PMIX_VALUE_RELEASE(val); return convert_err(rc); } @@ -275,16 +341,19 @@ int PMI_Get_appnum(int *appnum) int PMI_Publish_name(const char service_name[], const char port[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_info_t info; + PMI_CHECK(); + if (NULL == service_name || NULL == port) { - return convert_err(PMIX_ERR_BAD_PARAM); + return PMI_ERR_INVALID_ARG; } + /* pass the service/port */ - (void)strncpy(info.key, service_name, PMIX_MAX_KEYLEN); + (void) strncpy(info.key, service_name, PMIX_MAX_KEYLEN); info.value.type = PMIX_STRING; - 
info.value.data.string = (char*)port; + info.value.data.string = (char*) port; /* publish the info - PMI-1 doesn't support * any scope other than inside our own nspace */ @@ -295,11 +364,17 @@ int PMI_Publish_name(const char service_name[], const char port[]) int PMI_Unpublish_name(const char service_name[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; char *keys[2]; + PMI_CHECK(); + + if (NULL == service_name) { + return PMI_ERR_INVALID_ARG; + } + /* pass the service */ - keys[0] = (char*)service_name; + keys[0] = (char*) service_name; keys[1] = NULL; rc = PMIx_Unpublish(keys, NULL, 0); @@ -308,13 +383,19 @@ int PMI_Unpublish_name(const char service_name[]) int PMI_Lookup_name(const char service_name[], char port[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_pdata_t pdata; + PMI_CHECK(); + + if (NULL == service_name || NULL == port) { + return PMI_ERR_INVALID_ARG; + } + PMIX_PDATA_CONSTRUCT(&pdata); /* pass the service */ - (void)strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN); + (void) strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN); /* PMI-1 doesn't want the nspace back */ if (PMIX_SUCCESS != (rc = PMIx_Lookup(&pdata, 1, NULL, 0))) { @@ -322,8 +403,7 @@ int PMI_Lookup_name(const char service_name[], char port[]) } /* should have received a string back */ - if (PMIX_STRING != pdata.value.type || - NULL == pdata.value.data.string) { + if (PMIX_STRING != pdata.value.type || NULL == pdata.value.data.string) { return convert_err(PMIX_ERR_NOT_FOUND); } @@ -332,7 +412,7 @@ int PMI_Lookup_name(const char service_name[], char port[]) * potential we could overrun it. 
As this feature * isn't widely supported in PMI-1, try being * conservative */ - (void)strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN); + (void) strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN); PMIX_PDATA_DESTRUCT(&pdata); return PMIX_SUCCESS; @@ -343,37 +423,54 @@ int PMI_Get_id(char id_str[], int length) /* we already obtained our nspace during PMI_Init, * so all we have to do here is return it */ + PMI_CHECK(); + /* bozo check */ if (NULL == id_str) { return PMI_ERR_INVALID_ARGS; } - (void)strncpy(id_str, pmix_globals.myid.nspace, length); + if (length < PMI_MAX_ID_LEN) { + return PMI_ERR_INVALID_LENGTH; + } + + (void) strncpy(id_str, myproc.nspace, length); return PMI_SUCCESS; } int PMI_Get_kvs_domain_id(char id_str[], int length) { + PMI_CHECK(); + /* same as PMI_Get_id */ return PMI_Get_id(id_str, length); } int PMI_Get_id_length_max(int *length) { + PMI_CHECK(); + if (NULL == length) { return PMI_ERR_INVALID_VAL_LENGTH; } - *length = PMIX_MAX_KEYLEN; + + *length = PMI_MAX_ID_LEN; return PMI_SUCCESS; } int PMI_Get_clique_size(int *size) { - pmix_value_t *kv; - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; + pmix_value_t *val; - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, NULL, 0, &kv)) { - rc = convert_int(size, kv); - PMIX_VALUE_RELEASE(kv); + PMI_CHECK(); + + if (NULL == size) { + return PMI_ERR_INVALID_ARGS; + } + + if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, NULL, 0, &val)) { + rc = convert_int(size, val); + PMIX_VALUE_RELEASE(val); return convert_err(rc); } @@ -382,19 +479,25 @@ int PMI_Get_clique_size(int *size) int PMI_Get_clique_ranks(int ranks[], int length) { - pmix_value_t *kv; + pmix_value_t *val; char **rks; int i; - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_PEERS, NULL, 0, &kv)) { + PMI_CHECK(); + + if (NULL == ranks) { + return PMI_ERR_INVALID_ARGS; + } + + if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_PEERS, NULL, 0, &val)) { /* kv will contain a string of comma-separated * ranks on my 
node */ - rks = pmix_argv_split(kv->data.string, ','); - for (i=0; NULL != rks[i] && i < length; i++) { + rks = pmix_argv_split(val->data.string, ','); + for (i = 0; NULL != rks[i] && i < length; i++) { ranks[i] = strtol(rks[i], NULL, 10); } pmix_argv_free(rks); - PMIX_VALUE_RELEASE(kv); + PMIX_VALUE_RELEASE(val); return PMI_SUCCESS; } return PMI_FAIL; @@ -402,36 +505,47 @@ int PMI_Get_clique_ranks(int ranks[], int length) int PMI_KVS_Get_my_name(char kvsname[], int length) { + PMI_CHECK(); + /* same as PMI_Get_id */ return PMI_Get_id(kvsname, length); } int PMI_KVS_Get_name_length_max(int *length) { - if (NULL == length) { - return PMI_ERR_INVALID_VAL_LENGTH; + PMI_CHECK(); + + if (NULL == length) { + return PMI_ERR_INVALID_ARG; } - *length = PMIX_MAX_NSLEN; + + *length = PMI_MAX_KVSNAME_LEN; return PMI_SUCCESS; } int PMI_KVS_Get_key_length_max(int *length) { + PMI_CHECK(); + if (NULL == length) { - return PMI_ERR_INVALID_VAL_LENGTH; + return PMI_ERR_INVALID_ARG; } - *length = PMIX_MAX_KEYLEN; + + *length = PMI_MAX_KEY_LEN; return PMI_SUCCESS; } int PMI_KVS_Get_value_length_max(int *length) { + PMI_CHECK(); + if (NULL == length) { - return PMI_ERR_INVALID_VAL_LENGTH; + return PMI_ERR_INVALID_ARG; } + /* don't give them an enormous size of some implementations * immediately malloc a data block for their use */ - *length = 4096; + *length = PMI_MAX_VAL_LEN; return PMI_SUCCESS; } @@ -473,31 +587,37 @@ int PMI_Spawn_multiple(int count, const PMI_keyval_t preput_keyval_vector[], int errors[]) { + pmix_status_t rc = PMIX_SUCCESS; pmix_app_t *apps; int i, k; - pmix_status_t rc; size_t j; char *evar; + PMI_CHECK(); + + if (NULL == cmds) { + return PMI_ERR_INVALID_ARG; + } + /* setup the apps */ PMIX_APP_CREATE(apps, count); - for (i=0; i < count; i++) { + for (i = 0; i < count; i++) { apps[i].cmd = strdup(cmds[i]); apps[i].maxprocs = maxprocs[i]; - apps[i].argv = pmix_argv_copy((char**)argvs[i]); + apps[i].argv = pmix_argv_copy((char**) argvs[i]); apps[i].argc = 
pmix_argv_count(apps[i].argv); apps[i].ninfo = info_keyval_sizesp[i]; if (0 < apps[i].ninfo) { apps[i].info = (pmix_info_t*)malloc(apps[i].ninfo * sizeof(pmix_info_t)); /* copy the info objects */ - for (j=0; j < apps[i].ninfo; j++) { + for (j = 0; j < apps[i].ninfo; j++) { (void)strncpy(apps[i].info[j].key, info_keyval_vectors[i][j].key, PMIX_MAX_KEYLEN); apps[i].info[j].value.type = PMIX_STRING; apps[i].info[j].value.data.string = strdup(info_keyval_vectors[i][j].val); } } /* push the preput values into the apps environ */ - for (k=0; k < preput_keyval_size; k++) { + for (k = 0; k < preput_keyval_size; k++) { (void)asprintf(&evar, "%s=%s", preput_keyval_vector[k].key, preput_keyval_vector[k].val); pmix_argv_append_nosize(&apps[i].env, evar); free(evar); @@ -506,12 +626,12 @@ int PMI_Spawn_multiple(int count, rc = PMIx_Spawn(NULL, 0, apps, count, NULL); /* tear down the apps array */ - for (i=0; i < count; i++) { + for (i = 0; i < count; i++) { PMIX_APP_DESTRUCT(&apps[i]); } free(apps); if (NULL != errors) { - for (i=0; i < count; i++) { + for (i = 0; i < count; i++) { errors[i] = convert_err(rc); } } @@ -546,12 +666,11 @@ int PMI_Get_options(char *str, int *length) return PMI_FAIL; } - /*** UTILITY FUNCTIONS ***/ /* internal function */ static pmix_status_t convert_int(int *value, pmix_value_t *kv) { - switch(kv->type) { + switch (kv->type) { case PMIX_INT: *value = kv->data.integer; break; @@ -600,7 +719,7 @@ static pmix_status_t convert_int(int *value, pmix_value_t *kv) static int convert_err(pmix_status_t rc) { - switch(rc) { + switch (rc) { case PMIX_ERR_INVALID_SIZE: return PMI_ERR_INVALID_SIZE; diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c index 964bc43c76c..7c9df035af9 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c @@ -1,8 +1,10 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2015 Intel, Inc. 
All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,15 +37,24 @@ #include "src/util/error.h" #include "src/util/output.h" +#define PMI2_CHECK() \ + do { \ + if (!pmi2_init) { \ + return PMI2_FAIL; \ + } \ + } while (0) + /* local functions */ static pmix_status_t convert_int(int *value, pmix_value_t *kv); static int convert_err(pmix_status_t rc); static pmix_proc_t myproc; +static int pmi2_init = 0; int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) { - pmix_value_t *kv; - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; + pmix_value_t *val; + pmix_proc_t proc; if (PMIX_SUCCESS != PMIx_Init(&myproc)) { return PMI2_ERR_INIT; @@ -52,14 +63,18 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) /* get the rank */ *rank = myproc.rank; + /* getting internal key requires special rank value */ + memcpy(&proc, &myproc, sizeof(myproc)); + proc.rank = PMIX_RANK_WILDCARD; + if (NULL != size) { /* get the universe size - this will likely pull * down all attributes assigned to the job, thus * making all subsequent "get" operations purely * local */ - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &kv)) { - rc = convert_int(size, kv); - PMIX_VALUE_RELEASE(kv); + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) { + rc = convert_int(size, val); + PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } @@ -71,9 +86,9 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) if (NULL != spawned) { /* get the spawned flag */ - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_SPAWNED, NULL, 0, &kv)) { - rc = convert_int(spawned, kv); - PMIX_VALUE_RELEASE(kv); + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_SPAWNED, NULL, 0, 
&val)) { + rc = convert_int(spawned, val); + PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } @@ -85,9 +100,9 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) if (NULL != appnum) { /* get our appnum */ - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_APPNUM, NULL, 0, &kv)) { - rc = convert_int(appnum, kv); - PMIX_VALUE_RELEASE(kv); + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_APPNUM, NULL, 0, &val)) { + rc = convert_int(appnum, val); + PMIX_VALUE_RELEASE(val); if (PMIX_SUCCESS != rc) { goto error; } @@ -96,6 +111,7 @@ int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) *appnum = 0; } } + pmi2_init = 1; return PMI2_SUCCESS; @@ -112,15 +128,20 @@ int PMI2_Initialized(void) int PMI2_Finalize(void) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; + + PMI2_CHECK(); + pmi2_init = 0; rc = PMIx_Finalize(); return convert_err(rc); } int PMI2_Abort(int flag, const char msg[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; + + PMI2_CHECK(); rc = PMIx_Abort(flag, msg, NULL, 0); return convert_err(rc); @@ -129,9 +150,18 @@ int PMI2_Abort(int flag, const char msg[]) /* KVS_Put - we default to PMIX_GLOBAL scope */ int PMI2_KVS_Put(const char key[], const char value[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_value_t val; + PMI2_CHECK(); + + if ((NULL == key) || (NULL == value)) { + return PMI2_ERR_INVALID_ARG; + } + + pmix_output_verbose(3, pmix_globals.debug_output, + "PMI2_KVS_Put: key=%s value=%s", key, value); + val.type = PMIX_STRING; val.data.string = (char*)value; rc = PMIx_Put(PMIX_GLOBAL, key, &val); @@ -141,14 +171,26 @@ int PMI2_KVS_Put(const char key[], const char value[]) /* KVS_Fence */ int PMI2_KVS_Fence(void) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; + + PMI2_CHECK(); if (PMIX_SUCCESS != (rc = PMIx_Commit())) { return convert_err(rc); } /* we want all data to be collected upon completion */ - rc = PMIx_Fence(NULL, 0, NULL, 0); + { + pmix_info_t info; + int ninfo = 1; + bool 
val = 1; + + PMIX_INFO_CONSTRUCT(&info); + PMIX_INFO_LOAD(&info, PMIX_COLLECT_DATA, &val, PMIX_BOOL); + rc = PMIx_Fence(NULL, 0, &info, ninfo); + PMIX_INFO_DESTRUCT(&info); + } + return convert_err(rc); } @@ -161,33 +203,68 @@ int PMI2_KVS_Get(const char *jobid, int src_pmi_id, const char key[], char value [], int maxvalue, int *vallen) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; pmix_proc_t proc; + uint32_t procnum = 0; - (void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), sizeof(myproc.nspace)); - proc.rank = (src_pmi_id == PMI2_ID_NULL ? myproc.rank : src_pmi_id); - rc = PMIx_Get(&proc, key, NULL, 0, &val); - if (PMIX_SUCCESS == rc && NULL != val) { - if (PMIX_STRING != val->type) { - /* this is an error */ - PMIX_VALUE_RELEASE(val); - return PMI2_FAIL; - } - if (NULL != val->data.string) { - (void)strncpy(value, val->data.string, maxvalue); - *vallen = strlen(val->data.string); + PMI2_CHECK(); + + if ((NULL == key) || (NULL == value)) { + return PMI2_ERR_INVALID_ARG; + } + + pmix_output_verbose(3, pmix_globals.debug_output, + "PMI2_KVS_Get: key=%s jobid=%s src_pmi_id=%d", key, (jobid ? jobid : "null"), src_pmi_id); + + (void)strncpy(proc.nspace, (jobid ? 
jobid : myproc.nspace), PMIX_MAX_NSLEN); + if (src_pmi_id == PMI2_ID_NULL) { + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_JOB_SIZE, NULL, 0, &val))) { + return convert_err(rc); } + procnum = val->data.uint32; PMIX_VALUE_RELEASE(val); + proc.rank = 0; + } else { + proc.rank = src_pmi_id; } + + do { + rc = PMIx_Get(&proc, key, NULL, 0, &val); + if (PMIX_SUCCESS == rc && NULL != val) { + if (PMIX_STRING != val->type) { + /* this is an error */ + PMIX_VALUE_RELEASE(val); + return PMI2_FAIL; + } + if (NULL != val->data.string) { + (void)strncpy(value, val->data.string, maxvalue); + *vallen = strlen(val->data.string); + } + PMIX_VALUE_RELEASE(val); + break; + } else if (PMIX_ERR_NOT_FOUND == rc) { + proc.rank++; + } else { + break; + } + } while (proc.rank < (int)procnum); + return convert_err(rc); } int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *found, int waitfor) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; + PMI2_CHECK(); + + if ((NULL == name) || (NULL == value) || (NULL == found)) { + return PMI2_ERR_INVALID_ARG; + } + *found = 0; rc = PMIx_Get(&myproc, name, NULL, 0, &val); if (PMIX_SUCCESS == rc && NULL != val) { @@ -210,9 +287,15 @@ int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *fo /* push info at the PMIX_LOCAL scope */ int PMI2_Info_PutNodeAttr(const char name[], const char value[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_value_t val; + PMI2_CHECK(); + + if ((NULL == name) || (NULL == value)) { + return PMI2_ERR_INVALID_ARG; + } + val.type = PMIX_STRING; val.data.string = (char*)value; rc = PMIx_Put(PMIX_LOCAL, name, &val); @@ -221,11 +304,22 @@ int PMI2_Info_PutNodeAttr(const char name[], const char value[]) int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *found) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; + pmix_proc_t proc; + + PMI2_CHECK(); + + 
if ((NULL == name) || (NULL == value) || (NULL == found)) { + return PMI2_ERR_INVALID_ARG; + } + + /* getting internal key requires special rank value */ + memcpy(&proc, &myproc, sizeof(myproc)); + proc.rank = PMIX_RANK_WILDCARD; *found = 0; - rc = PMIx_Get(&myproc, name, NULL, 0, &val); + rc = PMIx_Get(&proc, name, NULL, 0, &val); if (PMIX_SUCCESS == rc && NULL != val) { if (PMIX_STRING != val->type) { /* this is an error */ @@ -250,13 +344,16 @@ int PMI2_Info_GetJobAttrIntArray(const char name[], int array[], int arraylen, i int PMI2_Nameserv_publish(const char service_name[], const PMI_keyval_t *info_ptr, const char port[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; int nvals; pmix_info_t info[2]; + PMI2_CHECK(); + if (NULL == service_name || NULL == port) { return PMI2_ERR_INVALID_ARG; } + /* pass the service/port */ (void)strncpy(info[0].key, service_name, PMIX_MAX_KEYLEN); info[0].value.type = PMIX_STRING; @@ -280,8 +377,14 @@ int PMI2_Nameserv_publish(const char service_name[], const PMI_keyval_t *info_pt int PMI2_Nameserv_unpublish(const char service_name[], const PMI_keyval_t *info_ptr) { + pmix_status_t rc = PMIX_SUCCESS; char *keys[3]; - pmix_status_t rc; + + PMI2_CHECK(); + + if (NULL == service_name || NULL == info_ptr) { + return PMI2_ERR_INVALID_ARG; + } /* pass the service */ keys[0] = (char*)service_name; @@ -300,10 +403,16 @@ int PMI2_Nameserv_unpublish(const char service_name[], int PMI2_Nameserv_lookup(const char service_name[], const PMI_keyval_t *info_ptr, char port[], int portLen) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; int nvals; pmix_pdata_t pdata[2]; + PMI2_CHECK(); + + if (NULL == service_name || NULL == info_ptr || NULL == port) { + return PMI2_ERR_INVALID_ARG; + } + PMIX_PDATA_CONSTRUCT(&pdata[0]); PMIX_PDATA_CONSTRUCT(&pdata[1]); @@ -347,9 +456,11 @@ int PMI2_Nameserv_lookup(const char service_name[], const PMI_keyval_t *info_ptr int PMI2_Job_GetId(char jobid[], int jobid_size) { - /* we already obtained 
our nspace during PMI_Init, + /* we already obtained our nspace during pmi2_init, * so all we have to do here is return it */ + PMI2_CHECK(); + /* bozo check */ if (NULL == jobid) { return PMI2_ERR_INVALID_ARGS; @@ -358,11 +469,48 @@ int PMI2_Job_GetId(char jobid[], int jobid_size) return PMI2_SUCCESS; } +int PMI2_Job_GetRank(int *rank) +{ + PMI2_CHECK(); + + if (NULL == rank) { + return PMI2_ERR_INVALID_ARGS; + } + *rank = myproc.rank; + return PMI2_SUCCESS; +} + +int PMI2_Info_GetSize(int *size) +{ + pmix_status_t rc = PMIX_SUCCESS; + pmix_value_t *val; + + PMI2_CHECK(); + + if (NULL == size) { + return PMI2_ERR_INVALID_ARGS; + } + + if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, NULL, 0, &val)) { + rc = convert_int(size, val); + PMIX_VALUE_RELEASE(val); + return convert_err(rc); + } + + return PMI2_FAIL; +} + int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_proc_t proc; + PMI2_CHECK(); + + if (NULL == jobid || NULL == conn) { + return PMI2_ERR_INVALID_ARGS; + } + (void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), sizeof(myproc.nspace)); proc.rank = PMIX_RANK_WILDCARD; rc = PMIx_Connect(&proc, 1, NULL, 0); @@ -371,9 +519,15 @@ int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn) int PMI2_Job_Disconnect(const char jobid[]) { - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_proc_t proc; + PMI2_CHECK(); + + if (NULL == jobid) { + return PMI2_ERR_INVALID_ARGS; + } + (void)strncpy(proc.nspace, (jobid ? 
jobid : myproc.nspace), sizeof(myproc.nspace)); proc.rank = PMIX_RANK_WILDCARD; rc = PMIx_Disconnect(&proc, 1, NULL, 0); @@ -390,12 +544,18 @@ int PMI2_Job_Spawn(int count, const char * cmds[], char jobId[], int jobIdSize, int errors[]) { + pmix_status_t rc = PMIX_SUCCESS; pmix_app_t *apps; int i, k; - pmix_status_t rc; size_t j; char *evar; + PMI2_CHECK(); + + if (NULL == cmds) { + return PMI2_ERR_INVALID_ARGS; + } + /* setup the apps */ PMIX_APP_CREATE(apps, count); for (i=0; i < count; i++) { diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c index fcd08de9d03..1cf053eebde 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c @@ -5,6 +5,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_fence.c b/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_fence.c index d36a1ade8aa..55078ebf38c 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_fence.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_fence.c @@ -5,6 +5,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c index b3cc2fdff68..d41be9cbe31 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c @@ -5,6 +5,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. 
+ * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c index 8eb69668495..d16ae16212a 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c @@ -5,6 +5,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.c index e76c557b38d..4a4abd1074f 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.c @@ -5,6 +5,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h index 21091b6783a..c6279d53922 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h @@ -3,6 +3,8 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. 
* $COPYRIGHT$ */ diff --git a/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c b/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c index b78a124d7ea..5156f6a3aa5 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c @@ -4,6 +4,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c b/opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c index 759be74e825..7fed631a284 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c @@ -1,19 +1,7 @@ /* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -74,6 +62,8 @@ static int test_item5(void); static int test_item6(void); static int test_item7(void); static int test_item8(void); +/* a sequence of several fences is a buggy case for pmix v1.0 (see https://github.com/open-mpi/pmix/issues/37) */ +static int test_item9(void); static int spawned, size, rank, appnum; static char jobid[100]; @@ -149,6 +139,12 @@ int main(int argc, char **argv) log_info("TI8 : %s\n", (rc ? "FAIL" : "PASS")); } + if (!ti || 9 == ti) { + rc = test_item9(); + ret += (rc ? 1 : 0); + log_info("TI9 : %s\n", (rc ? "FAIL" : "PASS")); + } + if (PMI2_SUCCESS != (rc = PMI2_Finalize())) { log_fatal("PMI2_Finalize failed: %d\n", rc); return rc; @@ -160,6 +156,7 @@ int main(int argc, char **argv) static int test_item1(void) { int rc = 0; + int val = 0; log_info("spawned=%d size=%d rank=%d appnum=%d\n", spawned, size, rank, appnum); @@ -175,9 +172,22 @@ static int test_item1(void) } log_info("jobid=%s\n", jobid); - log_assert(memcmp(jobid, __FUNCTION__, sizeof(__FUNCTION__)), ""); + val = random_value(10, 100); + if (PMI2_SUCCESS != (rc = PMI2_Job_GetRank(&val))) { + log_fatal("PMI2_Job_GetRank failed: %d\n", rc); + return rc; + } + log_assert(rank == val, ""); + + val = -1; + if (PMI2_SUCCESS != (rc = PMI2_Info_GetSize(&val))) { + log_fatal("PMI2_Info_GetSize failed: %d\n", rc); + return rc; + } + log_assert(0 < val, ""); + return rc; } @@ -333,8 +343,8 @@ static int test_item7(void) return rc; } - if (PMI2_SUCCESS != (rc = PMI2_KVS_Get(jobid, PMI2_ID_NULL, tkey, val, sizeof(val), &len))) { - log_fatal("PMI2_KVS_Get %d\n", rc); + if (PMI2_SUCCESS != (rc = PMI2_KVS_Get(jobid, i, tkey, val, sizeof(val), &len))) { + log_fatal("PMI2_KVS_Get [%s=?]
%d\n", tkey, rc); return rc; } @@ -348,18 +358,58 @@ static int test_item7(void) } static int test_item8(void) +{ + int rc = 0; + int len; + char tkey[PMI2_MAX_VALLEN]; + char tval[PMI2_MAX_VALLEN]; + char val[PMI2_MAX_VALLEN]; + int i = 0; + + for (i = 0; i < size; i++) { + sprintf(tkey, "KEY-%d", i); + sprintf(tval, "VALUE-%d", i); + if (i == rank) { + if (PMI2_SUCCESS != (rc = PMI2_KVS_Put(tkey, tval))) { + log_fatal("PMI2_KVS_Put [%s=%s] %d\n", tkey, tval, rc); + return rc; + } + } + + if (PMI2_SUCCESS != (rc = PMI2_KVS_Fence())) { + log_fatal("PMI2_KVS_Fence %d\n", rc); + return rc; + } + + if (PMI2_SUCCESS != (rc = PMI2_KVS_Get(jobid, PMI2_ID_NULL, tkey, val, sizeof(val), &len))) { + log_fatal("PMI2_KVS_Get [%s=?] %d\n", tkey, rc); + return rc; + } + + log_info("tkey=%s tval=%s val=%s len=%d\n", tkey, tval, val, len); + + log_assert((int)strlen(tval) == len, "value does not meet expectation"); + log_assert(!strcmp(tval, val), "value does not meet expectation"); + } + + return rc; +} + +static int test_item9(void) { int rc = 0; int i, j, r; char symb, symb_start = 'a'; int fence_cnt; int fence_num = random_value(2, 10); - int keys_per_fence = random_value(10, 1000); - int val_size = random_value(10, PMI2_MAX_VALLEN); + int keys_per_fence = random_value(10, 100); + int val_size = random_value(10, PMI2_MAX_VALLEN / 10); int keys_total = 0; fence_cnt = 0; while (fence_cnt < fence_num) { + log_info("fence_cnt=%d of fence_num=%d keys_per_fence=%d keys_total=%d val_size=%d\n", + fence_cnt, fence_num, keys_per_fence, keys_total, val_size); symb = symb_start; for (i = 0; i < keys_per_fence; i++) { char key[PMI2_MAX_KEYLEN]; @@ -376,6 +426,7 @@ static int test_item8(void) log_fatal("PMI2_KVS_Put [%s=%s] %d\n", key, val, rc); return rc; } + log_info("PMI2_KVS_Put [rank=%d %s] %d\n", rank, key, rc); } symb_start = symb; keys_total += keys_per_fence; @@ -393,11 +444,13 @@ static int test_item8(void) char val[PMI2_MAX_VALLEN] = ""; sprintf(key, "RANK%d-key-%d", r, i); - if 
(PMI2_SUCCESS != (rc = PMI2_KVS_Get(jobid, PMI2_ID_NULL, key, val, sizeof(val), &len))) { - log_fatal("PMI2_KVS_Get %d\n", rc); + if (PMI2_SUCCESS != (rc = PMI2_KVS_Get(jobid, r, key, val, sizeof(val), &len))) { + log_fatal("PMI2_KVS_Get [%s=?] %d\n", key, rc); return rc; } + log_info("PMI2_KVS_Get [rank=%d %s] %d\n", rank, key, rc); + if (len != val_size) { log_fatal("%d: failure on rank %d, key #%d: len mismatch:" " %d instead of %d\n", rank, r, i, len, val_size); diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c b/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c index 8ea0de49c35..0bfe837513b 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c @@ -1,19 +1,7 @@ /* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,53 +12,411 @@ #include #include +#include +#include #include "pmi.h" +/* Target is legacy SLURM pmi library implementation */ +static int _legacy = 0; +/* Verbosity level: 0 - silent, 1 - fatal, 2 - error, 3+ - debug */ +static int _verbose = 1; + +#define log_fatal(fmt, ...)
\ + do { \ + if (_verbose > 0) \ + fprintf(stderr, "FATAL " fmt, ##__VA_ARGS__); \ + exit(rc); \ + } while (0) + +#define log_error(fmt) \ + do { \ + if (_verbose > 1) \ + fprintf(stderr, "ERROR " fmt); \ + } while (0) + +#define log_info(fmt, ...) \ + do { \ + if (_verbose > 2) \ + fprintf(stderr, "INFO " fmt, ##__VA_ARGS__); \ + } while (0) + +#define log_assert(e, msg) \ + do { \ + if (!(e)) { \ + log_fatal("%s at %s:%d\n", msg, __FUNCTION__, __LINE__); \ + rc = -1; \ + } \ + } while (0) + +static inline long random_value(long min_value, long max_value) +{ + return ((min_value >= max_value) ? min_value : min_value + (rand() % (max_value - min_value + 1))); +} + +static int test_item1(void); +static int test_item2(void); +static int test_item3(void); +static int test_item4(void); +static int test_item5(void); +static int test_item6(void); +static int test_item7(void); +static int test_item8(void); + +static int spawned, size, rank, appnum; +static char jobid[255]; + + int main(int argc, char **argv) { - int spawned; + int ret = 0; int rc; + char *str = NULL; + int ti = (argc > 1 ? atoi(argv[1]) : 0); - /* init us */ + srand(time(NULL)); + str = getenv("VERBOSE"); + _verbose = (str ? atoi(str) : _verbose); + str = getenv("LEGACY"); + _legacy = (str ? atoi(str) : _legacy); + + spawned = random_value(10, 20); + size = random_value(10, 20); + rank = random_value(10, 20); + appnum = random_value(10, 20); if (PMI_SUCCESS != (rc = PMI_Init(&spawned))) { - fprintf(stderr, "PMI_Init failed: %d\n", rc); + log_fatal("PMI_Init failed: %d\n", rc); return rc; } -#if 0 - key = "local-key"; - PMIX_VAL_SET(&value, int, 12345, rc, kvp_error ); - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, key, &value))) { - fprintf(stderr, "PMIx_Put failed: %d\n", rc); + + /* this test should always be run */ + if (1) { + rc = test_item1(); + ret += (rc ? 1 : 0); + log_info("TI1 : %s\n", (rc ?
"FAIL" : "PASS")); } - key = "remote-key"; - char *ptr = "Test string"; - PMIX_VAL_SET(&value, string, ptr, rc, kvp_error ); - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, key, &value))) { - fprintf(stderr, "PMIx_Put failed: %d\n", rc); + if (!ti || 2 == ti) { + rc = test_item2(); + ret += (rc ? 1 : 0); + log_info("TI2 : %s\n", (rc ? "FAIL" : "PASS")); } - key = "global-key"; - PMIX_VAL_SET(&value, float, 10.15, rc, kvp_error ); - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, key, &value))) { - fprintf(stderr, "PMIx_Put failed: %d\n", rc); + if (!ti || 3 == ti) { + rc = test_item3(); + ret += (rc ? 1 : 0); + log_info("TI3 : %s\n", (rc ? "FAIL" : "PASS")); } - /* Submit the data */ - pmix_range_t range; - range.ranks = NULL; - range.nranks = 0; - if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0))) { - fprintf(stderr, "PMIx_Fence failed: %d\n", rc); - return rc; + if (!ti || 4 == ti) { + rc = test_item4(); + ret += (rc ? 1 : 0); + log_info("TI4 : %s\n", (rc ? "FAIL" : "PASS")); + } + + if (!ti || 5 == ti) { + rc = test_item5(); + ret += (rc ? 1 : 0); + log_info("TI5 : %s\n", (rc ? "FAIL" : "PASS")); + } + + if (!ti || 6 == ti) { + rc = test_item6(); + ret += (rc ? 1 : 0); + log_info("TI6 : %s\n", (rc ? "FAIL" : "PASS")); + } + + if (!ti || 7 == ti) { + rc = test_item7(); + ret += (rc ? 1 : 0); + log_info("TI7 : %s\n", (rc ? "FAIL" : "PASS")); } -#endif - /* finalize us */ + + if (!ti || 8 == ti) { + rc = test_item8(); + ret += (rc ? 1 : 0); + log_info("TI8 : %s\n", (rc ? 
"FAIL" : "PASS")); + } + if (PMI_SUCCESS != (rc = PMI_Finalize())) { - fprintf(stderr, "PMI_Finalize failed: %d\n", rc); + log_fatal("PMI_Finalize failed: %d\n", rc); + return rc; + } + + return ret; +} + +static int test_item1(void) +{ + int rc = 0; + int val = 0; + + log_assert(spawned == PMI_FALSE || spawned == PMI_TRUE, ""); + + if (PMI_SUCCESS != (rc = PMI_Get_size(&size))) { + log_fatal("PMI_Get_Size failed: %d\n", rc); + return rc; + } + log_assert(size >= 0, ""); + + if (PMI_SUCCESS != (rc = PMI_Get_rank(&rank))) { + log_fatal("PMI_Get_Rank failed: %d\n", rc); + return rc; + } + log_assert(rank >= 0, ""); + log_assert(rank < size, ""); + + if (PMI_SUCCESS != (rc = PMI_Get_appnum(&appnum))) { + log_fatal("PMI_Get_appnum failed: %d\n", rc); + return rc; + } + + log_info("spawned=%d size=%d rank=%d appnum=%d\n", spawned, size, rank, appnum); + + val = random_value(10, 100); + if (PMI_SUCCESS != (rc = PMI_Get_universe_size(&val))) { + log_fatal("PMI_Get_universe_size failed: %d\n", rc); + return rc; + } + log_assert(size == val, ""); + + val = random_value(10, 100); + if (PMI_SUCCESS != (rc = PMI_Get_id_length_max(&val))) { + log_fatal("PMI_Get_id_length_max failed: %d\n", rc); + return rc; + } + log_info("PMI_Get_id_length_max=%d\n", val); + if (!_legacy) { + log_assert(sizeof(jobid) == val, "Check PMIX_MAX_NSLEN value in pmix_common.h"); + } + + sprintf(jobid, "%s", __FUNCTION__); + if (PMI_SUCCESS != (rc = PMI_Get_id(jobid, sizeof(jobid)))) { + log_fatal("PMI_Get_id failed: %d\n", rc); + return rc; + } + + log_info("jobid=%s\n", jobid); + log_assert(memcmp(jobid, __FUNCTION__, sizeof(__FUNCTION__)), ""); + + sprintf(jobid, "%s", __FUNCTION__); + if (PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(jobid, sizeof(jobid)))) { + log_fatal("PMI_Get_kvs_domain_id failed: %d\n", rc); + return rc; + } + + log_info("PMI_Get_kvs_domain_id=%s\n", jobid); + log_assert(memcmp(jobid, __FUNCTION__, sizeof(__FUNCTION__)), ""); + + sprintf(jobid, "%s", __FUNCTION__); + if 
(PMI_SUCCESS != (rc = PMI_KVS_Get_my_name(jobid, sizeof(jobid)))) { + log_fatal("PMI_KVS_Get_my_name failed: %d\n", rc); + return rc; + } + + log_info("PMI_KVS_Get_my_name=%s\n", jobid); + log_assert(memcmp(jobid, __FUNCTION__, sizeof(__FUNCTION__)), ""); + + return rc; +} + +static int test_item2(void) +{ + int rc = 0; + PMI_BOOL val; + + if (PMI_SUCCESS != (rc = PMI_Initialized(&val))) { + log_fatal("PMI_Initialized failed: %d\n", rc); + return rc; + } + log_assert(PMI_TRUE == val, ""); + + return rc; +} + +static int test_item3(void) +{ + int rc = 0; + int val = 0; + + val = random_value(10, 100); + if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&val))) { + log_fatal("PMI_KVS_Get_key_length_max failed: %d\n", rc); + return rc; + } + log_info("PMI_KVS_Get_key_length_max=%d\n", val); + if (!_legacy) { + log_assert(511 == val, "Check PMIX_MAX_KEYLEN value in pmix_common.h"); + } + + val = random_value(10, 100); + if (PMI_SUCCESS != (rc = PMI_KVS_Get_value_length_max(&val))) { + log_fatal("PMI_KVS_Get_value_length_max failed: %d\n", rc); + return rc; + } + log_info("PMI_KVS_Get_value_length_max=%d\n", val); + if (!_legacy) { + log_assert(4096 == val, "Check limitation for a value"); + } + + return rc; +} + +static int test_item4(void) +{ + int rc = 0; + int val = 0; + int *ranks = NULL; + int i = 0; + + val = -1; + if (PMI_SUCCESS != (rc = PMI_Get_clique_size(&val))) { + log_fatal("PMI_Get_clique_size failed: %d\n", rc); + return rc; + } + log_info("PMI_Get_clique_size=%d\n", val); + log_assert((0 < val) && (val <= size), ""); + + ranks = alloca(val); + if (!ranks) { + return PMI_FAIL; + } + + memset(ranks, (-1), val); + if (PMI_SUCCESS != (rc = PMI_Get_clique_ranks(ranks, val))) { + log_fatal("PMI_Get_clique_ranks failed: %d\n", rc); + return rc; + } + + for (i = 0; i < val; i++) { + if (!((0 <= ranks[i]) && (ranks[i] < size))) { + log_fatal("found invalid value in ranks array: ranks[%d]=%d\n", i, ranks[i]); + return rc; + } + } + + return rc; +} + +static int 
test_item5(void) +{ + int rc = 0; + char *val = NULL; + int val_size = 0; + /* Predefined Job attributes */ + const char *tkeys[] = { + "PMI_process_mapping", + NULL + }; + const char **ptr = tkeys; + + if (PMI_SUCCESS != (rc = PMI_KVS_Get_value_length_max(&val_size))) { + log_fatal("PMI_KVS_Get_value_length_max failed: %d\n", rc); + return rc; + } + + val = alloca(val_size); + if (!val) { + return PMI_FAIL; + } + + while (*ptr) { + if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, *ptr, val, val_size))) { + log_fatal("PMI_KVS_Get: [%s] %d\n", *ptr, rc); + return rc; + } + log_info("key=%s value=%.80s\n", *ptr, val); + ptr++; + } + + return rc; +} + +static int test_item6(void) +{ + int rc = 0; + char nspace[100]; + + log_error("pmix does not support this functionality\n"); + return rc; + if (0 == rank) { + if (PMI_SUCCESS != (rc = PMI_KVS_Create(nspace, sizeof(nspace)))) { + log_fatal("PMI_KVS_Create failed: %d\n", rc); + return rc; + } + log_info("nspace=%s\n", nspace); + + if (PMI_SUCCESS != (rc = PMI_KVS_Destroy(nspace))) { + log_fatal("PMI_KVS_Destroy failed: %d\n", rc); + return rc; + } + } + + return rc; +} + +static int test_item7(void) +{ + int rc = 0; + char val[100]; + const char *tkey = __FUNCTION__; + const char *tval = __FILE__; + + if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { + log_fatal("PMI_KVS_Put %d\n", rc); + return rc; } - + + if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { + log_fatal("PMI_KVS_Get %d\n", rc); + return rc; + } + + log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); + + log_assert(!strcmp(tval, val), "value does not meet expectation"); + + return rc; +} + +static int test_item8(void) +{ + int rc = 0; + char tkey[100]; + char tval[100]; + char val[100]; + int i = 0; + + for (i = 0; i < size; i++) { + sprintf(tkey, "KEY-%d", i); + sprintf(tval, "VALUE-%d", i); + if (i == rank) { + if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { + log_fatal("PMI_KVS_Put [%s=%s] %d\n", tkey, tval, rc); 
+ return rc; + } + } + + if (PMI_SUCCESS != (rc = PMI_KVS_Commit(jobid))) { + log_fatal("PMI_KVS_Commit %d\n", rc); + return rc; + } + + if (PMI_SUCCESS != (rc = PMI_Barrier())) { + log_fatal("PMI_Barrier %d\n", rc); + return rc; + } + + if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { + log_fatal("PMI_KVS_Get [%s=?] %d\n", tkey, rc); + return rc; + } + + log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); + + log_assert(!strcmp(tval, val), "value does not meet expectation"); + } + return rc; } diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c b/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c index 7e686131305..7d6ed93b445 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c @@ -16,6 +16,8 @@ * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.c b/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.c index bb5d5a8ef76..94b9eb4c7c9 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.c @@ -2,6 +2,8 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.h b/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.h index 88075f01610..8ea85ed4704 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.h +++ b/opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. 
All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_common.c b/opal/mca/pmix/pmix1xx/pmix/test/test_common.c index 157d0b2b01a..904115e175a 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_common.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_common.c @@ -2,6 +2,8 @@ * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_common.h b/opal/mca/pmix/pmix1xx/pmix/test/test_common.h index 85779bd8f64..8b2e19faa17 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_common.h +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_common.h @@ -4,6 +4,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_fence.c b/opal/mca/pmix/pmix1xx/pmix/test/test_fence.c index c33fb48dd1e..226214360f0 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_fence.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_fence.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_fence.h b/opal/mca/pmix/pmix1xx/pmix/test/test_fence.h index ef127ba651c..852f4dbf1c5 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_fence.h +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_fence.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_publish.c b/opal/mca/pmix/pmix1xx/pmix/test/test_publish.c index 17820b3d400..86f799bdead 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_publish.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_publish.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_publish.h b/opal/mca/pmix/pmix1xx/pmix/test/test_publish.h index 1cef6638662..03295b71656 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_publish.h +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_publish.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.c b/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.c index 53df7c0e776..e9582cb0df8 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.h b/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.h index 3060f6404d8..fc402cbab0f 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.h +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.c b/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.c index 8b300ff1e4f..3afbdc19753 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.h b/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.h index 6a260100775..2b2a12dd235 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.h +++ b/opal/mca/pmix/pmix1xx/pmix/test/test_spawn.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/utils.c b/opal/mca/pmix/pmix1xx/pmix/test/utils.c index f2af4325f98..ab6ad0cd052 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/utils.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/utils.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix1xx/pmix/test/utils.h b/opal/mca/pmix/pmix1xx/pmix/test/utils.h index 7d2cf0880dd..bbeebaa2f7a 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/utils.h +++ b/opal/mca/pmix/pmix1xx/pmix/test/utils.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow