Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion ompi/dpm/dpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,13 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
opal_argv_append_nosize(&members, nstring);
free(nstring);
/* have to add the number of procs in the job so the remote side
* can correctly add the procs by computing their names */
* can correctly add the procs by computing their names, and our nspace
* so they can update their records */
if (NULL == (nstring = (char*)opal_pmix.get_nspace(OMPI_PROC_MY_NAME->jobid))) {
opal_argv_free(members);
return OMPI_ERR_NOT_SUPPORTED;
}
opal_argv_append_nosize(&members, nstring);
(void)asprintf(&nstring, "%d", size);
opal_argv_append_nosize(&members, nstring);
free(nstring);
Expand Down Expand Up @@ -171,6 +177,11 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
}
opal_argv_append_nosize(&members, nstring);
free(nstring);
if (NULL == (nstring = (char*)opal_pmix.get_nspace(proc_list[i]->super.proc_name.jobid))) {
opal_argv_free(members);
return OMPI_ERR_NOT_SUPPORTED;
}
opal_argv_append_nosize(&members, nstring);
}
if (!dense) {
free(proc_list);
Expand Down Expand Up @@ -246,6 +257,17 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
OPAL_LIST_DESTRUCT(&mlist);
goto exit;
}
/* step over the nspace */
++i;
if (NULL == members[i]) {
/* this shouldn't happen and is an error */
OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM);
OPAL_LIST_DESTRUCT(&mlist);
opal_argv_free(members);
free(rport);
rc = OMPI_ERR_BAD_PARAM;
goto exit;
}
/* if the rank is wildcard, then we need to add all procs
* in that job to the list */
if (OPAL_VPID_WILDCARD == nm->name.vpid) {
Expand Down Expand Up @@ -295,6 +317,16 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
OPAL_LIST_DESTRUCT(&rlist);
goto exit;
}
/* next entry is the nspace - register it */
++i;
if (NULL == members[i]) {
OMPI_ERROR_LOG(OMPI_ERR_NOT_SUPPORTED);
opal_argv_free(members);
OPAL_LIST_DESTRUCT(&ilist);
OPAL_LIST_DESTRUCT(&rlist);
goto exit;
}
opal_pmix.register_jobid(nm->name.jobid, members[i]);
if (OPAL_VPID_WILDCARD == nm->name.vpid) {
jobid = nm->name.jobid;
OBJ_RELEASE(nm);
Expand Down
17 changes: 16 additions & 1 deletion opal/mca/pmix/cray/pmix_cray.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ static int cray_unpublish_nb(char **keys, opal_list_t *info,
static const char *cray_get_version(void);
static int cray_store_local(const opal_process_name_t *proc,
opal_value_t *val);
static const char *cray_get_nspace(opal_jobid_t jobid);
static void cray_register_jobid(opal_jobid_t jobid, const char *nspace);

#if 0
static bool cray_get_attr(const char *attr, opal_value_t **kv);
#endif
Expand Down Expand Up @@ -109,7 +112,9 @@ const opal_pmix_base_module_t opal_pmix_cray_module = {
.get_version = cray_get_version,
.register_errhandler = opal_pmix_base_register_handler,
.deregister_errhandler = opal_pmix_base_deregister_handler,
.store_local = cray_store_local
.store_local = cray_store_local,
.get_nspace = cray_get_nspace,
.register_jobid = cray_register_jobid
};

// usage accounting
Expand Down Expand Up @@ -814,6 +819,16 @@ static int cray_store_local(const opal_process_name_t *proc,
return OPAL_SUCCESS;
}

static const char *cray_get_nspace(opal_jobid_t jobid)
{
return NULL;
}

static void cray_register_jobid(opal_jobid_t jobid, const char *nspace)
{
return;
}

static char* pmix_error(int pmix_err)
{
char * err_msg;
Expand Down
8 changes: 8 additions & 0 deletions opal/mca/pmix/pmix.h
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,12 @@ typedef void (*opal_pmix_base_module_deregister_fn_t)(void);
typedef int (*opal_pmix_base_module_store_fn_t)(const opal_process_name_t *proc,
opal_value_t *val);

/* retrieve the nspace corresponding to a given jobid */
typedef const char* (*opal_pmix_base_module_get_nspace_fn_t)(opal_jobid_t jobid);

/* register a jobid-to-nspace pair */
typedef void (*opal_pmix_base_module_register_jobid_fn_t)(opal_jobid_t jobid, const char *nspace);

/*
* the standard public API data structure
*/
Expand Down Expand Up @@ -745,6 +751,8 @@ typedef struct {
opal_pmix_base_module_register_fn_t register_errhandler;
opal_pmix_base_module_deregister_fn_t deregister_errhandler;
opal_pmix_base_module_store_fn_t store_local;
opal_pmix_base_module_get_nspace_fn_t get_nspace;
opal_pmix_base_module_register_jobid_fn_t register_jobid;
} opal_pmix_base_module_t;

typedef struct {
Expand Down
15 changes: 14 additions & 1 deletion opal/mca/pmix/pmix1xx/pmix1.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,24 @@

BEGIN_C_DECLS

OPAL_DECLSPEC extern opal_pmix_base_component_t mca_pmix_pmix1_component;
typedef struct {
opal_pmix_base_component_t super;
opal_list_t jobids;
bool native_launch;
} mca_pmix_pmix1_component_t;

OPAL_DECLSPEC extern mca_pmix_pmix1_component_t mca_pmix_pmix1xx_component;

OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_pmix1xx_module;

/**** INTERNAL OBJECTS ****/
typedef struct {
opal_list_item_t super;
opal_jobid_t jobid;
char nspace[PMIX_MAX_NSLEN + 1];
} opal_pmix1_jobid_trkr_t;
OBJ_CLASS_DECLARATION(opal_pmix1_jobid_trkr_t);

typedef struct {
opal_object_t super;
pmix_proc_t p;
Expand Down
Loading