Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,7 @@ orte/test/system/opal_hwloc
orte/test/system/opal_db
orte/test/system/ulfm
orte/test/system/pmixtool
orte/test/system/orte_notify

orte/tools/orte-checkpoint/orte-checkpoint
orte/tools/orte-checkpoint/orte-checkpoint.1
Expand Down
27 changes: 23 additions & 4 deletions opal/mca/pmix/ext20/pmix_ext20.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ static void progress_local_event_hdlr(int status,
if (sing->code == chain->status) {
OBJ_RETAIN(chain);
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING SINGLE EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
Expand All @@ -204,6 +207,9 @@ static void progress_local_event_hdlr(int status,
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
Expand All @@ -230,6 +236,9 @@ static void progress_local_event_hdlr(int status,
def = (opal_ext20_default_event_t*)nxt;
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
Expand Down Expand Up @@ -259,7 +268,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
opal_ext20_default_event_t *def;

opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s RECEIVED NOTIFICATION OF STATUS %d",
"%s _EVENT_HDLR RECEIVED NOTIFICATION OF STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), cd->status);

chain = OBJ_NEW(opal_ext20_event_chain_t);
Expand All @@ -281,7 +290,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
OBJ_RETAIN(chain);
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s CALLING SINGLE EVHDLR",
"%s _EVENT_HDLR CALLING SINGLE EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
Expand All @@ -300,7 +309,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s CALLING MULTI EVHDLR",
"%s _EVENT_HDLR CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
Expand All @@ -327,7 +336,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s CALLING DEFAULT EVHDLR",
"%s _EVENT_HDLR CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
Expand Down Expand Up @@ -812,6 +821,10 @@ void ext20_value_load(pmix_value_t *v,
* so the ORTE layer is responsible for converting it */
memcpy(&v->data.state, &kv->data.uint8, sizeof(uint8_t));
break;
case OPAL_PTR:
v->type = PMIX_POINTER;
v->data.ptr = kv->data.ptr;
break;
default:
/* silence warnings */
break;
Expand Down Expand Up @@ -943,11 +956,17 @@ int ext20_value_unload(opal_value_t *kv,
case PMIX_DATA_RANGE:
kv->type = OPAL_DATA_RANGE;
kv->data.uint8 = ext20_convert_range(v->data.persist);
break;
case PMIX_PROC_STATE:
kv->type = OPAL_PROC_STATE;
/* the OPAL layer doesn't have any concept of proc state,
* so the ORTE layer is responsible for converting it */
memcpy(&kv->data.uint8, &v->data.state, sizeof(uint8_t));
break;
case PMIX_POINTER:
kv->type = OPAL_PTR;
kv->data.ptr = v->data.ptr;
break;
default:
/* silence warnings */
rc = OPAL_ERROR;
Expand Down
27 changes: 23 additions & 4 deletions opal/mca/pmix/pmix2x/pmix2x.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ static void progress_local_event_hdlr(int status,
if (sing->code == chain->status) {
OBJ_RETAIN(chain);
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING SINGLE EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
Expand All @@ -204,6 +207,9 @@ static void progress_local_event_hdlr(int status,
* callback function to our progression function */
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
Expand All @@ -230,6 +236,9 @@ static void progress_local_event_hdlr(int status,
def = (opal_pmix2x_default_event_t*)nxt;
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s PROGRESS CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
progress_local_event_hdlr, (void*)chain);
Expand Down Expand Up @@ -259,7 +268,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
opal_pmix2x_default_event_t *def;

opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s RECEIVED NOTIFICATION OF STATUS %d",
"%s _EVENT_HDLR RECEIVED NOTIFICATION OF STATUS %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), cd->status);

chain = OBJ_NEW(opal_pmix2x_event_chain_t);
Expand All @@ -281,7 +290,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
OBJ_RETAIN(chain);
chain->sing = sing;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s CALLING SINGLE EVHDLR",
"%s _EVENT_HDLR CALLING SINGLE EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
sing->handler(chain->status, &chain->source,
chain->info, &chain->results,
Expand All @@ -300,7 +309,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
OBJ_RETAIN(chain);
chain->multi = multi;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s CALLING MULTI EVHDLR",
"%s _EVENT_HDLR CALLING MULTI EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
multi->handler(chain->status, &chain->source,
chain->info, &chain->results,
Expand All @@ -327,7 +336,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
OBJ_RETAIN(chain);
chain->def = def;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s CALLING DEFAULT EVHDLR",
"%s _EVENT_HDLR CALLING DEFAULT EVHDLR",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
def->handler(chain->status, &chain->source,
chain->info, &chain->results,
Expand Down Expand Up @@ -812,6 +821,10 @@ void pmix2x_value_load(pmix_value_t *v,
* so the ORTE layer is responsible for converting it */
memcpy(&v->data.state, &kv->data.uint8, sizeof(uint8_t));
break;
case OPAL_PTR:
v->type = PMIX_POINTER;
v->data.ptr = kv->data.ptr;
break;
default:
/* silence warnings */
break;
Expand Down Expand Up @@ -943,11 +956,17 @@ int pmix2x_value_unload(opal_value_t *kv,
case PMIX_DATA_RANGE:
kv->type = OPAL_DATA_RANGE;
kv->data.uint8 = pmix2x_convert_range(v->data.persist);
break;
case PMIX_PROC_STATE:
kv->type = OPAL_PROC_STATE;
/* the OPAL layer doesn't have any concept of proc state,
* so the ORTE layer is responsible for converting it */
memcpy(&kv->data.uint8, &v->data.state, sizeof(uint8_t));
break;
case PMIX_POINTER:
kv->type = OPAL_PTR;
kv->data.ptr = v->data.ptr;
break;
default:
/* silence warnings */
rc = OPAL_ERROR;
Expand Down
17 changes: 16 additions & 1 deletion orte/mca/errmgr/default_app/errmgr_default_app.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,34 @@ static void notify_cbfunc(int status,
opal_list_t *info, opal_list_t *results,
opal_pmix_notification_complete_fn_t cbfunc, void *cbdata)
{
orte_proc_state_t state;

OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
"%s errmgr:default_app: pmix event handler called with status %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_ERROR_NAME(status)));

/* we must convert the incoming status into an equivalent state
* so we can activate the state machine */
switch(status) {
case OPAL_ERR_PROC_ABORTED:
state = ORTE_PROC_STATE_ABORTED;
break;
case OPAL_ERR_PROC_REQUESTED_ABORT:
state = ORTE_PROC_STATE_CALLED_ABORT;
break;
default:
state = ORTE_PROC_STATE_TERMINATED;
}

/* let the caller know we processed this, but allow the
* chain to continue */
if (NULL != cbfunc) {
cbfunc(ORTE_SUCCESS, NULL, NULL, NULL, cbdata);
}

/* push it into our event base */
ORTE_ACTIVATE_PROC_STATE(ORTE_PROC_MY_NAME, status);
ORTE_ACTIVATE_PROC_STATE(ORTE_PROC_MY_NAME, state);
}

/************************
Expand Down
2 changes: 1 addition & 1 deletion orte/test/system/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits \
orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix opal_interface orte_spin segfault \
orte_exit test-time event-threads psm_keygen regex orte_errors evpri-test opal-evpri-test evpri-test2 \
mapper reducer opal_hotel orte_dfs ulfm pmixtool
mapper reducer opal_hotel orte_dfs ulfm pmixtool orte_notify

all: $(PROGS)

Expand Down
83 changes: 83 additions & 0 deletions orte/test/system/orte_notify.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/* -*- C -*-
*
* $HEADER$
*
*/

#include "orte_config.h"

#include <stdio.h>
#include <unistd.h>

#include "opal/mca/pmix/pmix.h"
#include "orte/runtime/runtime.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"

/* Process id of this test program; assigned in main() from getpid() and
 * printed by both main() and notification_fn(). */
static pid_t pid;
/* Host name buffer; filled in main() by gethostname() and printed by both
 * main() and notification_fn(). */
static char hostname[OPAL_MAXHOSTNAMELEN];

/**
 * Event handler that main() registers through opal_pmix.register_evhandler().
 *
 * Prints to stderr this process's name, host name and pid together with the
 * name passed in as the notification source, then invokes the completion
 * callback with OPAL_ERR_HANDLERS_COMPLETE.
 *
 * @param status   status code delivered with the event (not examined)
 * @param source   process name printed as the "source" of the notification
 * @param info     list delivered with the event (unused)
 * @param results  list delivered with the event (unused)
 * @param cbfunc   completion callback; skipped when NULL
 * @param cbdata   opaque pointer handed back to cbfunc unchanged
 */
static void notification_fn(int status,
                            const opal_process_name_t *source,
                            opal_list_t *info, opal_list_t *results,
                            opal_pmix_notification_complete_fn_t cbfunc,
                            void *cbdata)
{
    fprintf(stderr, "orte_notify: Name %s Host: %s Pid %ld source %s\n",
            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
            hostname, (long)pid, ORTE_NAME_PRINT(source));

    /* let the notifier know we are done */
    if (NULL != cbfunc) {
        cbfunc(OPAL_ERR_HANDLERS_COMPLETE, NULL, NULL, NULL, cbdata);
    }
}

/*
 * Callback taking a status, an event-handler reference and a user-data
 * pointer. It is intentionally a no-op: every argument is ignored.
 */
static void errhandler_reg_callbk(int status,
                                  size_t evhdlr_ref,
                                  void *cbdata)
{
    /* nothing to do; mark the arguments as deliberately unused */
    (void)status;
    (void)evhdlr_ref;
    (void)cbdata;
}

/**
 * Test driver: initializes ORTE as a non-MPI process, prints its identity,
 * registers notification_fn() as a PMIx event handler (requesting that it be
 * prepended to the handler order), and then idles forever so that incoming
 * notifications can be delivered.
 *
 * @param argc  argument count, forwarded to orte_init()
 * @param argv  argument vector, forwarded to orte_init()
 * @return the negative orte_init() error code on failure; otherwise the
 *         function does not return because of the idle loop
 */
int main(int argc, char* argv[])
{
    int rc;
    opal_value_t *kv;
    opal_list_t info;

    if (0 > (rc = orte_init(&argc, &argv, ORTE_PROC_NON_MPI))) {
        fprintf(stderr, "orte_notify: couldn't init orte - error code %d\n", rc);
        return rc;
    }
    pid = getpid();
    if (0 != gethostname(hostname, sizeof(hostname))) {
        /* keep the buffer printable if the lookup fails */
        snprintf(hostname, sizeof(hostname), "%s", "unknown");
    }
    /* a truncated host name is not guaranteed to be NUL-terminated */
    hostname[sizeof(hostname) - 1] = '\0';

    printf("orte_notify: Name %s Host: %s Pid %ld\n",
           ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
           hostname, (long)pid);
    fflush(stdout);

    /* register the event handler */
    OBJ_CONSTRUCT(&info, opal_list_t);
    kv = OBJ_NEW(opal_value_t);
    kv->key = strdup(OPAL_PMIX_EVENT_ORDER_PREPEND);
    kv->type = OPAL_BOOL;
    kv->data.flag = true;
    opal_list_append(&info, &kv->super);

    /* no status-code filter (NULL) and no registration callback */
    opal_pmix.register_evhandler(NULL, &info,
                                 notification_fn,
                                 NULL, NULL);

    /* idle so the process stays alive to receive notifications */
    while (1) {
        usleep(100);
    }

    return 0;
}