Skip to content

Commit

Permalink
Correct error in retrieval of node and app info
Browse files Browse the repository at this point in the history
Per the Standard, PMIx_Get of node and app info
is to ignore the procID argument. Ensure that we
correctly do so. Add a new test client3 example.

Signed-off-by: Ralph Castain <rhc@pmix.org>
  • Loading branch information
rhc54 committed Mar 16, 2024
1 parent 8733b7a commit 8a5bee6
Show file tree
Hide file tree
Showing 6 changed files with 314 additions and 48 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ bindings/python/tests/python/__pycache__/
examples/alloc
examples/client
examples/client2
examples/client3
examples/debugger
examples/debuggerd
examples/dmodex
Expand Down
8 changes: 6 additions & 2 deletions examples/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_buildd
noinst_PROGRAMS = client client2 dmodex dynamic fault pub pubi \
tool debugger debuggerd alloc jctrl group group_dmodex asyncgroup \
hello nodeinfo abi_no_init abi_with_init group_lcl_cid pset log \
group_bootstrap
group_bootstrap client3

if !WANT_HIDDEN
# these examples use internal symbols
Expand All @@ -43,6 +43,10 @@ client2_SOURCES = client2.c examples.h
client2_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
client2_LDADD = $(top_builddir)/src/libpmix.la

client3_SOURCES = client3.c examples.h
client3_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
client3_LDADD = $(top_builddir)/src/libpmix.la

debugger_SOURCES = debugger.c examples.h
debugger_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
debugger_LDADD = $(top_builddir)/src/libpmix.la
Expand Down Expand Up @@ -138,4 +142,4 @@ distclean-local:
debugger debuggerd dmodex dynamic fault group \
hello jctrl launcher log pub pubi server tool \
abi_no_init abi_with_init group_lcl_cid pset \
async_group group_dmodex group_bootstrap
async_group group_dmodex group_bootstrap client3
24 changes: 22 additions & 2 deletions examples/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2019 IBM Corporation. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
* Copyright (c) 2022 ParTec AG. All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -123,7 +123,7 @@ int main(int argc, char **argv)
{
pmix_status_t rc;
pmix_value_t value;
pmix_value_t *val = NULL;
pmix_value_t *val = NULL, *val2 = NULL;
char *tmp;
pmix_proc_t proc;
uint32_t nprocs, n, k, nlocal;
Expand Down Expand Up @@ -299,6 +299,26 @@ int main(int argc, char **argv)
}
PMIX_INFO_FREE(info, 1);

/* get a list of our local procs - some may not be in our job */
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PROCS, NULL, 0, &val))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get local procs with WILDCARD rank failed: %s\n", myproc.nspace,
myproc.rank, PMIx_Error_string(rc));
goto done;
}
// get the list using our proc ID
if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_PROCS, NULL, 0, &val2))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get local procs with my ID failed: %s\n", myproc.nspace,
myproc.rank, PMIx_Error_string(rc));
goto done;
}
if (PMIX_EQUAL == PMIx_Value_compare(val, val2)) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get local procs GOOD\n", myproc.nspace, myproc.rank);
} else {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get local procs mismatch\n", myproc.nspace, myproc.rank);
}
PMIX_VALUE_RELEASE(val);
PMIX_VALUE_RELEASE(val2);

/* get a list of our local peers */
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get local peers failed: %s\n", myproc.nspace,
Expand Down
206 changes: 206 additions & 0 deletions examples/client3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2019 IBM Corporation. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting. All rights reserved.
* Copyright (c) 2022 ParTec AG. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/

#define _GNU_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#include "examples.h"
#include <pmix.h>

static pmix_proc_t myproc;

/* Default event handler, passed down below when registering for
 * general events. Registering one is not technically required, but
 * it is usually good practice to catch any events that occur.
 *
 * The handler takes no action of its own: when a completion callback
 * is supplied it is invoked with PMIX_EVENT_ACTION_COMPLETE and the
 * caller's cbdata; every other argument is deliberately ignored. */
static void notification_fn(size_t evhdlr_registration_id,
                            pmix_status_t status,
                            const pmix_proc_t *source,
                            pmix_info_t info[],
                            size_t ninfo,
                            pmix_info_t results[],
                            size_t nresults,
                            pmix_event_notification_cbfunc_fn_t cbfunc,
                            void *cbdata)
{
    const bool have_cbfunc = (NULL != cbfunc);

    if (have_cbfunc) {
        /* no results to hand back and no follow-on callback */
        cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
    }

    /* silence unused-parameter warnings for everything we did not touch */
    EXAMPLES_HIDE_UNUSED_PARAMS(evhdlr_registration_id, status, source, info, ninfo, results, nresults);
}

/* Completion callback for event handler registration.
 *
 * Registration is done asynchronously because it may involve the
 * PMIx server registering with the host RM for external events. This
 * callback therefore receives the status of the request (success or
 * an error) plus a numerical index to the registered event. The index
 * is used later on to deregister the handler - if we don't explicitly
 * deregister it, the PMIx server will do so when it sees us exit.
 *
 * cbdata must point to the mylock_t the registering thread is waiting
 * on: the status and index are stored in it and the waiter is then
 * woken up. A failed registration is also reported on stderr. */
static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata)
{
    mylock_t *reglock = (mylock_t *) cbdata;
    const bool failed = (PMIX_SUCCESS != status);

    if (failed) {
        fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
                myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref);
    }

    /* publish the outcome before waking the thread that waits on it */
    reglock->evhandler_ref = evhandler_ref;
    reglock->status = status;
    DEBUG_WAKEUP_THREAD(reglock);
}

/* Release a value obtained from PMIx_Get, if there is one, and reset
 * the caller's pointer so that releasing the same variable again (for
 * example on the common exit path) is a harmless no-op. */
static void release_value(pmix_value_t **value)
{
    pmix_value_t *tmp = *value;

    if (NULL != tmp) {
        PMIX_VALUE_RELEASE(tmp);
    }
    *value = NULL;
}

/* Example client that checks PMIx_Get of node-level information.
 *
 * After initializing and registering a default event handler, the
 * client retrieves PMIX_LOCAL_PROCS, PMIX_NODEID and PMIX_HOSTNAME
 * twice each - once with a wildcard-rank or NULL proc and once with
 * its own proc ID - and reports on stderr whether the two answers
 * compare equal.
 *
 * argc/argv are unused. The process always exits with status 0, as it
 * did before; problems are reported on stderr only.
 * NOTE(review): a non-zero exit status on failure would make this more
 * useful as an automated test, but a launcher may treat that as a job
 * failure - confirm before changing. */
int main(int argc, char **argv)
{
    pmix_status_t rc;
    pmix_value_t *val = NULL, *val2 = NULL;
    pmix_proc_t proc;
    uint32_t nprocs;
    mylock_t mylock;
    pid_t pid;

    EXAMPLES_HIDE_UNUSED_PARAMS(argc, argv);

    pid = getpid();
    fprintf(stderr, "Client %lu: Running\n", (unsigned long) pid);

    /* init us - note that the call to "init" includes the return of
     * any job-related info provided by the RM. This includes any
     * debugger flag instructing us to stop-in-init. If such a directive
     * is included, then the process will be stopped in this call until
     * the "debugger release" notification arrives */
    if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %s\n", myproc.nspace, myproc.rank,
                PMIx_Error_string(rc));
        exit(0);
    }
    fprintf(stderr, "Client ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank,
            (unsigned long) pid);

    /* register our default event handler - again, this isn't strictly
     * required, but is generally good practice */
    DEBUG_CONSTRUCT_LOCK(&mylock);
    PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk,
                                (void *) &mylock);
    DEBUG_WAIT_THREAD(&mylock);
    rc = mylock.status;
    DEBUG_DESTRUCT_LOCK(&mylock);

    if (PMIX_SUCCESS != rc) {
        fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace,
                myproc.rank);
        goto done;
    }

    /* job-related info is found in our nspace, assigned to the
     * wildcard rank as it doesn't relate to a specific rank. Setup
     * a name to retrieve such values */
    PMIX_PROC_CONSTRUCT(&proc);
    PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD);

    /* get the number of procs in our job - univ size is the total number of allocated
     * slots, not the number of procs in the job */
    if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %s\n", myproc.nspace,
                myproc.rank, PMIx_Error_string(rc));
        goto done;
    }
    nprocs = val->data.uint32;
    release_value(&val);
    /* nprocs is unsigned, so print it with an unsigned conversion */
    fprintf(stderr, "Client %s:%d num procs %u\n", myproc.nspace, myproc.rank, (unsigned) nprocs);

    /* get a list of our local procs - some may not be in our job */
    if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PROCS, NULL, 0, &val))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get local procs with WILDCARD rank failed: %s\n", myproc.nspace,
                myproc.rank, PMIx_Error_string(rc));
        goto done;
    }
    /* get the list using our proc ID - on failure, "done" releases val */
    if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_PROCS, NULL, 0, &val2))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get local procs with my ID failed: %s\n", myproc.nspace,
                myproc.rank, PMIx_Error_string(rc));
        goto done;
    }
    if (PMIX_EQUAL == PMIx_Value_compare(val, val2)) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get local procs GOOD\n", myproc.nspace, myproc.rank);
    } else {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get local procs mismatch\n", myproc.nspace, myproc.rank);
    }
    release_value(&val);
    release_value(&val2);

    /* get our nodeID in various ways - unlike the other checks, a
     * failure here is reported but does not end the run */
    if (PMIX_SUCCESS != (rc = PMIx_Get(NULL, PMIX_NODEID, NULL, 0, &val))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get nodeID with NULL proc failed: %s\n", myproc.nspace,
                myproc.rank, PMIx_Error_string(rc));
    }
    /* get the nodeID using our proc ID */
    if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_NODEID, NULL, 0, &val2))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get nodeID with my ID failed: %s\n", myproc.nspace,
                myproc.rank, PMIx_Error_string(rc));
    }
    if (NULL != val && NULL != val2 && PMIX_EQUAL == PMIx_Value_compare(val, val2)) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get nodeID GOOD\n", myproc.nspace, myproc.rank);
    } else {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get nodeID mismatch\n", myproc.nspace, myproc.rank);
    }
    /* either pointer may hold no value if its PMIx_Get failed, so the
     * release must tolerate that */
    release_value(&val);
    release_value(&val2);

    /* get our hostname in various ways */
    if (PMIX_SUCCESS != (rc = PMIx_Get(NULL, PMIX_HOSTNAME, NULL, 0, &val))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get hostname with NULL proc failed: %s\n", myproc.nspace,
                myproc.rank, PMIx_Error_string(rc));
        goto done;
    }
    /* get the hostname using our proc ID - on failure, "done" releases val */
    if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_HOSTNAME, NULL, 0, &val2))) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get hostname with my ID failed: %s\n", myproc.nspace,
                myproc.rank, PMIx_Error_string(rc));
        goto done;
    }
    if (PMIX_EQUAL == PMIx_Value_compare(val, val2)) {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get hostname GOOD\n", myproc.nspace, myproc.rank);
    } else {
        fprintf(stderr, "Client ns %s rank %d: PMIx_Get hostname mismatch\n", myproc.nspace, myproc.rank);
    }
    release_value(&val);
    release_value(&val2);

done:
    /* drop anything an early exit left behind; no-ops on the normal path */
    release_value(&val);
    release_value(&val2);

    /* finalize us */
    fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
    if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace,
                myproc.rank, PMIx_Error_string(rc));
    } else {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n",
                myproc.nspace, myproc.rank);
    }
    fflush(stderr);
    return (0);
}

0 comments on commit 8a5bee6

Please sign in to comment.