From cf3efbbaedb2a95765271014db17ffb8525746d0 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 7 Sep 2025 20:21:24 -0600 Subject: [PATCH 1/3] Silence Coverity warnings Signed-off-by: Ralph Castain --- src/mca/ess/base/ess_base_bootstrap.c | 3 + src/mca/ess/base/ess_base_std_prted.c | 5 +- src/mca/grpcomm/direct/grpcomm_direct_group.c | 34 +- src/mca/odls/base/base.h | 3 +- src/mca/odls/base/odls_base_bind.c | 15 +- src/mca/odls/base/odls_base_frame.c | 11 - src/mca/plm/base/plm_base_launch_support.c | 13 +- src/mca/ras/base/ras_base_allocate.c | 1 - src/mca/rmaps/base/rmaps_base_frame.c | 1 + src/mca/rmaps/round_robin/rmaps_rr_mappers.c | 6 +- src/mca/schizo/ompi/schizo_ompi.c | 6 + src/prted/pmix/pmix_server_dyn.c | 6 +- src/prted/pmix/pmix_server_session.c | 10 +- src/prted/prte_app_parse.c | 4 - src/tools/prun/prun.c | 2 +- src/util/Makefile.am | 3 - src/util/bipartite_graph.c | 942 ------------------ src/util/bipartite_graph.h | 155 --- src/util/bipartite_graph_internal.h | 136 --- src/util/name_fns.c | 45 +- 20 files changed, 102 insertions(+), 1299 deletions(-) delete mode 100644 src/util/bipartite_graph.c delete mode 100644 src/util/bipartite_graph.h delete mode 100644 src/util/bipartite_graph_internal.h diff --git a/src/mca/ess/base/ess_base_bootstrap.c b/src/mca/ess/base/ess_base_bootstrap.c index e7e3dc6311..d87c4b9fd1 100644 --- a/src/mca/ess/base/ess_base_bootstrap.c +++ b/src/mca/ess/base/ess_base_bootstrap.c @@ -223,6 +223,9 @@ int prte_ess_base_bootstrap(void) if (NULL != cluster) { free(cluster); } + if (NULL != ctrlhost) { + free(ctrlhost); + } if (NULL != dvmnodes) { free(dvmnodes); } diff --git a/src/mca/ess/base/ess_base_std_prted.c b/src/mca/ess/base/ess_base_std_prted.c index 1431c56fbd..6ee2faa8a3 100644 --- a/src/mca/ess/base/ess_base_std_prted.c +++ b/src/mca/ess/base/ess_base_std_prted.c @@ -446,8 +446,9 @@ int prte_ess_base_prted_setup(void) error: pmix_show_help("help-prte-runtime.txt", "prte_init:startup:internal-failure", true, error, PRTE_ERROR_NAME(ret), ret); - /* remove our use of the session directory tree */ - PMIX_RELEASE(jdata); + if (NULL != jdata) { + PMIX_RELEASE(jdata); + } return PRTE_ERR_SILENT; } diff --git a/src/mca/grpcomm/direct/grpcomm_direct_group.c b/src/mca/grpcomm/direct/grpcomm_direct_group.c index 0dac5ff902..9cfd25d3a2 100644 --- a/src/mca/grpcomm/direct/grpcomm_direct_group.c +++ b/src/mca/grpcomm/direct/grpcomm_direct_group.c @@ -8,7 +8,7 @@ * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. + * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -619,9 +619,21 @@ void prte_grpcomm_direct_grp_recv(int status, pmix_proc_t *sender, PMIx_Info_list_convert(coll->grpinfo, &darray); info = (pmix_info_t*)darray.array; ninfo = darray.size; - PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE); + rc = PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DATA_BUFFER_RELEASE(reply); + PMIX_RELEASE(sig); + return; + } if (0 < ninfo) { - PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO); + rc = PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DATA_BUFFER_RELEASE(reply); + PMIX_RELEASE(sig); + return; + } } PMIX_DATA_ARRAY_DESTRUCT(&darray); @@ -629,9 +641,21 @@ void prte_grpcomm_direct_grp_recv(int status, pmix_proc_t *sender, PMIx_Info_list_convert(coll->endpts, &darray); info = (pmix_info_t*)darray.array; ninfo = darray.size; - PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE); + rc = PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DATA_BUFFER_RELEASE(reply); + PMIX_RELEASE(sig); + return; + } if (0 < ninfo) { - PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO); + rc =PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DATA_BUFFER_RELEASE(reply); + PMIX_RELEASE(sig); + return; + } } PMIX_DATA_ARRAY_DESTRUCT(&darray); } diff --git a/src/mca/odls/base/base.h b/src/mca/odls/base/base.h index af33ba957e..eccd4f7f66 100644 --- a/src/mca/odls/base/base.h +++ b/src/mca/odls/base/base.h @@ -12,7 +12,7 @@ * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. + * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,7 +56,6 @@ typedef struct { char **ev_threads; // event progress thread names int next_base; // counter to load-level thread use bool signal_direct_children_only; - pmix_lock_t lock; char *exec_agent; } prte_odls_globals_t; diff --git a/src/mca/odls/base/odls_base_bind.c b/src/mca/odls/base/odls_base_bind.c index 4df940cc83..780b455736 100644 --- a/src/mca/odls/base/odls_base_bind.c +++ b/src/mca/odls/base/odls_base_bind.c @@ -275,11 +275,10 @@ void prte_odls_base_set(prte_odls_spawn_caddy_t *cd, int write_fd) hwloc_bitmap_free(cpuset); /* if we got an error and this wasn't a default binding policy, then report it */ if (rc < 0 && PRTE_BINDING_POLICY_IS_SET(jobdat->map->binding)) { - char *tmp = NULL; if (errno == ENOSYS) { - msg = "hwloc indicates cpu binding not supported"; + msg = strdup("hwloc indicates cpu binding not supported"); } else if (errno == EXDEV) { - msg = "hwloc indicates cpu binding cannot be enforced"; + msg = strdup("hwloc indicates cpu binding cannot be enforced"); } else { pmix_asprintf(&msg, "hwloc_set_cpubind returned \"%s\" for bitmap \"%s\"", prte_strerror(rc), child->cpuset); @@ -291,19 +290,13 @@ void prte_odls_base_set(prte_odls_spawn_caddy_t *cd, int write_fd) "binding generic error", prte_process_info.nodename, context->app, msg, __FILE__, __LINE__); + free(msg); // silence static analyzer warning } else { send_warn_show_help(write_fd, "help-prte-odls-default.txt", "not bound", prte_process_info.nodename, context->app, msg, __FILE__, __LINE__); - if (NULL != tmp) { - free(tmp); - free(msg); - } - return; - } - if (NULL != tmp) { - free(tmp); free(msg); + return; } } diff --git a/src/mca/odls/base/odls_base_frame.c b/src/mca/odls/base/odls_base_frame.c index 934a613f59..346dc17922 100644 --- a/src/mca/odls/base/odls_base_frame.c +++ b/src/mca/odls/base/odls_base_frame.c @@ -78,7 +78,6 @@ prte_odls_globals_t prte_odls_globals = { .ev_threads = NULL, .next_base = 0, .signal_direct_children_only = false, - .lock = PMIX_LOCK_STATIC_INIT, .exec_agent = NULL }; @@ -126,7 +125,6 @@ void prte_odls_base_harvest_threads(void) { int i; - PMIX_ACQUIRE_THREAD(&prte_odls_globals.lock); if (0 < prte_odls_globals.num_threads) { /* stop the progress threads */ if (NULL != prte_odls_globals.ev_threads) { @@ -144,7 +142,6 @@ void prte_odls_base_harvest_threads(void) prte_odls_globals.ev_threads = NULL; } } - PMIX_RELEASE_THREAD(&prte_odls_globals.lock); } void prte_odls_base_start_threads(prte_job_t *jdata) @@ -152,10 +149,8 @@ void prte_odls_base_start_threads(prte_job_t *jdata) int i; char *tmp; - PMIX_ACQUIRE_THREAD(&prte_odls_globals.lock); /* only do this once */ if (NULL != prte_odls_globals.ev_threads) { - PMIX_RELEASE_THREAD(&prte_odls_globals.lock); return; } @@ -205,7 +200,6 @@ void prte_odls_base_start_threads(prte_job_t *jdata) free(tmp); } } - PMIX_RELEASE_THREAD(&prte_odls_globals.lock); } static int prte_odls_base_close(void) @@ -230,8 +224,6 @@ static int prte_odls_base_close(void) prte_odls_base_harvest_threads(); - PMIX_DESTRUCT_LOCK(&prte_odls_globals.lock); - return pmix_mca_base_framework_components_close(&prte_odls_base_framework, NULL); } @@ -247,9 +239,6 @@ static int prte_odls_base_open(pmix_mca_base_open_flag_t flags) bool xterm_hold; sigset_t unblock; - PMIX_CONSTRUCT_LOCK(&prte_odls_globals.lock); - prte_odls_globals.lock.active = false; // start with nobody having the thread - /* initialize the global array of local children */ prte_local_children = PMIX_NEW(pmix_pointer_array_t); if (PRTE_SUCCESS diff --git a/src/mca/plm/base/plm_base_launch_support.c b/src/mca/plm/base/plm_base_launch_support.c index ed378ee317..084a24f3f9 100644 --- a/src/mca/plm/base/plm_base_launch_support.c +++ b/src/mca/plm/base/plm_base_launch_support.c @@ -191,9 +191,16 @@ void prte_plm_base_allocation_complete(int fd, short args, void *cbdata) * to map so we can see where the procs would have * gone - so skip to the mapping state */ if (prte_get_attribute(&caddy->jdata->attributes, PRTE_JOB_DO_NOT_LAUNCH, NULL, PMIX_BOOL)) { - PRTE_ACTIVATE_JOB_STATE(caddy->jdata, PRTE_JOB_STATE_DAEMONS_REPORTED); node = (prte_node_t*)pmix_pointer_array_get_item(prte_node_pool, 0); + if (NULL == node) { + // should never happen + PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); + PRTE_ACTIVATE_JOB_STATE(caddy->jdata, PRTE_JOB_STATE_FAILED_TO_START); + PMIX_RELEASE(caddy); + return; + } prte_rmaps_base.require_hwtcpus = !prte_hwloc_base_core_cpus(node->topology->topo); + PRTE_ACTIVATE_JOB_STATE(caddy->jdata, PRTE_JOB_STATE_DAEMONS_REPORTED); } else { /* move the state machine along */ caddy->jdata->state = PRTE_JOB_STATE_ALLOCATION_COMPLETE; @@ -1043,6 +1050,10 @@ void prte_plm_base_post_launch(int fd, short args, void *cbdata) continue; } app = (prte_app_context_t*)pmix_pointer_array_get_item(jdata->apps, proc->app_idx); + if (NULL == app) { + // should never happen + continue; + } fprintf(fp, "(rank, host, exe, pid) = (%u, %s, %s, %d)\n", proc->name.rank, proc->node->name, app->app, proc->pid); } diff --git a/src/mca/ras/base/ras_base_allocate.c b/src/mca/ras/base/ras_base_allocate.c index 51c5096564..8917ad9d5b 100644 --- a/src/mca/ras/base/ras_base_allocate.c +++ b/src/mca/ras/base/ras_base_allocate.c @@ -306,7 +306,6 @@ void prte_ras_base_display_cpus(prte_job_t *jdata, char *nodelist) } if (0 == strcmp(nptr->name, nodes[j])) { display_cpus(nptr->topology, jdata, nodes[j]); - moveon = true; break; } if (NULL == nptr->aliases) { diff --git a/src/mca/rmaps/base/rmaps_base_frame.c b/src/mca/rmaps/base/rmaps_base_frame.c index b7ef354bd9..b997eaca86 100644 --- a/src/mca/rmaps/base/rmaps_base_frame.c +++ b/src/mca/rmaps/base/rmaps_base_frame.c @@ -668,6 +668,7 @@ int prte_rmaps_base_set_mapping_policy(prte_job_t *jdata, char *inspec) if (NULL != val) { free(val); } + return PRTE_ERR_SILENT; } for (n=0; NULL != range[n]; n++) { (void)strtol(range[n], &parm_delimiter, 10); diff --git a/src/mca/rmaps/round_robin/rmaps_rr_mappers.c b/src/mca/rmaps/round_robin/rmaps_rr_mappers.c index 304a1c2320..3d2a7d37d8 100644 --- a/src/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/src/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -715,8 +715,7 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app, /* ran out of cpus */ pmix_show_help("help-prte-rmaps-base.txt", "allocation-overload", true, - (NULL == app) ? "N/A" : app->app, - (NULL == app) ? -1 : app->num_procs, + app->app, app->num_procs, prte_rmaps_base_print_mapping(options->map), prte_hwloc_base_print_binding(options->bind)); return PRTE_ERR_SILENT; @@ -724,8 +723,7 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app, pmix_show_help("help-prte-rmaps-base.txt", "failed-map", true, PRTE_ERROR_NAME(rc), - (NULL == app) ? "N/A" : app->app, - (NULL == app) ? -1 : app->num_procs, + app->app, app->num_procs, prte_rmaps_base_print_mapping(options->map), prte_hwloc_base_print_binding(options->bind)); return PRTE_ERR_SILENT; diff --git a/src/mca/schizo/ompi/schizo_ompi.c b/src/mca/schizo/ompi/schizo_ompi.c index 1b73526ec5..5646bc9798 100644 --- a/src/mca/schizo/ompi/schizo_ompi.c +++ b/src/mca/schizo/ompi/schizo_ompi.c @@ -598,6 +598,12 @@ static int parse_cli(char **argv, pmix_cli_result_t *results, /* check for deprecated options - warn and convert them */ rc = convert_deprecated_cli(results, silent); if (PRTE_SUCCESS != rc) { + if (NULL != caught_positions) { + free(caught_positions); + } + if (NULL != caught_single_dashes) { + free(caught_single_dashes); + } return rc; } diff --git a/src/prted/pmix/pmix_server_dyn.c b/src/prted/pmix/pmix_server_dyn.c index 504c35a6bf..99ba532249 100644 --- a/src/prted/pmix/pmix_server_dyn.c +++ b/src/prted/pmix/pmix_server_dyn.c @@ -1102,7 +1102,11 @@ pmix_status_t pmix_server_connect_fn(const pmix_proc_t procs[], size_t nprocs, for (n=0; n < ninfo; n++) { if (PMIX_CHECK_KEY(&info[n], PMIX_PROC_DATA) || PMIX_CHECK_KEY(&info[n], PMIX_JOB_INFO_ARRAY)) { - PMIx_Data_pack(NULL, &cd->msg, (pmix_info_t*)&info[n], 1, PMIX_INFO); + rc = PMIx_Data_pack(NULL, &cd->msg, (pmix_info_t*)&info[n], 1, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(cd); + return rc; + } } } cd->opcbfunc = cbfunc; diff --git a/src/prted/pmix/pmix_server_session.c b/src/prted/pmix/pmix_server_session.c index 8d97cc07aa..1db38d64e6 100644 --- a/src/prted/pmix/pmix_server_session.c +++ b/src/prted/pmix/pmix_server_session.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024 Nanook Consulting All rights reserved. + * Copyright (c) 2022-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,6 +40,7 @@ static int process_directive(pmix_server_req_t *req) pmix_app_t *apps; size_t napps; size_t n, i; + int j; pmix_status_t rc; bool terminate = false; bool pause = false; @@ -246,8 +247,11 @@ static int process_directive(pmix_server_req_t *req) /* add the designation to the apps in the job, if one was provided. These * will be added to the global pool when the job is setup for launch */ if (NULL != jdata) { - for (n=0; n < jdata->num_apps; n++) { - app = (prte_app_context_t*)pmix_pointer_array_get_item(jdata->apps, n); + for (j=0; j < jdata->apps->size; j++) { + app = (prte_app_context_t*)pmix_pointer_array_get_item(jdata->apps, j); + if (NULL == app) { + continue; + } // the add_host attribute will be removed after processing prte_set_attribute(&app->attributes, PRTE_APP_ADD_HOST, PRTE_ATTR_GLOBAL, hosts, PMIX_STRING); diff --git a/src/prted/prte_app_parse.c b/src/prted/prte_app_parse.c index c1418df094..d700b3045f 100644 --- a/src/prted/prte_app_parse.c +++ b/src/prted/prte_app_parse.c @@ -86,7 +86,6 @@ static int create_app(prte_schizo_base_module_t *schizo, char **argv, int i, n, count, rc; char *param, *value, *ptr; prte_pmix_app_t *app = NULL; - char *appname = NULL; pmix_cli_item_t *opt, *opt2; pmix_cli_result_t results; char *tval; @@ -505,9 +504,6 @@ static int create_app(prte_schizo_base_module_t *schizo, char **argv, if (NULL != app) { PMIX_RELEASE(app); } - if (NULL != appname) { - free(appname); - } PMIX_DESTRUCT(&results); return rc; } diff --git a/src/tools/prun/prun.c b/src/tools/prun/prun.c index 985e3ddc33..fc8b1334f1 100644 --- a/src/tools/prun/prun.c +++ b/src/tools/prun/prun.c @@ -242,7 +242,7 @@ int prun(int argc, char *argv[]) /* stitch together the var names and URI */ pmix_asprintf(&leftover, "%lu", (unsigned long) getpid()); /* output to the pipe */ - rc = pmix_fd_write(outpipe, strlen(leftover) + 1, leftover); + pmix_fd_write(outpipe, strlen(leftover) + 1, leftover); free(leftover); close(outpipe); } else { diff --git a/src/util/Makefile.am b/src/util/Makefile.am index c12f838409..c4f4bb0035 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -45,8 +45,6 @@ EXTRA_DIST = \ headers = \ attr.h \ - bipartite_graph.h \ - bipartite_graph_internal.h \ bit_ops.h \ prte_cmd_line.h \ crc.h \ @@ -67,7 +65,6 @@ headers = \ sources = \ attr.c \ - bipartite_graph.c \ crc.c \ daemon_init.c \ dash_host/dash_host.c \ diff --git a/src/util/bipartite_graph.c b/src/util/bipartite_graph.c deleted file mode 100644 index e4a01b2a85..0000000000 --- a/src/util/bipartite_graph.c +++ /dev/null @@ -1,942 +0,0 @@ -/* - * Copyright (c) 2014-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights - * reserved. - * Copyright (c) 2019 Intel, Inc. All rights reserved. - * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "prte_config.h" - -#include -#include - -#include "constants.h" -#include "prte_stdint.h" -#include "src/class/pmix_list.h" -#include "src/class/pmix_pointer_array.h" -#include "src/runtime/prte_globals.h" -#include "src/util/error.h" -#include "src/util/pmix_output.h" - -#include "src/util/bipartite_graph.h" -#include "src/util/bipartite_graph_internal.h" - -#ifndef container_of -# define container_of(ptr, type, member) ((type *) (((char *) (ptr)) - offsetof(type, member))) -#endif - -#define GRAPH_DEBUG 0 -#if GRAPH_DEBUG -# define GRAPH_DEBUG_OUT(args) printf(args) -#else -# define GRAPH_DEBUG_OUT(args) \ - do { \ - } while (0) -#endif - -#define MAX_COST INT64_MAX - -#ifndef MAX -# define MAX(a, b) ((a) > (b) ? (a) : (b)) -#endif - -#ifndef MIN -# define MIN(a, b) ((a) < (b) ? (a) : (b)) -#endif - -#define f(i, j) flow[n * i + j] - -/* ensure that (a+b<=max) */ -static inline void check_add64_overflow(int64_t a, int64_t b) -{ -#if PRTE_ENABLE_DEBUG - assert(!((b > 0) && (a > (INT64_MAX - b))) && !((b < 0) && (a < (INT64_MIN - b)))); -#else - PRTE_HIDE_UNUSED_PARAMS(a, b); -#endif -} - -static void edge_constructor(prte_bp_graph_edge_t *e) -{ - PMIX_CONSTRUCT(&e->outbound_li, pmix_list_item_t); - PMIX_CONSTRUCT(&e->inbound_li, pmix_list_item_t); -} - -static void edge_destructor(prte_bp_graph_edge_t *e) -{ - PMIX_DESTRUCT(&e->outbound_li); - PMIX_DESTRUCT(&e->inbound_li); -} - -PMIX_CLASS_DECLARATION(prte_bp_graph_edge_t); -PMIX_CLASS_INSTANCE(prte_bp_graph_edge_t, pmix_object_t, edge_constructor, edge_destructor); - -#if GRAPH_DEBUG -static void dump_vec(const char *name, int *vec, int n) __prte_attribute_unused__; - -static void dump_vec(const char *name, int *vec, int n) -{ - int i; - fprintf(stderr, "%s={", name); - for (i = 0; i < n; ++i) { - fprintf(stderr, "[%d]=%2d, ", i, vec[i]); - } - fprintf(stderr, "}\n"); -} - -static void dump_vec64(const char *name, int64_t *vec, int n) __prte_attribute_unused__; - -static void dump_vec64(const char *name, int64_t *vec, int n) -{ - int i; - fprintf(stderr, "%s={", name); - for (i = 0; i < n; ++i) { - fprintf(stderr, "[%d]=%2" PRIi64 ", ", i, vec[i]); - } - fprintf(stderr, "}\n"); -} - -static void dump_flow(int *flow, int n) __prte_attribute_unused__; - -static void dump_flow(int *flow, int n) -{ - int u, v; - - fprintf(stderr, "flow={\n"); - for (u = 0; u < n; ++u) { - fprintf(stderr, "u=%d| ", u); - for (v = 0; v < n; ++v) { - fprintf(stderr, "%2d,", f(u, v)); - } - fprintf(stderr, "\n"); - } - fprintf(stderr, "}\n"); -} -#endif - -static int get_capacity(prte_bp_graph_t *g, int source, int target) -{ - prte_bp_graph_edge_t *e; - - CHECK_VERTEX_RANGE(g, source); - CHECK_VERTEX_RANGE(g, target); - - FOREACH_OUT_EDGE(g, source, e, 0) - { - assert(e->source == source); - if (e->target == target) { - return e->capacity; - } - } - - return 0; -} - -static int set_capacity(prte_bp_graph_t *g, int source, int target, int cap) -{ - prte_bp_graph_edge_t *e; - - CHECK_VERTEX_RANGE(g, source); - CHECK_VERTEX_RANGE(g, target); - - FOREACH_OUT_EDGE(g, source, e, PRTE_ERR_NOT_FOUND) - { - assert(e->source == source); - if (e->target == target) { - e->capacity = cap; - return PRTE_SUCCESS; - } - } - - return PRTE_ERR_NOT_FOUND; -} - -static void free_vertex(prte_bp_graph_t *g, prte_bp_graph_vertex_t *v) -{ - if (NULL != v) { - if (NULL != g->v_data_cleanup_fn && NULL != v->v_data) { - g->v_data_cleanup_fn(v->v_data); - } - free(v); - } -} - -int prte_bp_graph_create(prte_bp_graph_cleanup_fn_t v_data_cleanup_fn, - prte_bp_graph_cleanup_fn_t e_data_cleanup_fn, prte_bp_graph_t **g_out) -{ - int err; - prte_bp_graph_t *g = NULL; - - if (NULL == g_out) { - return PRTE_ERR_BAD_PARAM; - } - *g_out = NULL; - - g = calloc(1, sizeof(*g)); - if (NULL == g) { - PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); - err = PRTE_ERR_OUT_OF_RESOURCE; - goto out_free_g; - } - - g->source_idx = -1; - g->sink_idx = -1; - - g->v_data_cleanup_fn = v_data_cleanup_fn; - g->e_data_cleanup_fn = e_data_cleanup_fn; - - /* now that we essentially have an empty graph, add vertices to it */ - PMIX_CONSTRUCT(&g->vertices, pmix_pointer_array_t); - err = pmix_pointer_array_init(&g->vertices, 0, INT_MAX, 32); - if (PRTE_SUCCESS != err) { - goto out_free_g; - } - - *g_out = g; - return PRTE_SUCCESS; - -out_free_g: - free(g); - return err; -} - -int prte_bp_graph_free(prte_bp_graph_t *g) -{ - int i; - prte_bp_graph_edge_t *e, *next; - prte_bp_graph_vertex_t *v; - - /* remove all edges from all out_edges lists */ - for (i = 0; i < NUM_VERTICES(g); ++i) { - v = V_ID_TO_PTR(g, i); - LIST_FOREACH_SAFE_CONTAINED(e, next, &v->out_edges, prte_bp_graph_edge_t, outbound_li) - { - pmix_list_remove_item(&v->out_edges, &e->outbound_li); - PMIX_RELEASE(e); - } - } - /* now remove from all in_edges lists and free the edge */ - for (i = 0; i < NUM_VERTICES(g); ++i) { - v = V_ID_TO_PTR(g, i); - LIST_FOREACH_SAFE_CONTAINED(e, next, &v->in_edges, prte_bp_graph_edge_t, inbound_li) - { - pmix_list_remove_item(&v->in_edges, &e->inbound_li); - - if (NULL != g->e_data_cleanup_fn && NULL != e->e_data) { - g->e_data_cleanup_fn(e->e_data); - } - PMIX_RELEASE(e); - } - - free_vertex(g, V_ID_TO_PTR(g, i)); - pmix_pointer_array_set_item(&g->vertices, i, NULL); - } - g->num_vertices = 0; - - PMIX_DESTRUCT(&g->vertices); - free(g); - - return PRTE_SUCCESS; -} - -int prte_bp_graph_clone(const prte_bp_graph_t *g, bool copy_user_data, - prte_bp_graph_t **g_clone_out) -{ - int err; - int i; - int index; - prte_bp_graph_t *gx; - prte_bp_graph_edge_t *e; - - if (NULL == g_clone_out) { - return PRTE_ERR_BAD_PARAM; - } - *g_clone_out = NULL; - - if (copy_user_data) { - pmix_output(0, "[%s:%d:%s] user data copy requested but not yet supported", __FILE__, - __LINE__, __func__); - abort(); - return PRTE_ERR_FATAL; - } - - gx = NULL; - err = prte_bp_graph_create(NULL, NULL, &gx); - if (PRTE_SUCCESS != err) { - return err; - } - assert(NULL != gx); - - /* reconstruct all vertices */ - for (i = 0; i < NUM_VERTICES(g); ++i) { - err = prte_bp_graph_add_vertex(gx, NULL, &index); - if (PRTE_SUCCESS != err) { - goto out_free_gx; - } - assert(index == i); - } - - /* now reconstruct all the edges (iterate by source vertex only to avoid - * double-adding) */ - for (i = 0; i < NUM_VERTICES(g); ++i) { - prte_bp_graph_vertex_t *_v; - _v = V_ID_TO_PTR(g, i); - if (NULL == _v) { - err = PRTE_ERR_NOT_FOUND; - goto out_free_gx; - } - LIST_FOREACH_CONTAINED(e, &(_v->out_edges), prte_bp_graph_edge_t, outbound_li) - { - assert(i == e->source); - err = prte_bp_graph_add_edge(gx, e->source, e->target, e->cost, e->capacity, NULL); - if (PRTE_SUCCESS != err) { - goto out_free_gx; - } - } - } - - *g_clone_out = gx; - return PRTE_SUCCESS; - -out_free_gx: - /* we don't reach in and manipulate gx's state directly, so it should be - * safe to use the standard free function */ - prte_bp_graph_free(gx); - return err; -} - -int prte_bp_graph_indegree(const prte_bp_graph_t *g, int vertex) -{ - prte_bp_graph_vertex_t *v; - - v = V_ID_TO_PTR(g, vertex); - if (NULL == v) { - PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); - return PRTE_ERR_NOT_FOUND; - } - return pmix_list_get_size(&v->in_edges); -} - -int prte_bp_graph_outdegree(const prte_bp_graph_t *g, int vertex) -{ - prte_bp_graph_vertex_t *v; - - v = V_ID_TO_PTR(g, vertex); - return pmix_list_get_size(&v->out_edges); -} - -int prte_bp_graph_add_edge(prte_bp_graph_t *g, int from, int to, int64_t cost, int capacity, - void *e_data) -{ - prte_bp_graph_edge_t *e; - prte_bp_graph_vertex_t *v_from, *v_to; - - if (from < 0 || from >= NUM_VERTICES(g)) { - return PRTE_ERR_BAD_PARAM; - } - if (to < 0 || to >= NUM_VERTICES(g)) { - return PRTE_ERR_BAD_PARAM; - } - if (cost == MAX_COST) { - return PRTE_ERR_BAD_PARAM; - } - if (capacity < 0) { - /* negative cost is fine, but negative capacity is not currently - * handled appropriately */ - return PRTE_ERR_BAD_PARAM; - } - FOREACH_OUT_EDGE(g, from, e, PRTE_ERR_NOT_FOUND) - { - assert(e->source == from); - if (e->target == to) { - return PRTE_EXISTS; - } - } - - /* this reference is owned by the out_edges list */ - e = PMIX_NEW(prte_bp_graph_edge_t); - if (NULL == e) { - PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); - return PRTE_ERR_OUT_OF_RESOURCE; - } - - e->source = from; - e->target = to; - e->cost = cost; - e->capacity = capacity; - e->e_data = e_data; - - v_from = V_ID_TO_PTR(g, from); - if (NULL == v_from) { - PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); - return PRTE_ERR_NOT_FOUND; - } - pmix_list_append(&v_from->out_edges, &e->outbound_li); - - PMIX_RETAIN(e); /* ref owned by in_edges list */ - v_to = V_ID_TO_PTR(g, to); - pmix_list_append(&v_to->in_edges, &e->inbound_li); - - return PRTE_SUCCESS; -} - -int prte_bp_graph_add_vertex(prte_bp_graph_t *g, void *v_data, int *index_out) -{ - prte_bp_graph_vertex_t *v; - - v = calloc(1, sizeof(*v)); - if (NULL == v) { - PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); - return PRTE_ERR_OUT_OF_RESOURCE; - } - - /* add to the ptr array early to simplify cleanup in the incredibly rare - * chance that adding fails */ - v->v_index = pmix_pointer_array_add(&g->vertices, v); - if (-1 == v->v_index) { - free(v); - PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); - return PRTE_ERR_OUT_OF_RESOURCE; - } - assert(v->v_index == g->num_vertices); - - ++g->num_vertices; - - v->v_data = v_data; - PMIX_CONSTRUCT(&v->out_edges, pmix_list_t); - PMIX_CONSTRUCT(&v->in_edges, pmix_list_t); - - if (NULL != index_out) { - *index_out = v->v_index; - } - - return PRTE_SUCCESS; -} - -int prte_bp_graph_order(const prte_bp_graph_t *g) -{ - return NUM_VERTICES(g); -} - -/** - * shrink a flow matrix for old_n vertices to one works for new_n - * - * Takes a matrix stored in a one-dimensional array of size (old_n*old_n) and - * "truncates" it into a dense array of size (new_n*new_n) that only contain - * the flow values for the first new_n vertices. E.g., it turns this array - * (old_n=5, new_n=3): - * - * 1 2 3 4 5 - * 6 7 8 9 10 - * 11 12 13 14 15 - * 16 17 18 19 20 - * 21 22 23 24 25 - * - * into this array; - * - * 1 2 3 - * 6 7 8 - * 11 12 13 - */ -static void shrink_flow_matrix(int *flow, int old_n, int new_n) -{ - int u, v; - - assert(old_n > new_n); - - for (u = 0; u < new_n; ++u) { - for (v = 0; v < new_n; ++v) { - flow[new_n * u + v] = flow[old_n * u + v]; - } - } -} - -/** - * Compute the so-called "bottleneck" capacity value for a path "pred" through - * graph "gx". - */ -static int bottleneck_path(prte_bp_graph_t *gx, int n, int *pred) -{ - int u, v; - int min; - PRTE_HIDE_UNUSED_PARAMS(n); - - min = INT_MAX; - FOREACH_UV_ON_PATH(pred, gx->source_idx, gx->sink_idx, u, v) - { - int cap_f_uv = get_capacity(gx, u, v); - min = MIN(min, cap_f_uv); - } - - return min; -} - -/** - * This routine implements the Bellman-Ford shortest paths algorithm, slightly - * specialized for our forumlation of flow networks: - * http://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm - * - * Specifically, it attempts to find the shortest path from "source" to - * "target". It returns true if such a path was found, false otherwise. Any - * found path is returned in "pred" as a predecessor chain (i.e., pred[sink] - * is the start of the path and pred[pred[sink]] is its predecessor, etc.). - * - * The contents of "pred" are only valid if this routine returns true. - */ -bool prte_bp_graph_bellman_ford(prte_bp_graph_t *gx, int source, int target, int *pred) -{ - int64_t *dist; - int i; - int n; - int u, v; - bool found_target = false; - - if (NULL == gx) { - PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); - return false; - } - if (NULL == pred) { - PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); - return false; - } - if (source < 0 || source >= NUM_VERTICES(gx)) { - return PRTE_ERR_BAD_PARAM; - } - if (target < 0 || target >= NUM_VERTICES(gx)) { - return PRTE_ERR_BAD_PARAM; - } - - /* initialize */ - n = prte_bp_graph_order(gx); - dist = malloc(n * sizeof(*dist)); - if (NULL == dist) { - PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); - goto out; - } - for (i = 0; i < n; ++i) { - dist[i] = MAX_COST; - pred[i] = -1; - } - dist[source] = 0; - - /* relax repeatedly */ - for (i = 1; i < NUM_VERTICES(gx); ++i) { - bool relaxed = false; -#if GRAPH_DEBUG - dump_vec("pred", pred, NUM_VERTICES(gx)); - dump_vec64("dist", dist, NUM_VERTICES(gx)); -#endif - - for (u = 0; u < NUM_VERTICES(gx); ++u) { - prte_bp_graph_edge_t *e_ptr; - - FOREACH_OUT_EDGE(gx, u, e_ptr, false) - { - v = e_ptr->target; - - /* make sure to only construct paths from edges that actually have - * non-zero capacity */ - if (e_ptr->capacity > 0 - && dist[u] != MAX_COST) { /* avoid signed overflow for "infinity" */ - check_add64_overflow(dist[u], e_ptr->cost); - if ((dist[u] + e_ptr->cost) < dist[v]) { - dist[v] = dist[u] + e_ptr->cost; - pred[v] = u; - relaxed = true; - } - } - } - } - /* optimization: stop if an outer iteration did not succeed in - * changing any dist/pred values (already at optimum) */ - if (!relaxed) { - GRAPH_DEBUG_OUT(("relaxed==false, breaking out")); - break; - } - } - - /* check for negative-cost cycles */ - for (u = 0; u < NUM_VERTICES(gx); ++u) { - prte_bp_graph_edge_t *e_ptr; - prte_bp_graph_vertex_t *_v; - _v = V_ID_TO_PTR(gx, u); - if (NULL == _v) { - goto out; - } - LIST_FOREACH_CONTAINED(e_ptr, &(_v->out_edges), prte_bp_graph_edge_t, outbound_li) - { - v = e_ptr->target; - if (e_ptr->capacity > 0 && dist[u] != MAX_COST && /* avoid signed overflow */ - (dist[u] + e_ptr->cost) < dist[v]) { - pmix_output(0, "[%s:%d:%s] negative-weight cycle detected", __FILE__, __LINE__, - __func__); - abort(); - goto out; - } - } - } - - if (dist[target] != MAX_COST) { - found_target = true; - } - -out: -#if GRAPH_DEBUG - dump_vec("pred", pred, NUM_VERTICES(gx)); -#endif - assert(pred[source] == -1); - free(dist); - GRAPH_DEBUG_OUT(("bellman_ford: found_target=%s", found_target ? "true" : "false")); - return found_target; -} - -/** - * Transform the given connected, bipartite, acyclic digraph into a flow - * network (i.e., add a source and a sink, with the source connected to vertex - * set V1 and the sink connected to vertex set V2). This also creates - * residual edges suitable for augmenting-path algorithms. All "source" nodes - * in the original graph are considered to have an output of 1 and "sink" - * nodes can take an input of 1. The result is that "forward" edges are all - * created with capacity=1, "backward" (residual) edges are created with - * capacity=0. - * - * After this routine, all capacities are "residual capacities" ($c_f$ in the - * literature). - * - * Initial flow throughout the network is assumed to be 0 at all edges. - * - * The graph will be left in an undefined state if an error occurs (though - * freeing it should still be safe). - */ -int prte_bp_graph_bipartite_to_flow(prte_bp_graph_t *g) -{ - int err; - int order; - int u, v; - int num_left, num_right; - - /* grab size before adding extra vertices */ - order = prte_bp_graph_order(g); - - err = prte_bp_graph_add_vertex(g, NULL, &g->source_idx); - if (PRTE_SUCCESS != err) { - return err; - } - err = prte_bp_graph_add_vertex(g, NULL, &g->sink_idx); - if (PRTE_SUCCESS != err) { - return err; - } - - /* The networks we are interested in are bipartite and have edges only - * from one partition to the other partition (none vice versa). We - * visualize this conventionally with all of the source vertices on the - * left-hand side of an imaginary rendering of the graph and the target - * vertices on the right-hand side of the rendering. The direction - * "forward" is considered to be moving from left to right. - */ - num_left = 0; - num_right = 0; - for (u = 0; u < order; ++u) { - int inbound = prte_bp_graph_indegree(g, u); - int outbound = prte_bp_graph_outdegree(g, u); - - if (inbound > 0 && outbound > 0) { - pmix_output(0, "[%s:%d:%s] graph is not (unidirectionally) bipartite", __FILE__, - __LINE__, __func__); - abort(); - } else if (inbound > 0) { - /* "right" side of the graph, create edges to the sink */ - ++num_right; - err = prte_bp_graph_add_edge(g, u, g->sink_idx, 0, /* no cost */ - /*capacity=*/1, - /*e_data=*/NULL); - if (PRTE_SUCCESS != err) { - GRAPH_DEBUG_OUT(("add_edge failed")); - return err; - } - } else if (outbound > 0) { - /* "left" side of the graph, create edges to the source */ - ++num_left; - err = prte_bp_graph_add_edge(g, g->source_idx, u, 0, /* no cost */ - /*capacity=*/1, - /*e_data=*/NULL); - if (PRTE_SUCCESS != err) { - GRAPH_DEBUG_OUT(("add_edge failed")); - return err; - } - } - } - - /* it doesn't make sense to extend this graph with a source and sink - * unless */ - if (num_right == 0 || num_left == 0) { - return PRTE_ERR_BAD_PARAM; - } - - /* now run through and create "residual" edges as well (i.e., create edges - * in the reverse direction with 0 initial flow and a residual capacity of - * $c_f(u,v)=c(u,v)-f(u,v)$). Residual edges can exist where no edges - * exist in the original graph. - */ - order = prte_bp_graph_order(g); /* need residuals for newly created - source/sink edges too */ - for (u = 0; u < order; ++u) { - prte_bp_graph_edge_t *e_ptr; - FOREACH_OUT_EDGE(g, u, e_ptr, PRTE_ERR_NOT_FOUND) - { - v = e_ptr->target; - - /* (u,v) exists, add (v,u) if not already present. Cost is - * negative for these edges because "giving back" flow pays us - * back any cost already incurred. */ - err = prte_bp_graph_add_edge(g, v, u, -e_ptr->cost, - /*capacity=*/0, - /*e_data=*/NULL); - if (PRTE_SUCCESS != err && PRTE_EXISTS != err) { - return err; - } - } - } - - return PRTE_SUCCESS; -} - -/** - * Implements the "Successive Shortest Path" algorithm for computing the - * minimum cost flow problem. This is a generalized version of the - * Ford-Fulkerson algorithm. There are two major changes from F-F: - * 1. In addition to capacities and flows, this algorithm pays attention to - * costs for traversing an edge. This particular function leaves the - * caller's costs alone but sets its own capacities. - * 2. Shortest paths are computed using the cost metric. - * - * The algorithm's sketch looks like: - * 1 Transform network G by adding source and sink, create residual edges - * 2 Initial flow x is zero - * 3 while ( Gx contains a path from s to t ) do - * 4 Find any shortest path P from s to t - * 5 Augment current flow x along P - * 6 update Gx - * - * This function mutates the given graph (adding vertices and edges, changing - * capacties, etc.), so callers may wish to clone the graph before calling - * this routine. - * - * The result is an array of (u,v) vertex pairs, where (u,v) is an edge in the - * original graph which has non-zero flow. - * - * Returns OMPI error codes like PRTE_SUCCESS/PRTE_ERR_OUT_OF_RESOURCE. - * - * This version of the algorithm has a theoretical upper bound on its running - * time of O(|V|^2 * |E| * f), where f is essentially the maximum flow in the - * graph. In our case, f=min(|V1|,|V2|), where V1 and V2 are the two - * constituent sets of the bipartite graph. - * - * This algorithm's performance could probably be improved by modifying it to - * use vertex potentials and Dijkstra's Algorithm instead of Bellman-Ford. - * Normally vertex potentials are needed in order to use Dijkstra's safely, - * but our graphs are constrained enough that this may not be necessary. - * Switching to Dijkstra's implemented with a heap should yield a reduced - * upper bound of O(|V| * |E| * f * log(|V|)). Let's consider this a future - * enhancement for the time being, since it's not obvious at this point that - * the faster running time will be worth the additional implementation - * complexity. - */ -static int min_cost_flow_ssp(prte_bp_graph_t *gx, int **flow_out) -{ - int err = PRTE_SUCCESS; - int n; - int *pred = NULL; - int *flow = NULL; - int u, v; - int c; - - GRAPH_DEBUG_OUT(("begin min_cost_flow_ssp()")); - - if (NULL == flow_out) { - return PRTE_ERR_BAD_PARAM; - } - *flow_out = NULL; - - n = prte_bp_graph_order(gx); - - pred = malloc(n * sizeof(*pred)); - if (NULL == pred) { - PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); - err = PRTE_ERR_OUT_OF_RESOURCE; - goto out_error; - } - - /* "flow" is a 2d matrix of current flow values, all initialized to zero */ - flow = calloc(n * n, sizeof(*flow)); - if (NULL == flow) { - PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); - err = PRTE_ERR_OUT_OF_RESOURCE; - goto out_error; - } - - /* loop as long as paths exist from source to sink */ - while (prte_bp_graph_bellman_ford(gx, gx->source_idx, gx->sink_idx, pred)) { - int cap_f_path; - - /* find any shortest path P from s to t (already present in pred) */ - GRAPH_DEBUG_OUT(("start outer iteration of SSP algorithm")); -#if GRAPH_DEBUG - dump_vec("pred", pred, NUM_VERTICES(gx)); - dump_flow(flow, n); -#endif - - cap_f_path = bottleneck_path(gx, n, pred); - - /* augment current flow along P */ - FOREACH_UV_ON_PATH(pred, gx->source_idx, gx->sink_idx, u, v) - { - assert(u == pred[v]); - - f(u, v) = f(u, v) + cap_f_path; /* "forward" edge */ - f(v, u) = f(v, u) - cap_f_path; /* residual network edge */ - - assert(f(u, v) == -f(v, u)); /* skew symmetry invariant */ - - /* update Gx as we go along: decrease capacity by this new - * augmenting flow */ - c = get_capacity(gx, u, v) - cap_f_path; - assert(c >= 0); - err = set_capacity(gx, u, v, c); - if (PRTE_SUCCESS != err) { - pmix_output(0, "[%s:%d:%s] unable to set capacity, missing edge?", __FILE__, - __LINE__, __func__); - abort(); - } - - c = get_capacity(gx, v, u) + cap_f_path; - assert(c >= 0); - err = set_capacity(gx, v, u, c); - if (PRTE_SUCCESS != err) { - pmix_output(0, "[%s:%d:%s] unable to set capacity, missing edge?", __FILE__, - __LINE__, __func__); - abort(); - } - } - } - -out: - *flow_out = flow; - free(pred); - return err; - -out_error: - free(*flow_out); - GRAPH_DEBUG_OUT(("returning error %d", err)); - goto out; -} - -int prte_bp_graph_solve_bipartite_assignment(const prte_bp_graph_t *g, int *num_match_edges_out, - int **match_edges_out) -{ - int err; - int i; - int u, v; - int n; - int *flow = NULL; - prte_bp_graph_t *gx = NULL; - - if (NULL == match_edges_out || NULL == num_match_edges_out) { - return PRTE_ERR_BAD_PARAM; - } - *num_match_edges_out = 0; - *match_edges_out = NULL; - - /* don't perturb the caller's data structure */ - err = prte_bp_graph_clone(g, false, &gx); - if (PRTE_SUCCESS != err) { - GRAPH_DEBUG_OUT(("prte_bp_graph_clone failed")); - goto out; - } - - /* Transform gx into a residual flow network with capacities, a source, a - * sink, and residual edges. We track the actual flow separately in the - * "flow" matrix. Initial capacity for every forward edge is 1. Initial - * capacity for every backward (residual) edge is 0. - * - * For the remainder of this routine (and the ssp routine) the capacities - * refer to residual capacities ($c_f$) not capacities in the original - * graph. For convenience we adjust all residual capacities as we go - * along rather than recomputing them from the flow and capacities in the - * original graph. This allows many other graph operations to have no - * direct knowledge of the flow matrix. - */ - err = prte_bp_graph_bipartite_to_flow(gx); - if (PRTE_SUCCESS != err) { - GRAPH_DEBUG_OUT(("bipartite_to_flow failed")); - PRTE_ERROR_LOG(err); - return err; - } - - /* Use the SSP algorithm to compute the min-cost flow over this network. - * Edges with non-zero flow in the result should be part of the matching. - * - * Note that the flow array returned is sized for gx, not for g. Index - * accordingly later on. - */ - err = min_cost_flow_ssp(gx, &flow); - if (PRTE_SUCCESS != err) { - GRAPH_DEBUG_OUT(("min_cost_flow_ssp failed")); - return err; - } - assert(NULL != flow); - - /* don't care about new edges in gx, only old edges in g */ - n = prte_bp_graph_order(g); - -#if GRAPH_DEBUG - dump_flow(flow, NUM_VERTICES(gx)); -#endif - shrink_flow_matrix(flow, prte_bp_graph_order(gx), n); -#if GRAPH_DEBUG - dump_flow(flow, n); -#endif - - for (u = 0; u < n; ++u) { - for (v = 0; v < n; ++v) { - if (f(u, v) > 0) { - ++(*num_match_edges_out); - } - } - } - - if (0 == *num_match_edges_out) { - /* avoid attempting to allocate a zero-byte buffer */ - goto out; - } - - *match_edges_out = malloc(*num_match_edges_out * 2 * sizeof(int)); - if (NULL == *match_edges_out) { - *num_match_edges_out = 0; - PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); - err = PRTE_ERR_OUT_OF_RESOURCE; - goto out; - } - - i = 0; - for (u = 0; u < n; ++u) { - for (v = 0; v < n; ++v) { - /* flow exists on this edge so include this edge in the matching */ - if (f(u, v) > 0) { - (*match_edges_out)[i++] = u; - (*match_edges_out)[i++] = v; - } - } - } - -out: - free(flow); - prte_bp_graph_free(gx); - return err; -} diff --git a/src/util/bipartite_graph.h b/src/util/bipartite_graph.h deleted file mode 100644 index b57fdb0be9..0000000000 --- a/src/util/bipartite_graph.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2014-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights - * reserved. - * Copyright (c) 2019 Intel, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Implements an adjacency-list-based weighted directed graph (digraph), - * focused on supporting bipartite digraphs and flow-network problems. - * - * Note that some operations might be more efficient if this structure were - * converted to use an adjacency matrix instead of an adjacency list. OTOH - * that complicates other pieces of the implementation (specifically, adding - * and removing edges). */ - -#ifndef PRTE_BP_GRAPH_H -#define PRTE_BP_GRAPH_H - -struct prte_bp_graph_vertex_t; -struct prte_bp_graph_edge_t; -struct prte_bp_graph_t; - -typedef struct prte_bp_graph_vertex_t prte_bp_graph_vertex_t; -typedef struct prte_bp_graph_edge_t prte_bp_graph_edge_t; -typedef struct prte_bp_graph_t prte_bp_graph_t; - -/** - * callback function pointer type for cleaning up user data associated with a - * vertex or edge */ -typedef void (*prte_bp_graph_cleanup_fn_t)(void *user_data); - -/** - * create a new empty graph - * - * Any new vertices will have NULL user data associated. - * - * @param[in] v_data_cleanup_fn cleanup function to use for vertex user data - * @param[in] e_data_cleanup_fn cleanup function to use for edge user data - * @param[out] g_out the created graph - * - * @returns PRTE_SUCCESS or an OMPI error code - */ -int prte_bp_graph_create(prte_bp_graph_cleanup_fn_t v_data_cleanup_fn, - prte_bp_graph_cleanup_fn_t e_data_cleanup_fn, prte_bp_graph_t **g_out); - -/** - * free the given graph - * - * Any user data associated with vertices or edges in the graph will have - * the given edge/vertex cleanup callback invoked in some arbitrary order. - * - * @returns PRTE_SUCCESS or an OMPI error code - */ -int prte_bp_graph_free(prte_bp_graph_t *g); - -/** - * clone (deep copy) the given graph - * - * Note that copy_user_data==true is not currently supported (requires the - * addition of a copy callback for user data). - * - * @param[in] g the graph to clone - * @param[in] copy_user_data if true, copy vertex/edge user data to the new - * graph - * @param[in] g_clone_out the resulting cloned graph - * @returns PRTE_SUCCESS or an OMPI error code - */ -int prte_bp_graph_clone(const prte_bp_graph_t *g, bool copy_user_data, - prte_bp_graph_t **g_clone_out); - -/** - * return the number of edges for which this vertex is a destination - * - * @param[in] g the graph to query - * @param[in] vertex the vertex id to query - * @returns the number of edges for which this vertex is a destination - */ -int prte_bp_graph_indegree(const prte_bp_graph_t *g, int vertex); - -/** - * return the number of edges for which this vertex is a source - * - * @param[in] g the graph to query - * @param[in] vertex the vertex id to query - * @returns the number of edges for which this vertex is a source - */ -int prte_bp_graph_outdegree(const prte_bp_graph_t *g, int vertex); - -/** - * add an edge to the given graph - * - * @param[in] from source vertex ID - * @param[in] to target vertex ID - * @param[in] cost cost value for this edge (lower is better) - * @param[in] capacity maximum flow transmissible on this edge - * @param[in] e_data caller data to associate with this edge, useful for - * debugging or minimizing state shared across components - * - * @returns PRTE_SUCCESS or an OMPI error code - */ -int prte_bp_graph_add_edge(prte_bp_graph_t *g, int from, int to, int64_t cost, int capacity, - void *e_data); - -/** - * add a vertex to the given graph - * - * @param[in] g graph to manipulate - * @param[in] v_data data to associate with the new vertex - * @param[out] index_out integer index of the new vertex. May be NULL. - * - * @returns PRTE_SUCCESS or an OMPI error code - */ -int prte_bp_graph_add_vertex(prte_bp_graph_t *g, void *v_data, int *index_out); - -/** - * compute the order of a graph (number of vertices) - * - * @param[in] g the graph to query - */ -int prte_bp_graph_order(const prte_bp_graph_t *g); - -/** - * This function solves the "assignment problem": - * http://en.wikipedia.org/wiki/Assignment_problem - * - * The goal is to find a maximum cardinality, minimum cost matching in a - * weighted bipartite graph. Maximum cardinality takes priority over minimum - * cost. - * - * Capacities in the given graph are ignored (assumed to be 1 at the start). - * It is also assumed that the graph only contains edges from one vertex set - * to the other and that no edges exist in the reverse direction ("forward" - * edges only). - * - * The algorithm(s) used will be deterministic. That is, given the exact same - * graph, two calls to this routine will result in the same matching result. - * - * @param[in] g an acyclic bipartite directed graph for - * which a matching is sought - * @param[out] num_match_edges_out number edges found in the matching - * @param[out] match_edges_out an array of (u,v) vertex pairs indicating - * which edges are in the matching - * - * @returns PRTE_SUCCESS or an OMPI error code - */ -int prte_bp_graph_solve_bipartite_assignment(const prte_bp_graph_t *g, int *num_match_edges_out, - int **match_edges_out); - -#endif /* PRTE_BP_GRAPH_H */ diff --git a/src/util/bipartite_graph_internal.h b/src/util/bipartite_graph_internal.h deleted file mode 100644 index a28da93169..0000000000 --- a/src/util/bipartite_graph_internal.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2014-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights - * reserved. - * Copyright (c) 2019 Intel, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * This file defines a number of internal structures to the BP graph - * code which need to be exposed only for unit testing. This file - * should not be included in code that uses the BP graph interface. - */ - -#ifndef BIPARTITE_GRAPH_INTERNAL -#define BIPARTITE_GRAPH_INTERNAL 1 - -struct prte_bp_graph_edge_t { - pmix_object_t super; - - pmix_list_item_t outbound_li; - pmix_list_item_t inbound_li; - - /** source of this edge */ - int source; - - /** v_index of target of this edge */ - int target; - - /** cost (weight) of this edge */ - int64_t cost; - - /** - * (flow-network) capacity of this edge. Zero-capacity edges essentially do - * not exist and will be ignored by most of the algorithms implemented here. - */ - int capacity; - - /** any other information associated with this edge */ - void *e_data; -}; - -struct prte_bp_graph_vertex_t { - /** index in the graph's array of vertices */ - int v_index; - - /** any other information associated with the vertex */ - void *v_data; - - /** linked list of edges for which this vertex is a source */ - pmix_list_t out_edges; - - /** linked list of edges for which this vertex is a target */ - pmix_list_t in_edges; -}; - -struct prte_bp_graph_t { - /** number of vertices currently in this graph */ - int num_vertices; - - /** vertices in this graph (with number of set elements == num_vertices) */ - pmix_pointer_array_t vertices; - - /** index of the source vertex, or -1 if not present */ - int source_idx; - - /** index of the sink vertex, or -1 if not present */ - int sink_idx; - - /** user callback to clean up the v_data */ - prte_bp_graph_cleanup_fn_t v_data_cleanup_fn; - - /** user callback to clean up the e_data */ - prte_bp_graph_cleanup_fn_t e_data_cleanup_fn; -}; - -#define LIST_FOREACH_CONTAINED(item, list, type, member) \ - for (item = container_of((list)->pmix_list_sentinel.pmix_list_next, type, member); \ - &item->member != &(list)->pmix_list_sentinel; \ - item = container_of(((pmix_list_item_t *) (&item->member))->pmix_list_next, type, \ - member)) - -#define LIST_FOREACH_SAFE_CONTAINED(item, next, list, type, member) \ - for (item = container_of((list)->pmix_list_sentinel.pmix_list_next, type, member), \ - next = container_of(((pmix_list_item_t *) (&item->member))->pmix_list_next, type, member); \ - &item->member != &(list)->pmix_list_sentinel; item = next, \ - next = container_of(((pmix_list_item_t *) (&item->member))->pmix_list_next, type, member)) - -#define NUM_VERTICES(g) (g->num_vertices) - -#define CHECK_VERTEX_RANGE(g, v) \ - do { \ - if ((v) < 0 || (v) >= NUM_VERTICES(g)) { \ - return PRTE_ERR_BAD_PARAM; \ - } \ - } while (0) - -/* cast away any constness of &g->vertices b/c the pmix_pointer_array API is - * not const-correct */ -#define V_ID_TO_PTR(g, v_id) \ - ((prte_bp_graph_vertex_t *) pmix_pointer_array_get_item((pmix_pointer_array_t *) &g->vertices, \ - v_id)) - -#define FOREACH_OUT_EDGE(g, v_id, e_ptr, _err) \ - prte_bp_graph_vertex_t *_v; \ - _v = V_ID_TO_PTR((g), (v_id)); \ - if (NULL == _v) { \ - return (_err); \ - } \ - LIST_FOREACH_CONTAINED(e_ptr, &(_v->out_edges), prte_bp_graph_edge_t, outbound_li) - -#define FOREACH_IN_EDGE(g, v_id, e_ptr, _err) \ - prte_bp_graph_vertex_t *_v; \ - _v = V_ID_TO_PTR((g), (v_id)); \ - if (NULL == _v) { \ - return (_err); \ - } \ - LIST_FOREACH_CONTAINED(e_ptr, &(_v->in_edges), prte_bp_graph_edge_t, inbound_li) - -/* Iterate over (u,v) edge pairs along the given path, where path is defined - * by the predecessor array "pred". Stops when a -1 predecessor is - * encountered. Note: because it is a *predecessor* array, the traversal - * starts at the sink and progresses towards the source. */ -#define FOREACH_UV_ON_PATH(pred, source, sink, u, v) \ - for (u = pred[sink], v = sink; u != -1; v = u, u = pred[u]) - -bool prte_bp_graph_bellman_ford(prte_bp_graph_t *gx, int source, int target, int *pred); - -int prte_bp_graph_bipartite_to_flow(prte_bp_graph_t *g); - -#endif diff --git a/src/util/name_fns.c b/src/util/name_fns.c index 8d0336c5e6..8474f7b33d 100644 --- a/src/util/name_fns.c +++ b/src/util/name_fns.c @@ -14,7 +14,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2020 Intel, Inc. All rights reserved. * Copyright (c) 2018-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -120,7 +120,8 @@ char *prte_util_print_name_args(const pmix_proc_t *name) if (PRTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { ptr->cntr = 0; } - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[NO-NAME]"); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[NO-NAME]"); + ptr->cntr++; return ptr->buffers[ptr->cntr - 1]; } @@ -145,8 +146,8 @@ char *prte_util_print_name_args(const pmix_proc_t *name) ptr->cntr = 0; } - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "[%s,%s]", job, vpid); - + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "[%s,%s]", job, vpid); + ptr->cntr++; return ptr->buffers[ptr->cntr - 1]; } @@ -167,10 +168,11 @@ char *prte_util_print_jobids(const pmix_nspace_t job) } if (0 == strlen(job)) { - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[INVALID]"); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[INVALID]"); } else { - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); } + ptr->cntr++; return ptr->buffers[ptr->cntr - 1]; } @@ -199,12 +201,13 @@ char *prte_util_print_job_family(const pmix_nspace_t job) cptr = strrchr(job, '@'); if (NULL == cptr) { /* this isn't a PRRTE job */ - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); } else { *cptr = '\0'; - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); *cptr = '@'; } + ptr->cntr++; } return ptr->buffers[ptr->cntr - 1]; } @@ -228,17 +231,19 @@ char *prte_util_print_local_jobid(const pmix_nspace_t job) /* see if the job is invalid */ if (PMIX_NSPACE_INVALID(job)) { - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[INVALID]"); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[INVALID]"); + ptr->cntr++; } else { /* find the '@' sign delimiting the job family */ cptr = strrchr(job, '@'); if (NULL == cptr) { /* this isn't a PRRTE job */ - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); } else { ++cptr; - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", cptr); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", cptr); } + ptr->cntr++; } return ptr->buffers[ptr->cntr - 1]; } @@ -260,18 +265,24 @@ char *prte_util_print_vpids(const pmix_rank_t vpid) } if (PMIX_RANK_INVALID == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "INVALID"); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "INVALID"); + } else if (PMIX_RANK_WILDCARD == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "WILDCARD"); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "WILDCARD"); + } else if (PMIX_RANK_LOCAL_NODE == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "LOCALNODE"); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "LOCALNODE"); + } else if (PMIX_RANK_LOCAL_PEERS == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "LOCALPEERS"); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "LOCALPEERS"); + } else if (PMIX_RANK_UNDEF == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "UNDEFINED"); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "UNDEFINED"); + } else { - snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%u", vpid); + snprintf(ptr->buffers[ptr->cntr], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%u", vpid); } + ptr->cntr++; return ptr->buffers[ptr->cntr - 1]; } From 661d6be65553a2218575f191f7bb6650e11956f8 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 8 Sep 2025 11:30:28 -0600 Subject: [PATCH 2/3] Silence more Coverity warnings 488202 488201 488200 488197 488196 488194 488193 488191 488189 488188 488187 488185 Signed-off-by: Ralph Castain --- src/mca/grpcomm/direct/grpcomm_direct_group.c | 42 +++++++++++++++---- src/mca/odls/base/odls_base_default_fns.c | 5 +++ src/mca/odls/base/odls_base_frame.c | 9 ++-- src/mca/ras/pbs/ras_pbs_module.c | 2 + src/mca/rmaps/round_robin/rmaps_rr_mappers.c | 3 +- src/mca/schizo/base/schizo_base_frame.c | 12 +++--- src/prted/pmix/pmix_server.c | 6 +++ src/rml/oob/oob_base_stubs.c | 8 +++- src/util/attr.c | 6 +-- 9 files changed, 67 insertions(+), 26 deletions(-) diff --git a/src/mca/grpcomm/direct/grpcomm_direct_group.c b/src/mca/grpcomm/direct/grpcomm_direct_group.c index 9cfd25d3a2..7b21036b04 100644 --- a/src/mca/grpcomm/direct/grpcomm_direct_group.c +++ b/src/mca/grpcomm/direct/grpcomm_direct_group.c @@ -226,9 +226,21 @@ static void group(int sd, short args, void *cbdata) PMIx_Info_list_convert(grpinfo, &darray); info = (pmix_info_t*)darray.array; ninfo = darray.size; - PMIx_Data_pack(NULL, relay, &ninfo, 1, PMIX_SIZE); + rc = PMIx_Data_pack(NULL, relay, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DATA_BUFFER_RELEASE(relay); + PMIX_DESTRUCT(&sig); + goto error; + } if (0 < ninfo) { - PMIx_Data_pack(NULL, relay, info, ninfo, PMIX_INFO); + rc = PMIx_Data_pack(NULL, relay, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DATA_BUFFER_RELEASE(relay); + PMIX_DESTRUCT(&sig); + goto error; + } } PMIX_DATA_ARRAY_DESTRUCT(&darray); @@ -236,9 +248,21 @@ static void group(int sd, short args, void *cbdata) PMIx_Info_list_convert(endpts, &darray); info = (pmix_info_t*)darray.array; ninfo = darray.size; - PMIx_Data_pack(NULL, relay, &ninfo, 1, PMIX_SIZE); + rc = PMIx_Data_pack(NULL, relay, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DATA_BUFFER_RELEASE(relay); + PMIX_DESTRUCT(&sig); + goto error; + } if (0 < ninfo) { - PMIx_Data_pack(NULL, relay, info, ninfo, PMIX_INFO); + rc = PMIx_Data_pack(NULL, relay, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DATA_BUFFER_RELEASE(relay); + PMIX_DESTRUCT(&sig); + goto error; + } } PMIX_DATA_ARRAY_DESTRUCT(&darray); } @@ -298,7 +322,7 @@ void prte_grpcomm_direct_grp_recv(int status, pmix_proc_t *sender, prte_namelist_t *nm; pmix_data_array_t darray; pmix_status_t st; - pmix_info_t *info = NULL, *endpts, *grpinfo; + pmix_info_t *info = NULL, *endpts, *grpinfo = NULL; prte_grpcomm_direct_group_signature_t *sig = NULL; pmix_data_buffer_t *reply; prte_grpcomm_group_t *coll; @@ -386,7 +410,9 @@ void prte_grpcomm_direct_grp_recv(int status, pmix_proc_t *sender, rc = PMIx_Data_unpack(NULL, buffer, &nendpts, &cnt, PMIX_SIZE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - PMIX_INFO_FREE(grpinfo, ngrpinfo); + if (NULL != grpinfo) { + PMIX_INFO_FREE(grpinfo, ngrpinfo); + } PMIX_RELEASE(sig); return; } @@ -396,7 +422,9 @@ void prte_grpcomm_direct_grp_recv(int status, pmix_proc_t *sender, rc = PMIx_Data_unpack(NULL, buffer, endpts, &cnt, PMIX_INFO); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - PMIX_INFO_FREE(grpinfo, ngrpinfo); + if (NULL != grpinfo) { + PMIX_INFO_FREE(grpinfo, ngrpinfo); + } PMIX_INFO_FREE(endpts, nendpts); PMIX_RELEASE(sig); return; diff --git a/src/mca/odls/base/odls_base_default_fns.c b/src/mca/odls/base/odls_base_default_fns.c index 42ada0ca30..7e8f409909 100644 --- a/src/mca/odls/base/odls_base_default_fns.c +++ b/src/mca/odls/base/odls_base_default_fns.c @@ -2151,6 +2151,11 @@ int prte_odls_base_default_restart_proc(prte_proc_t *child, child->rml_uri = NULL; } app = (prte_app_context_t *) pmix_pointer_array_get_item(jobdat->apps, child->app_idx); + if (NULL == app) { + PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); + rc = PRTE_ERR_NOT_FOUND; + goto CLEANUP; + } /* setup the path */ if (PRTE_SUCCESS != (rc = setup_path(app, &wdir))) { diff --git a/src/mca/odls/base/odls_base_frame.c b/src/mca/odls/base/odls_base_frame.c index 346dc17922..bf6b92e1ad 100644 --- a/src/mca/odls/base/odls_base_frame.c +++ b/src/mca/odls/base/odls_base_frame.c @@ -252,12 +252,9 @@ static int prte_odls_base_open(pmix_mca_base_open_flag_t flags) prte_odls_globals.xtermcmd = NULL; /* ensure that SIGCHLD is unblocked as we need to capture it */ - if (0 != sigemptyset(&unblock)) { - return PRTE_ERROR; - } - if (0 != sigaddset(&unblock, SIGCHLD)) { - return PRTE_ERROR; - } + sigemptyset(&unblock); + sigaddset(&unblock, SIGCHLD); + if (0 != sigprocmask(SIG_UNBLOCK, &unblock, NULL)) { return PRTE_ERR_NOT_SUPPORTED; } diff --git a/src/mca/ras/pbs/ras_pbs_module.c b/src/mca/ras/pbs/ras_pbs_module.c index d2f50e05d4..b384f4d9b5 100644 --- a/src/mca/ras/pbs/ras_pbs_module.c +++ b/src/mca/ras/pbs/ras_pbs_module.c @@ -195,6 +195,8 @@ static int discover(pmix_list_t *nodelist, char *pbs_jobid) if (prte_mca_ras_pbs_component.smp_mode) { /* this cannot happen in smp mode */ pmix_show_help("help-ras-pbs.txt", "smp-multi", true); + fclose(fp); + free(hostname); return PRTE_ERR_BAD_PARAM; } ++node->slots; diff --git a/src/mca/rmaps/round_robin/rmaps_rr_mappers.c b/src/mca/rmaps/round_robin/rmaps_rr_mappers.c index 3d2a7d37d8..6980d662e0 100644 --- a/src/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/src/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -665,7 +665,8 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app, if (!prte_rmaps_base_check_avail(jdata, app, node, node_list, obj, options)) { rc = PRTE_ERR_OUT_OF_RESOURCE; PRTE_ERROR_LOG(rc); - continue; + // out of resources on this node + break; } proc = prte_rmaps_base_setup_proc(jdata, app->idx, node, obj, options); diff --git a/src/mca/schizo/base/schizo_base_frame.c b/src/mca/schizo/base/schizo_base_frame.c index 62affda519..d4321093bc 100644 --- a/src/mca/schizo/base/schizo_base_frame.c +++ b/src/mca/schizo/base/schizo_base_frame.c @@ -726,12 +726,12 @@ int prte_schizo_base_parse_output(pmix_cli_item_t *opt, void *jinfo) } else if (PMIX_CHECK_CLI_OPTION(options[m], PRTE_CLI_RAW)) { PMIX_INFO_LIST_ADD(ret, jinfo, PMIX_IOF_OUTPUT_RAW, NULL, PMIX_BOOL); - } - if (PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); - PMIX_ARGV_FREE_COMPAT(targv); - PMIX_ARGV_FREE_COMPAT(options); - return ret; + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + PMIX_ARGV_FREE_COMPAT(targv); + PMIX_ARGV_FREE_COMPAT(options); + return ret; + } } } PMIX_ARGV_FREE_COMPAT(options); diff --git a/src/prted/pmix/pmix_server.c b/src/prted/pmix/pmix_server.c index 44c0f783c4..189bd28476 100644 --- a/src/prted/pmix/pmix_server.c +++ b/src/prted/pmix/pmix_server.c @@ -1483,11 +1483,17 @@ static void pmix_server_dmdx_recv(int status, pmix_proc_t *sender, if (NULL == proc) { /* this is truly an error, so notify the sender */ send_error(PRTE_ERR_NOT_FOUND, &pproc, sender, index); + if (NULL != key) { + free(key); + } return; } if (!PRTE_FLAG_TEST(proc, PRTE_PROC_FLAG_LOCAL)) { /* send back an error - they obviously have made a mistake */ send_error(PRTE_ERR_NOT_FOUND, &pproc, sender, index); + if (NULL != key) { + free(key); + } return; } diff --git a/src/rml/oob/oob_base_stubs.c b/src/rml/oob/oob_base_stubs.c index 31b2edf409..527a14edd3 100644 --- a/src/rml/oob/oob_base_stubs.c +++ b/src/rml/oob/oob_base_stubs.c @@ -4,7 +4,7 @@ * reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. + * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -218,6 +218,12 @@ void prte_oob_base_get_addr(char **uri) } #endif // PRTE_ENABLE_IPV6 + if (NULL == cptr) { + PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); + *uri = NULL; + return; + } + /* check overall length for limits */ if (0 < prte_oob_base.max_uri_length && prte_oob_base.max_uri_length < (int) (len + strlen(cptr))) { diff --git a/src/util/attr.c b/src/util/attr.c index f84fbaa284..8cd6a09218 100644 --- a/src/util/attr.c +++ b/src/util/attr.c @@ -632,11 +632,7 @@ int prte_attr_load(prte_attribute_t *kv, void *data, pmix_data_type_t type) if (NULL != kv->data.data.string) { free(kv->data.data.string); } - if (NULL != data) { - kv->data.data.string = strdup((const char *) data); - } else { - kv->data.data.string = NULL; - } + kv->data.data.string = strdup((const char *) data); break; case PMIX_SIZE: kv->data.data.size = *(size_t *) (data); From eb577d4883340310927c2ee97405feaddb9f43ba Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 9 Sep 2025 06:24:16 -0600 Subject: [PATCH 3/3] Inherit env directives if requested If someone specifies that child jobs inherit from their parents, then have them inherit any env directives as well as job-level directives. Have children inherit their parent's inheritance directive, unless directed not to do so. Signed-off-by: Ralph Castain --- src/mca/odls/base/odls_base_default_fns.c | 23 +++++++++++++++++++++-- src/mca/rmaps/base/rmaps_base_map_job.c | 7 +++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/mca/odls/base/odls_base_default_fns.c b/src/mca/odls/base/odls_base_default_fns.c index 7e8f409909..2d4203df6c 100644 --- a/src/mca/odls/base/odls_base_default_fns.c +++ b/src/mca/odls/base/odls_base_default_fns.c @@ -1252,14 +1252,15 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata) int j, idx; int total_num_local_procs = 0; prte_odls_launch_local_t *caddy = (prte_odls_launch_local_t *) cbdata; - prte_job_t *jobdat; + prte_job_t *jobdat, *parent; pmix_nspace_t job; prte_odls_base_fork_local_proc_fn_t fork_local = caddy->fork_local; - bool index_argv; + bool index_argv, inherit; char *msg, **xfer; prte_odls_spawn_caddy_t *cd; prte_event_base_t *evb; prte_schizo_base_module_t *schizo; + pmix_proc_t *nptr; PRTE_HIDE_UNUSED_PARAMS(fd, sd); PMIX_ACQUIRE_OBJECT(caddy); @@ -1352,6 +1353,20 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata) } } + // see if we have a parent in case of inheritance + nptr = NULL; + prte_get_attribute(&jobdat->attributes, PRTE_JOB_LAUNCH_PROXY, (void **) &nptr, PMIX_PROC); + if (NULL != nptr) { + parent = prte_get_job_data_object(nptr->nspace); + if (NULL != parent) { + inherit = prte_get_attribute(&parent->attributes, PRTE_JOB_INHERIT, NULL, PMIX_BOOL); + } else { + inherit = false; + } + } else { + inherit = false; + } + for (j = 0; j < jobdat->apps->size; j++) { app = (prte_app_context_t *) pmix_pointer_array_get_item(jobdat->apps, j); if (NULL == app) { @@ -1395,6 +1410,10 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata) } // process any provided env directives + if (inherit) { + // start with the parent's directives + process_envars(parent, app); + } process_envars(jobdat, app); diff --git a/src/mca/rmaps/base/rmaps_base_map_job.c b/src/mca/rmaps/base/rmaps_base_map_job.c index eb6851633a..72944ebe75 100644 --- a/src/mca/rmaps/base/rmaps_base_map_job.c +++ b/src/mca/rmaps/base/rmaps_base_map_job.c @@ -241,11 +241,18 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata) // mapped by us inherit = false; parent = NULL; + } else if (prte_get_attribute(&parent->attributes, PRTE_JOB_INHERIT, NULL, PMIX_BOOL)) { inherit = true; + // if they didn't specifically direct it not inherit, then pass this on to the child + if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_NOINHERIT, NULL, PMIX_BOOL)) { + prte_set_attribute(&jdata->attributes, PRTE_ATTR_GLOBAL, PRTE_JOB_INHERIT, NULL, PMIX_BOOL); + } + } else if (prte_get_attribute(&parent->attributes, PRTE_JOB_NOINHERIT, NULL, PMIX_BOOL)) { inherit = false; parent = NULL; + } else { inherit = prte_rmaps_base.inherit; }