Skip to content

Commit

Permalink
Update the allocation simulator
Browse files Browse the repository at this point in the history
Always default the number of slots to the available cpus
in the topology. Ensure that we always display some form
of the resulting process map, or else we will silently
exit.

Signed-off-by: Ralph Castain <rhc@pmix.org>
  • Loading branch information
rhc54 committed Apr 3, 2024
1 parent 5cde35d commit f01e2a2
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 15 deletions.
19 changes: 15 additions & 4 deletions src/mca/plm/base/plm_base_launch_support.c
Expand Up @@ -128,8 +128,8 @@ void prte_plm_base_daemons_reported(int fd, short args, void *cbdata)

/* if we are not launching, then we just assume that all
* daemons share our topology */
if (prte_get_attribute(&caddy->jdata->attributes, PRTE_JOB_DO_NOT_LAUNCH, NULL, PMIX_BOOL)
&& PMIX_CHECK_NSPACE(caddy->jdata->nspace, PRTE_PROC_MY_NAME->nspace)) {
if (prte_get_attribute(&caddy->jdata->attributes, PRTE_JOB_DO_NOT_LAUNCH, NULL, PMIX_BOOL) &&
PMIX_CHECK_NSPACE(caddy->jdata->nspace, PRTE_PROC_MY_NAME->nspace)) {
node = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, 0);
t = node->topology;
for (i = 1; i < prte_node_pool->size; i++) {
Expand Down Expand Up @@ -345,7 +345,7 @@ static void stack_trace_recv(int status, pmix_proc_t *sender, pmix_data_buffer_t
int rc;
pmix_byte_object_t bo;

PMIX_DATA_BUFFER_CONSTRUCT(&blob);
PMIX_DATA_BUFFER_CONSTRUCT(&blob);
PRTE_HIDE_UNUSED_PARAMS(status, tag, cbdata);

pmix_output_verbose(5, prte_plm_base_framework.framework_output,
Expand Down Expand Up @@ -640,7 +640,8 @@ void prte_plm_base_setup_job(int fd, short args, void *cbdata)

PMIX_ACQUIRE_OBJECT(caddy);

PMIX_OUTPUT_VERBOSE((5, prte_plm_base_framework.framework_output, "%s plm:base:setup_job",
PMIX_OUTPUT_VERBOSE((5, prte_plm_base_framework.framework_output,
"%s plm:base:setup_job",
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)));

if (PRTE_JOB_STATE_INIT != caddy->job_state) {
Expand Down Expand Up @@ -689,6 +690,16 @@ void prte_plm_base_setup_job(int fd, short args, void *cbdata)
prte_event_evtimer_add(timer->ev, &timer->tv);
}

// if we are not going to launch this job, then ensure we output something - otherwise,
// we will simply silently exit
if (!prte_get_attribute(&caddy->jdata->attributes, PRTE_JOB_DO_NOT_LAUNCH, NULL, PMIX_BOOL) &&
!prte_get_attribute(&caddy->jdata->attributes, PRTE_JOB_DISPLAY_MAP, NULL, PMIX_BOOL) &&
!prte_get_attribute(&caddy->jdata->attributes, PRTE_JOB_DISPLAY_DEVEL_MAP, NULL, PMIX_BOOL)) {
// default to the devel map
prte_set_attribute(&caddy->jdata->attributes, PRTE_JOB_DISPLAY_DEVEL_MAP, PRTE_ATTR_GLOBAL,
NULL, PMIX_BOOL);
}

/* set the job state to the next position */
PRTE_ACTIVATE_JOB_STATE(caddy->jdata, PRTE_JOB_STATE_INIT_COMPLETE);

Expand Down
6 changes: 3 additions & 3 deletions src/mca/ras/simulator/ras_sim_component.c
Expand Up @@ -16,7 +16,7 @@
* Copyright (c) 2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -60,13 +60,13 @@ static int ras_sim_register(void)
{
pmix_mca_base_component_t *component = &prte_mca_ras_simulator_component.super;

prte_mca_ras_simulator_component.slots = "1";
prte_mca_ras_simulator_component.slots = NULL;
(void) pmix_mca_base_component_var_register(component, "slots",
"Comma-separated list of number of slots on each node to simulate",
PMIX_MCA_BASE_VAR_TYPE_STRING,
&prte_mca_ras_simulator_component.slots);

prte_mca_ras_simulator_component.slots_max = "0";
prte_mca_ras_simulator_component.slots_max = NULL;
(void) pmix_mca_base_component_var_register(component, "max_slots",
"Comma-separated list of number of max slots on each node to simulate",
PMIX_MCA_BASE_VAR_TYPE_STRING,
Expand Down
15 changes: 7 additions & 8 deletions src/mca/ras/simulator/ras_sim_module.c
Expand Up @@ -5,7 +5,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
*
* Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -132,19 +132,18 @@ static int allocate(prte_job_t *jdata, pmix_list_t *nodes)
pmix_asprintf(&node->name, "%s%0*d", prefix, dig, i);
node->state = PRTE_NODE_STATE_UP;
node->slots_inuse = 0;
if (NULL == max_slot_cnt || NULL == max_slot_cnt[n]) {
node->slots_max = 0;
} else {
obj = hwloc_get_root_obj(t->topo);
node->slots_max = prte_hwloc_base_get_npus(t->topo, use_hwthread_cpus, available,
obj);
}
if (NULL == slot_cnt || NULL == slot_cnt[n]) {
obj = hwloc_get_root_obj(t->topo);
node->slots = prte_hwloc_base_get_npus(t->topo, use_hwthread_cpus, available, obj);
} else {
node->slots = strtol(slot_cnt[n], NULL, 10);
}
if (NULL == max_slot_cnt || NULL == max_slot_cnt[n]) {
obj = hwloc_get_root_obj(t->topo);
node->slots_max = prte_hwloc_base_get_npus(t->topo, use_hwthread_cpus, available, obj);
} else {
node->slots_max = strtol(max_slot_cnt[n], NULL, 10);
}
PMIX_RETAIN(t);
node->topology = t;
pmix_output_verbose(1, prte_ras_base_framework.framework_output,
Expand Down

0 comments on commit f01e2a2

Please sign in to comment.