Skip to content

Commit

Permalink
Fix app/local rank computation
Browse files Browse the repository at this point in the history
While it was a valiant attempt at improving efficiency, we still
haven't figured out how to do this in one pass. So go back to
a full ranking pass after the job has been mapped.

Signed-off-by: Ralph Castain <rhc@pmix.org>
  • Loading branch information
rhc54 committed Aug 19, 2022
1 parent 0937e7c commit 1fbfb0b
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 98 deletions.
28 changes: 7 additions & 21 deletions src/mca/rmaps/base/rmaps_base_map_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -868,18 +868,11 @@ static int map_colocate(prte_job_t *jdata,
}
}
}
for (i=0; i < jdata->apps->size; i++) {
app = (prte_app_context_t*)pmix_pointer_array_get_item(jdata->apps, i);
if (NULL == app) {
continue;
}
/* calculate the ranks for this app */
ret = prte_rmaps_base_compute_vpids(jdata, app, options);
if (PRTE_SUCCESS != ret) {
return ret;
}
/* calculate the ranks for this job */
ret = prte_rmaps_base_compute_vpids(jdata, options);
if (PRTE_SUCCESS != ret) {
return ret;
}

ret = PRTE_SUCCESS;
goto done;
}
Expand Down Expand Up @@ -934,16 +927,9 @@ static int map_colocate(prte_job_t *jdata,
}
}
}
for (i=0; i < jdata->apps->size; i++) {
app = (prte_app_context_t*)pmix_pointer_array_get_item(jdata->apps, i);
if (NULL == app) {
continue;
}
/* calculate the ranks for this app */
ret = prte_rmaps_base_compute_vpids(jdata, app, options);
if (PRTE_SUCCESS != ret) {
return ret;
}
ret = prte_rmaps_base_compute_vpids(jdata, options);
if (PRTE_SUCCESS != ret) {
return ret;
}
ret = PRTE_SUCCESS;

Expand Down
90 changes: 40 additions & 50 deletions src/mca/rmaps/base/rmaps_base_ranking.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,33 @@
#include "src/mca/rmaps/base/base.h"
#include "src/mca/rmaps/base/rmaps_private.h"

/*
 * Assign proc->app_rank for every proc in the job.
 *
 * For each non-NULL app_context in jdata->apps, walk jdata->procs in
 * array order and number the procs whose app_idx matches that app's
 * idx consecutively, starting at zero. NULL slots in either array
 * are ignored.
 */
static void compute_app_rank(prte_job_t *jdata)
{
    prte_app_context_t *app;
    prte_proc_t *proc;
    int aidx, pidx;
    int next;

    for (aidx = 0; aidx < jdata->apps->size; aidx++) {
        app = (prte_app_context_t *) pmix_pointer_array_get_item(jdata->apps, aidx);
        if (NULL != app) {
            /* numbering restarts for each app_context */
            next = 0;
            /* scan the whole job for procs belonging to this app_context */
            for (pidx = 0; pidx < jdata->procs->size; pidx++) {
                proc = (prte_proc_t *) pmix_pointer_array_get_item(jdata->procs, pidx);
                if (NULL != proc && proc->app_idx == app->idx) {
                    proc->app_rank = next;
                    ++next;
                }
            }
        }
    }
}

int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
prte_app_context_t *app,
prte_rmaps_options_t *options)
{
int m, n;
Expand All @@ -54,13 +79,12 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
prte_proc_t *proc;
int rc;
hwloc_obj_t obj;
pmix_rank_t rank, lrank, apprank;
pmix_rank_t rank, lrank;

if (options->userranked) {
/* ranking has already been done, but we still need to
* compute the local and app ranks (node rank is computed
* on-the-fly during mapping) */
apprank = 0;
for (n=0; n < jdata->map->nodes->size; n++) {
node = (prte_node_t*)pmix_pointer_array_get_item(jdata->map->nodes, n);
if (NULL == node) {
Expand All @@ -75,23 +99,17 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
if (!PMIX_CHECK_NSPACE(jdata->nspace, proc->name.nspace)) {
continue;
}
if (app->idx != proc->app_idx) {
continue;
}
proc->name.rank = rank;
proc->local_rank = lrank;
proc->app_rank = apprank;
PMIX_RETAIN(proc);
rc = pmix_pointer_array_set_item(jdata->procs, proc->name.rank, proc);
if (PMIX_SUCCESS != rc) {
PMIX_RELEASE(proc);
return rc;
}
++rank;
++lrank;
++apprank;
}
}
compute_app_rank(jdata);
return PRTE_SUCCESS;
}

Expand All @@ -101,8 +119,7 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
* proc array - this is the order in which they
* were assigned */
if (PRTE_RANK_BY_SLOT == options->rank) {
rank = options->last_rank;
apprank = 0;
rank = 0;
for (n=0; n < jdata->map->nodes->size; n++) {
node = (prte_node_t*)pmix_pointer_array_get_item(jdata->map->nodes, n);
if (NULL == node) {
Expand All @@ -117,12 +134,8 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
if (!PMIX_CHECK_NSPACE(jdata->nspace, proc->name.nspace)) {
continue;
}
if (app->idx != proc->app_idx) {
continue;
}
proc->name.rank = rank;
proc->local_rank = lrank;
proc->app_rank = apprank;
PMIX_RETAIN(proc);
rc = pmix_pointer_array_set_item(jdata->procs, proc->name.rank, proc);
if (PMIX_SUCCESS != rc) {
Expand All @@ -131,25 +144,22 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
}
++rank;
++lrank;
++apprank;
}
}
/* save the starting place for the next app */
options->last_rank = rank;
compute_app_rank(jdata);
return PRTE_SUCCESS;
}

/* if we are ranking by NODE, then we use the number of nodes
* used by this app (which is stored in the "options" struct)
* and increment the rank for each proc on each node by that */
if (PRTE_RANK_BY_NODE == options->rank) {
apprank = 0;
for (n=0; n < jdata->map->nodes->size; n++) {
node = (prte_node_t*)pmix_pointer_array_get_item(jdata->map->nodes, n);
if (NULL == node) {
continue;
}
rank = n + options->last_rank;
rank = n;
lrank = 0;
for (m=0; m < node->procs->size; m++) {
proc = (prte_proc_t*)pmix_pointer_array_get_item(node->procs, m);
Expand All @@ -159,12 +169,8 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
if (!PMIX_CHECK_NSPACE(jdata->nspace, proc->name.nspace)) {
continue;
}
if (app->idx != proc->app_idx) {
continue;
}
proc->name.rank = rank;
proc->local_rank = lrank;
proc->app_rank = apprank;
PMIX_RETAIN(proc);
rc = pmix_pointer_array_set_item(jdata->procs, proc->name.rank, proc);
if (PMIX_SUCCESS != rc) {
Expand All @@ -173,20 +179,17 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
}
rank += options->nnodes;
++lrank;
++apprank;
}
}
/* save the starting place for the next app */
options->last_rank = rank;
compute_app_rank(jdata);
return PRTE_SUCCESS;
}

/* if we are ranking FILL, we rank all procs on a given
* object on each node prior to moving to the next object
* on that node */
if (PRTE_RANK_BY_FILL == options->rank) {
rank = options->last_rank;
apprank = 0;
rank = 0;
for (n=0; n < jdata->map->nodes->size; n++) {
node = (prte_node_t*)pmix_pointer_array_get_item(jdata->map->nodes, n);
if (NULL == node) {
Expand All @@ -206,16 +209,12 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
if (!PMIX_CHECK_NSPACE(jdata->nspace, proc->name.nspace)) {
continue;
}
if (app->idx != proc->app_idx) {
continue;
}
if (obj != proc->obj) {
continue;
}
/* this proc is on this object, so rank it */
proc->name.rank = rank;
proc->local_rank = lrank;
proc->app_rank = apprank;
PMIX_RETAIN(proc);
rc = pmix_pointer_array_set_item(jdata->procs, proc->name.rank, proc);
if (PMIX_SUCCESS != rc) {
Expand All @@ -224,12 +223,10 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
}
rank++;
lrank++;
apprank++;
}
}
}
/* save the starting place for the next app */
options->last_rank = rank;
compute_app_rank(jdata);
return PRTE_SUCCESS;
}

Expand All @@ -242,11 +239,10 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
* are in the node's proc array in object order. Hence, we have
* to search for them even though that eats up time */
if (PRTE_RANK_BY_SPAN == options->rank) {
apprank = 0;
rank = options->last_rank;
rank = 0;
pass = 0;
while (apprank < app->num_procs) {
for (n=0; n < jdata->map->nodes->size && apprank < app->num_procs; n++) {
while (rank < jdata->num_procs) {
for (n=0; n < jdata->map->nodes->size && rank < jdata->num_procs; n++) {
node = (prte_node_t*)pmix_pointer_array_get_item(jdata->map->nodes, n);
if (NULL == node) {
continue;
Expand All @@ -255,28 +251,24 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
options->maptype, options->cmaplvl);
lrank = pass * nobjs;
/* make a pass across all objects on this node */
for (k=0; k < nobjs && apprank < app->num_procs; k++) {
for (k=0; k < nobjs && rank < jdata->num_procs; k++) {
/* get this object */
obj = prte_hwloc_base_get_obj_by_type(node->topology->topo,
options->maptype, options->cmaplvl, k);
/* find an unranked proc on this object */
for (m=0; m < node->procs->size && apprank < app->num_procs; m++) {
for (m=0; m < node->procs->size && rank < jdata->num_procs; m++) {
proc = (prte_proc_t*)pmix_pointer_array_get_item(node->procs, m);
if (NULL == proc) {
continue;
}
if (!PMIX_CHECK_NSPACE(jdata->nspace, proc->name.nspace)) {
continue;
}
if (app->idx != proc->app_idx) {
continue;
}
if (obj != proc->obj) {
continue;
}
if (PMIX_RANK_INVALID == proc->name.rank) {
proc->name.rank = rank;
proc->app_rank = apprank;
proc->local_rank = lrank;
PMIX_RETAIN(proc);
rc = pmix_pointer_array_set_item(jdata->procs, proc->name.rank, proc);
Expand All @@ -285,7 +277,6 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
return rc;
}
++rank;
++apprank;
++lrank;
break;
}
Expand All @@ -294,8 +285,7 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
}
++pass;
}
/* save the starting place for the next app */
options->last_rank = rank;
compute_app_rank(jdata);
return PRTE_SUCCESS;
}

Expand Down
1 change: 0 additions & 1 deletion src/mca/rmaps/base/rmaps_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ PRTE_EXPORT void prte_rmaps_base_get_starting_point(pmix_list_t *node_list,


PRTE_EXPORT int prte_rmaps_base_compute_vpids(prte_job_t *jdata,
prte_app_context_t *app,
prte_rmaps_options_t *options);

PRTE_EXPORT int prte_rmaps_base_bind_proc(prte_job_t *jdata,
Expand Down
9 changes: 3 additions & 6 deletions src/mca/rmaps/ppr/rmaps_ppr.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,18 +346,15 @@ static int ppr_mapper(prte_job_t *jdata,
rc = PRTE_ERR_SILENT;
goto error;
}
/* calculate the ranks for this app */
rc = prte_rmaps_base_compute_vpids(jdata, app, options);
if (PRTE_SUCCESS != rc) {
return rc;
}

jdata->num_procs += app->num_procs;

PMIX_LIST_DESTRUCT(&node_list);
}
free(jobppr);
return PRTE_SUCCESS;
/* calculate the ranks for this job */
rc = prte_rmaps_base_compute_vpids(jdata, options);
return rc;

error:
PMIX_LIST_DESTRUCT(&node_list);
Expand Down
4 changes: 2 additions & 2 deletions src/mca/rmaps/rank_file/rmaps_rank_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,6 @@ static int prte_rmaps_rf_map(prte_job_t *jdata,
}
jdata->num_procs++;
}
/* compute local/app ranks */
rc = prte_rmaps_base_compute_vpids(jdata, app, options);
/* update the starting point */
vpid_start += app->num_procs;
/* cleanup the node list - it can differ from one app_context
Expand All @@ -356,6 +354,8 @@ static int prte_rmaps_rf_map(prte_job_t *jdata,
if (NULL != rankfile) {
free(rankfile);
}
/* compute local/app ranks */
rc = prte_rmaps_base_compute_vpids(jdata, options);
return rc;

error:
Expand Down
1 change: 0 additions & 1 deletion src/mca/rmaps/rmaps_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ typedef struct {
unsigned nnodes;
unsigned total_nobjs;
unsigned nobjs;
pmix_rank_t last_rank;

/* binding values */
prte_binding_policy_t bind;
Expand Down
4 changes: 3 additions & 1 deletion src/mca/rmaps/round_robin/rmaps_rr.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,10 @@ static int prte_rmaps_rr_map(prte_job_t *jdata,
*/
PMIX_LIST_DESTRUCT(&node_list);
}
/* calculate the ranks for this job */
rc = prte_rmaps_base_compute_vpids(jdata, options);

return PRTE_SUCCESS;
return rc;

error:
PMIX_LIST_DESTRUCT(&node_list);
Expand Down
Loading

0 comments on commit 1fbfb0b

Please sign in to comment.