diff --git a/NEWS b/NEWS
index a3dcc274ce8..da3792e6092 100644
--- a/NEWS
+++ b/NEWS
@@ -19,7 +19,7 @@
 Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
 Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
 Copyright (c) 2012 University of Houston. All rights reserved.
 Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
-Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
+Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
 Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
 $COPYRIGHT$
@@ -71,6 +71,7 @@ Master (not on release branches yet)
 - Remove IB XRC support from the OpenIB BTL due to lack of support.
 - Remove support for big endian PowerPC.
 - Remove support for XL compilers older than v13.1
+- Fix rank-by algorithms to properly rank by object and span
 
 3.0.0 -- September, 2017
 ------------------------
diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h
index 82871636809..a33e0f332f8 100644
--- a/ompi/mca/osc/rdma/osc_rdma.h
+++ b/ompi/mca/osc/rdma/osc_rdma.h
@@ -12,7 +12,7 @@
  * reserved.
  * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
- * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -568,7 +568,7 @@ static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync)
 
     OPAL_THREAD_SCOPED_LOCK(&sync->lock,
                             OPAL_LIST_FOREACH_SAFE(aggregation, next, &sync->aggregations, ompi_osc_rdma_aggregation_t) {
-                                fprintf (stderr, "Flushing aggregation %p, peeer %p\n", aggregation, aggregation->peer);
+                                fprintf (stderr, "Flushing aggregation %p, peer %p\n", (void*)aggregation, (void*)aggregation->peer);
                                 ompi_osc_rdma_peer_aggregate_flush (aggregation->peer);
                             });
 }
diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c
index 4ccc68db6bd..aa48af5c229 100644
--- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c
+++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c
@@ -4,7 +4,7 @@
  * reserved.
  * Copyright (c) 2016-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
- * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -50,6 +50,7 @@ struct ompi_osc_rdma_event_t {
 
 typedef struct ompi_osc_rdma_event_t ompi_osc_rdma_event_t;
 
+#if 0
 static void *ompi_osc_rdma_event_put (int fd, int flags, void *context)
 {
     ompi_osc_rdma_event_t *event = (ompi_osc_rdma_event_t *) context;
@@ -112,7 +113,7 @@ static int ompi_osc_rdma_event_queue (ompi_osc_rdma_module_t *module, struct mca
 
     return OMPI_SUCCESS;
 }
 
-
+#endif
 static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count, ompi_datatype_t *source_datatype,
                                      void *result_buffer, int result_count, ompi_datatype_t *result_datatype,
@@ -188,10 +189,7 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
                                              ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request)
 {
     ompi_osc_rdma_module_t *module = sync->module;
-    const size_t btl_alignment_mask = ALIGNMENT_MASK(module->selected_btl->btl_get_alignment);
     unsigned long len = target_count * target_datatype->super.size;
-    ompi_osc_rdma_frag_t *frag = NULL;
-    volatile bool complete = false;
     char *ptr = NULL;
     int ret;
@@ -523,7 +521,7 @@ static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const
     ompi_osc_rdma_module_t *module = sync->module;
     int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
     int ret, btl_op, flags;
-    int64_t origin, result;
+    int64_t origin;
 
     if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
         (!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
@@ -590,13 +588,13 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi
         new_value = old_value;
 
         if (&ompi_mpi_op_replace.op == op) {
-            memcpy ((void *)((intptr_t) &new_value) + offset, origin_addr, extent);
+            memcpy ((void *)((intptr_t) &new_value + offset), origin_addr, extent);
         } else if (&ompi_mpi_op_no_op.op != op) {
-            ompi_op_reduce (op, (void *) origin_addr, (void *)((intptr_t) &new_value) + offset, 1, dt);
+            ompi_op_reduce (op, (void *) origin_addr, (void*)((intptr_t) &new_value + offset), 1, dt);
         }
 
         ret = ompi_osc_rdma_btl_cswap (module, peer->data_endpoint, address, target_handle,
-                                       old_value, new_value, 0, &new_value);
+                                       old_value, new_value, 0, (int64_t*)&new_value);
         if (OPAL_SUCCESS != ret || new_value == old_value) {
             break;
         }
@@ -605,7 +603,7 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi
     } while (1);
 
     if (result_addr) {
-        memcpy (result_addr, (void *)((intptr_t) &new_value) + offset, extent);
+        memcpy (result_addr, (void *)((intptr_t) &new_value + offset), extent);
     }
 
     if (OPAL_SUCCESS == ret) {
@@ -696,11 +694,9 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
                             mca_btl_base_registration_handle_t *target_handle, bool lock_acquired)
 {
     ompi_osc_rdma_module_t *module = sync->module;
-    const size_t btl_alignment_mask = ALIGNMENT_MASK(module->selected_btl->btl_get_alignment);
-    unsigned long offset, aligned_len, len = datatype->super.size;
+    unsigned long len = datatype->super.size;
     mca_btl_base_registration_handle_t *local_handle = NULL;
     ompi_osc_rdma_frag_t *frag = NULL;
-    ompi_osc_rdma_request_t *request;
     volatile bool complete = false;
     /* drop the const. this code will not attempt to change the value */
     char *ptr = (char *) source_addr;
diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c
index b4fb3dec648..dd52e4938e8 100644
--- a/ompi/mca/osc/rdma/osc_rdma_active_target.c
+++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c
@@ -16,7 +16,7 @@
  * Copyright (c) 2017 The University of Tennessee and The University
  *                    of Tennessee Research Foundation. All rights
  *                    reserved.
- * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -80,7 +80,7 @@ void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t *btl, struct mca_btl_b
 {
     ompi_osc_rdma_pending_op_t *pending_op = (ompi_osc_rdma_pending_op_t *) context;
 
-    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "pending atomic %p complete with status %d", pending_op, status);
+    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "pending atomic %p complete with status %d", (void*)pending_op, status);
 
     if (pending_op->op_result) {
         memmove (pending_op->op_result, pending_op->op_buffer, pending_op->op_size);
@@ -296,7 +296,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
 {
     ompi_osc_rdma_module_t *module = GET_MODULE(win);
     ompi_osc_rdma_peer_t **peers;
-    int my_rank = ompi_comm_rank (module->comm);
     ompi_osc_rdma_state_t *state = module->state;
     int ret = OMPI_SUCCESS;
diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c
index 0d506374c91..fda90e91221 100644
--- a/ompi/mca/osc/rdma/osc_rdma_comm.c
+++ b/ompi/mca/osc/rdma/osc_rdma_comm.c
@@ -2,7 +2,7 @@
 /*
  * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
  *                         reserved.
- * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
  * Copyright (c) 2017 Research Organization for Information Science
  *                    and Technology (RIST). All rights reserved.
  * Copyright (c) 2017 IBM Corporation. All rights reserved.
@@ -492,6 +492,7 @@ static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_pee
     return ret;
 }
 
+#if 0
 static void ompi_osc_rdma_aggregate_append (ompi_osc_rdma_aggregation_t *aggregation, ompi_osc_rdma_request_t *request,
                                             void *source_buffer, size_t size)
 {
@@ -550,13 +551,16 @@ static int ompi_osc_rdma_aggregate_alloc (ompi_osc_rdma_sync_t *sync, ompi_osc_r
 
     return OMPI_SUCCESS;
 }
+#endif
 
 int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
                               mca_btl_base_registration_handle_t *target_handle, void *source_buffer, size_t size,
                               ompi_osc_rdma_request_t *request)
 {
     ompi_osc_rdma_module_t *module = sync->module;
+#if 0
     ompi_osc_rdma_aggregation_t *aggregation = peer->aggregate;
+#endif
     mca_btl_base_registration_handle_t *local_handle = NULL;
     mca_btl_base_rdma_completion_fn_t cbfunc = NULL;
     ompi_osc_rdma_frag_t *frag = NULL;
diff --git a/ompi/mca/osc/rdma/osc_rdma_passive_target.c b/ompi/mca/osc/rdma/osc_rdma_passive_target.c
index 37b1bee2577..dc11c5e31df 100644
--- a/ompi/mca/osc/rdma/osc_rdma_passive_target.c
+++ b/ompi/mca/osc/rdma/osc_rdma_passive_target.c
@@ -12,6 +12,7 @@
  * reserved.
  * Copyright (c) 2010 IBM Corporation. All rights reserved.
  * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
+ * Copyright (c) 2018 Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -202,7 +203,7 @@ int ompi_osc_rdma_demand_lock_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdm
         } while (0);
         );
 
-    return OMPI_SUCCESS;
+    return ret;
 }
 
 int ompi_osc_rdma_lock_atomic (int lock_type, int target, int assert, ompi_win_t *win)
diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c
index eb91100b24e..92b76d4097e 100644
--- a/orte/mca/plm/base/plm_base_launch_support.c
+++ b/orte/mca/plm/base/plm_base_launch_support.c
@@ -190,9 +190,17 @@ void orte_plm_base_allocation_complete(int fd, short args, void *cbdata)
 
     ORTE_ACQUIRE_OBJECT(caddy);
 
-    /* move the state machine along */
-    caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
-    ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
+    /* if we don't want to launch, then we at least want
+     * to map so we can see where the procs would have
+     * gone - so skip to the mapping state */
+    if (orte_do_not_launch) {
+        caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
+        ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_MAP);
+    } else {
+        /* move the state machine along */
+        caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
+        ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
+    }
 
     /* cleanup */
     OBJ_RELEASE(caddy);
diff --git a/orte/mca/ras/base/ras_base_node.c b/orte/mca/ras/base/ras_base_node.c
index e24e2a6bab8..8e8c8f10c24 100644
--- a/orte/mca/ras/base/ras_base_node.c
+++ b/orte/mca/ras/base/ras_base_node.c
@@ -11,7 +11,7 @@
  * All rights reserved.
  * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
  *                         reserved.
- * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  *                    and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -50,6 +50,8 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
     bool hnp_alone = true, skiphnp = false;
     orte_attribute_t *kv;
     char **alias=NULL, **nalias;
+    orte_proc_t *daemon;
+    orte_job_t *djob;
 
     /* get the number of nodes */
     num_nodes = (orte_std_cntr_t)opal_list_get_size(nodes);
@@ -76,6 +78,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
         return rc;
     }
 
+    /* if we are not launching, get the daemon job */
+    djob = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
+
     /* get the hnp node's info */
     hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
@@ -189,6 +194,21 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
                 ORTE_ERROR_LOG(rc);
                 return rc;
             }
+            if (orte_do_not_launch) {
+                /* create a daemon for this node since we won't be launching
+                 * and the mapper needs to see a daemon - this is used solely
+                 * for testing the mappers */
+                daemon = OBJ_NEW(orte_proc_t);
+                daemon->name.jobid = ORTE_PROC_MY_NAME->jobid;
+                daemon->name.vpid = node->index;
+                daemon->state = ORTE_PROC_STATE_RUNNING;
+                OBJ_RETAIN(node);
+                daemon->node = node;
+                opal_pointer_array_set_item(djob->procs, daemon->name.vpid, daemon);
+                djob->num_procs++;
+                OBJ_RETAIN(daemon);
+                node->daemon = daemon;
+            }
             /* update the total slots in the job */
             orte_ras_base.total_slots_alloc += node->slots;
             /* check if we have fqdn names in the allocation */
diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c
index dd7eea91c86..f12a3b4275a 100644
--- a/orte/mca/ras/simulator/ras_sim_module.c
+++ b/orte/mca/ras/simulator/ras_sim_module.c
@@ -3,7 +3,7 @@
 * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
 * Copyright (c) 2015-2017 Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
-* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
+* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
 *
 * $COPYRIGHT$
 *
@@ -23,6 +23,7 @@
 #include "opal/mca/hwloc/hwloc-internal.h"
 #include "opal/util/argv.h"
 
+#include "orte/mca/errmgr/errmgr.h"
 #include "orte/util/show_help.h"
 #include "orte/runtime/orte_globals.h"
@@ -179,6 +180,10 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
             support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
             support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
             support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;
+            /* pass it thru the filter so we create the summaries required by the mappers */
+            if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) {
+                ORTE_ERROR_LOG(ORTE_ERROR);
+            }
             /* add it to our array */
             t = OBJ_NEW(orte_topology_t);
             t->topo = topo;
diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c
index df379994751..0ead042f5a4 100644
--- a/orte/mca/rmaps/base/rmaps_base_binding.c
+++ b/orte/mca/rmaps/base/rmaps_base_binding.c
@@ -12,7 +12,7 @@
 * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
 *                         All rights reserved.
-* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
+* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
 * Copyright (c) 2015-2017 Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
@@ -246,7 +246,7 @@ static int bind_downwards(orte_job_t *jdata,
                           hwloc_obj_type_t target,
                           unsigned cache_level)
 {
-    int j;
+    int j, rc;
     orte_job_map_t *map;
     orte_proc_t *proc;
     hwloc_obj_t trg_obj, nxt_obj;
@@ -367,7 +367,10 @@ static int bind_downwards(orte_job_t *jdata,
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&proc->name), node->name);
             } else {
-                opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology->topo, totalcpuset);
+                rc = opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology->topo, totalcpuset);
+                if (OPAL_SUCCESS != rc) {
+                    ORTE_ERROR_LOG(rc);
+                }
                 opal_output(orte_rmaps_base_framework.framework_output,
                             "%s BOUND PROC %s[%s] TO %s: %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -841,7 +844,8 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
         if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
             continue;
         }
-        if (!orte_no_vm && (int)ORTE_PROC_MY_NAME->vpid != node->index) {
+        if (!orte_no_vm && !orte_do_not_launch &&
+            (int)ORTE_PROC_MY_NAME->vpid != node->index) {
             continue;
         }
         if (!orte_do_not_launch) {
diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c
index 33c1f11a976..925c2305dbe 100644
--- a/orte/mca/rmaps/base/rmaps_base_map_job.c
+++ b/orte/mca/rmaps/base/rmaps_base_map_job.c
@@ -417,7 +417,33 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
         }
     }
 
-    if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
+    if (orte_do_not_launch) {
+        /* compute the ranks and add the proc objects
+         * to the jdata->procs array */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
+            goto cleanup;
+        }
+        /* compute and save local ranks */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
+            goto cleanup;
+        }
+        /* compute and save location assignments */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
+            goto cleanup;
+        }
+        /* compute and save bindings */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
+            goto cleanup;
+        }
+    } else if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
         /* compute and save location assignments */
         if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
             ORTE_ERROR_LOG(rc);
@@ -454,6 +480,11 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
         }
     }
 
+    if (orte_do_not_launch) {
+        /* display the devel map */
+        orte_rmaps_base_display_map(jdata);
+    }
+
     /* set the job state to the next position */
     ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE);
diff --git a/orte/mca/rmaps/base/rmaps_base_ranking.c b/orte/mca/rmaps/base/rmaps_base_ranking.c
index 2d4e364cc2c..9eaea79ccf2 100644
--- a/orte/mca/rmaps/base/rmaps_base_ranking.c
+++ b/orte/mca/rmaps/base/rmaps_base_ranking.c
@@ -10,7 +10,7 @@
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
-* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
+* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
 * Copyright (c) 2017      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
@@ -304,15 +304,15 @@ static int rank_by(orte_job_t *jdata,
 {
     orte_app_context_t *app;
     hwloc_obj_t obj;
-    int num_objs, i, j, m, n, rc;
+    int num_objs, i, j, m, n, rc, nn;
     orte_vpid_t num_ranked=0;
     orte_node_t *node;
     orte_proc_t *proc, *pptr;
-    orte_vpid_t vpid;
+    orte_vpid_t vpid, np;
     int cnt;
     opal_pointer_array_t objs;
-    bool all_done;
     hwloc_obj_t locale;
+    orte_app_idx_t napp;
 
     if (ORTE_RANKING_SPAN & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) {
         return rank_span(jdata, target, cache_level);
     }
@@ -333,20 +333,21 @@ static int rank_by(orte_job_t *jdata,
      */
     vpid = 0;
-    for (n=0; n < jdata->apps->size; n++) {
+    for (n=0, napp=0; napp < jdata->num_apps && n < jdata->apps->size; n++) {
         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
            continue;
         }
-
+        napp++;
         /* setup the pointer array */
         OBJ_CONSTRUCT(&objs, opal_pointer_array_t);
         opal_pointer_array_init(&objs, 2, INT_MAX, 2);
 
         cnt = 0;
-        for (m=0; m < jdata->map->nodes->size; m++) {
+        for (m=0, nn=0; nn < jdata->map->num_nodes && m < jdata->map->nodes->size; m++) {
             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
                 continue;
             }
+            nn++;
             /* get the number of objects - only consider those we can actually use */
             num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
@@ -376,80 +377,83 @@ static int rank_by(orte_job_t *jdata,
              * Perhaps someday someone will come up with a more efficient
             * algorithm, but this works for now.
             */
-            all_done = false;
-            while (!all_done && cnt < app->num_procs) {
-                all_done = true;
-                /* cycle across the objects */
-                for (i=0; i < num_objs && cnt < app->num_procs && all_done; i++) {
-                    obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i);
-                    /* find the next proc for this job and app_context */
-                    for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
-                        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
-                            continue;
-                        }
-                        /* ignore procs from other jobs */
-                        if (proc->name.jobid != jdata->jobid) {
-                            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
-                                                "mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d",
-                                                ORTE_NAME_PRINT(&proc->name), num_ranked);
-                            continue;
-                        }
-                        /* ignore procs that are already ranked */
-                        if (ORTE_VPID_INVALID != proc->name.vpid) {
-                            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
-                                                "mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d",
-                                                ORTE_NAME_PRINT(&proc->name), num_ranked);
-                            continue;
-                        }
-                        /* ignore procs from other apps */
-                        if (proc->app_idx != app->idx) {
-                            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
-                                                "mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d",
-                                                ORTE_NAME_PRINT(&proc->name), num_ranked);
-                            continue;
-                        }
-                        /* protect against bozo case */
-                        locale = NULL;
-                        if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
-                            ORTE_ERROR_LOG(ORTE_ERROR);
-                            return ORTE_ERROR;
-                        }
-                        /* ignore procs not on this object */
-                        if (NULL == locale ||
-                            !hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
-                            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
-                                                "mca:rmaps:rank_by: proc at position %d is not on object %d",
-                                                j, i);
-                            continue;
-                        }
-                        /* assign the vpid */
-                        proc->name.vpid = vpid++;
-                        if (0 == cnt) {
-                            app->first_rank = proc->name.vpid;
-                        }
-                        cnt++;
+            i = 0;
+            while (cnt < app->num_procs) {
+                /* get the next object */
+                obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i);
+                if (NULL == obj) {
+                    break;
+                }
+                /* scan across the procs and find the one that is on this object */
+                np = 0;
+                for (j=0; np < node->num_procs && j < node->procs->size && cnt < app->num_procs; j++) {
+                    if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
+                        continue;
+                    }
+                    np++;
+                    /* ignore procs from other jobs */
+                    if (proc->name.jobid != jdata->jobid) {
                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
-                                            "mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid));
-                        /* insert the proc into the jdata array */
-                        if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
-                            OBJ_RELEASE(pptr);
-                        }
-                        OBJ_RETAIN(proc);
-                        if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
-                            ORTE_ERROR_LOG(rc);
-                            OBJ_DESTRUCT(&objs);
-                            return rc;
-                        }
-                        /* flag that one was mapped */
-                        all_done = false;
-                        /* track where the highest vpid landed - this is our
-                         * new bookmark
-                         */
-                        jdata->bookmark = node;
-                        /* move to next object */
-                        break;
+                                            "mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d",
+                                            ORTE_NAME_PRINT(&proc->name), num_ranked);
+                        continue;
+                    }
+                    /* ignore procs that are already ranked */
+                    if (ORTE_VPID_INVALID != proc->name.vpid) {
+                        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
+                                            "mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d",
+                                            ORTE_NAME_PRINT(&proc->name), num_ranked);
+                        continue;
+                    }
+                    /* ignore procs from other apps */
+                    if (proc->app_idx != app->idx) {
+                        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
+                                            "mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d",
+                                            ORTE_NAME_PRINT(&proc->name), num_ranked);
+                        continue;
+                    }
+                    /* protect against bozo case */
+                    locale = NULL;
+                    if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
+                        ORTE_ERROR_LOG(ORTE_ERROR);
+                        return ORTE_ERROR;
                     }
+                    /* ignore procs not on this object */
+                    if (NULL == locale ||
+                        !hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
+                        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
+                                            "mca:rmaps:rank_by: proc at position %d is not on object %d",
+                                            j, i);
+                        continue;
+                    }
+                    /* assign the vpid */
+                    proc->name.vpid = vpid++;
+                    if (0 == cnt) {
+                        app->first_rank = proc->name.vpid;
+                    }
+                    cnt++;
+                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
+                                        "mca:rmaps:rank_by: proc in position %d is on object %d assigned rank %s",
+                                        j, i, ORTE_VPID_PRINT(proc->name.vpid));
+                    /* insert the proc into the jdata array */
+                    if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
+                        OBJ_RELEASE(pptr);
+                    }
+                    OBJ_RETAIN(proc);
+                    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
+                        ORTE_ERROR_LOG(rc);
+                        OBJ_DESTRUCT(&objs);
+                        return rc;
+                    }
+                    num_ranked++;
+                    /* track where the highest vpid landed - this is our
+                     * new bookmark
+                     */
+                    jdata->bookmark = node;
+                    /* move to next object */
+                    break;
                 }
+                i++;
             }
         }
         /* cleanup */
@@ -473,6 +477,9 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
 
     map = jdata->map;
 
+    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
+                        "RANKING POLICY: %s", orte_rmaps_base_print_ranking(map->ranking));
+
     /* start with the rank-by object options - if the object isn't
      * included in the topology, then we obviously cannot rank by it.
     * However, if this was the default ranking policy (as opposed to