From acb1626409827f8aab7a9909e2afbe3a623be163 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 25 May 2016 19:42:31 -0700 Subject: [PATCH] Do not set a binding policy when we are overloading the default - in 1.10, we allow oversubscription by default for non-managed allocations, and we silently do-not-bind if the user has not requested a binding policy and we are overloaded. However, we do enforce oversubscription limits if we are in managed allocations OR the user specified the #slots for a node (either in hostfile or via -host). So be sure to check that for all cases. --- orte/mca/rmaps/base/rmaps_base_binding.c | 5 ++--- orte/mca/rmaps/round_robin/rmaps_rr_mappers.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index a7bfb04526..6594950ffa 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -119,7 +119,7 @@ static void unbind_procs(orte_job_t *jdata) } } } - + static int bind_upwards(orte_job_t *jdata, orte_node_t *node, hwloc_obj_type_t target, @@ -335,7 +335,6 @@ static int bind_downwards(orte_job_t *jdata, return ORTE_ERR_SILENT; } else { /* if we have the default binding policy, then just don't bind */ - OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE); unbind_procs(jdata); hwloc_bitmap_zero(totalcpuset); return ORTE_SUCCESS; @@ -373,7 +372,7 @@ static int bind_downwards(orte_job_t *jdata, } } hwloc_bitmap_free(totalcpuset); - + return ORTE_SUCCESS; } diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index 2ce3e5950a..37bda12c05 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -386,6 +386,24 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata, * properly set */ node->oversubscribed = true; + /* check for permission */ + if (node->slots_given) { + /* if we weren't given a directive either way, then we will error out + * as the #slots were specifically given, either by the host RM or + * via hostfile/dash-host */ + if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { + orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", + true, app->num_procs, app->app); + ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + return ORTE_ERR_SILENT; + } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { + /* if we were explicitly told not to oversubscribe, then don't */ + orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", + true, app->num_procs, app->app); + ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + return ORTE_ERR_SILENT; + } + } } if (nprocs_mapped == app->num_procs) { /* we are done */