From 9047d68f1d3d537adfe486e51700a154b297b9d9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 6 Aug 2019 07:48:58 -0700 Subject: [PATCH 1/2] Allow individual jobs to set their map/rank/bind policies Override the defaults when provided. Ignore LSF binding file if user overrides by specifying a policy. Fixes #6631 Signed-off-by: Ralph Castain (cherry picked from commit ea0dfc321809db50f78e742da1d22f9ef59650a3) --- orte/mca/ras/lsf/ras_lsf_module.c | 16 ++++++++++++++-- orte/mca/rmaps/base/rmaps_base_frame.c | 1 + orte/orted/pmix/pmix_server_dyn.c | 21 --------------------- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/orte/mca/ras/lsf/ras_lsf_module.c b/orte/mca/ras/lsf/ras_lsf_module.c index becec82f213..43b3c7e7571 100644 --- a/orte/mca/ras/lsf/ras_lsf_module.c +++ b/orte/mca/ras/lsf/ras_lsf_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2014 Intel, Inc. All rights reserved + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -70,6 +70,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) char *affinity_file; struct stat buf; char *ptr; + bool directives_given = false; /* get the list of allocated nodes */ if ((num_nodes = lsb_getalloc(&nodelist)) < 0) { @@ -112,8 +113,19 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) /* release the nodelist from lsf */ opal_argv_free(nodelist); + /* check to see if any mapping or binding directives were given */ + if (NULL != jdata && NULL != jdata->map) { + if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) || + OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { + directives_given = true; + } + } else if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) || + OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) { + directives_given = true; + } + /* check for an affinity file */ - if (NULL != (affinity_file = getenv("LSB_AFFINITY_HOSTFILE"))) { + if (!directives_given && NULL != (affinity_file = getenv("LSB_AFFINITY_HOSTFILE"))) { /* check to see if the file is empty - if it is, * then affinity wasn't actually set for this job */ if (0 != stat(affinity_file, &buf)) { diff --git a/orte/mca/rmaps/base/rmaps_base_frame.c b/orte/mca/rmaps/base/rmaps_base_frame.c index d933fe24b6a..ca3d5537a71 100644 --- a/orte/mca/rmaps/base/rmaps_base_frame.c +++ b/orte/mca/rmaps/base/rmaps_base_frame.c @@ -562,6 +562,7 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp) for (i=0; NULL != ck2[i]; i++) { if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) { ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_SPAN); + ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_GIVEN); found = true; } else if (0 == strncasecmp(ck2[i], "pe", strlen("pe"))) { /* break this at the = sign to get the number */ diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 4735d248e11..f064cc40f7e 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -282,13 +282,6 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, /*** MAP-BY ***/ } else if (0 == strcmp(info->key, OPAL_PMIX_MAPBY)) { - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", - true, "mapping", info->data.string, - orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); - return ORTE_ERR_BAD_PARAM; - } rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping, NULL, info->data.string); if (ORTE_SUCCESS != rc) { @@ -297,13 +290,6 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, /*** RANK-BY ***/ } else if (0 == strcmp(info->key, OPAL_PMIX_RANKBY)) { - if (ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { - /* not allowed to provide multiple ranking policies */ - orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", - true, "ranking", info->data.string, - orte_rmaps_base_print_ranking(orte_rmaps_base.ranking)); - return ORTE_ERR_BAD_PARAM; - } rc = orte_rmaps_base_set_ranking_policy(&jdata->map->ranking, jdata->map->mapping, info->data.string); @@ -313,13 +299,6 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, /*** BIND-TO ***/ } else if (0 == strcmp(info->key, OPAL_PMIX_BINDTO)) { - if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { - /* not allowed to provide multiple mapping policies */ - orte_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, - info->data.string, - opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); - return ORTE_ERR_BAD_PARAM; - } rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, info->data.string); if (ORTE_SUCCESS != rc) { From 7ebc42cd8f929c0dc018fed40b46b9a4aefca6be Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 7 Aug 2019 05:47:12 -0700 Subject: [PATCH 2/2] Fix typos Provide a missing header and paren Thanks to @zerothi for the assistance Signed-off-by: Ralph Castain (cherry picked from commit bd5a1765eea200651babc5bfd9f45a9f3cedefbc) --- orte/mca/ras/lsf/ras_lsf_module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/orte/mca/ras/lsf/ras_lsf_module.c b/orte/mca/ras/lsf/ras_lsf_module.c index 43b3c7e7571..6dd3b68be5f 100644 --- a/orte/mca/ras/lsf/ras_lsf_module.c +++ b/orte/mca/ras/lsf/ras_lsf_module.c @@ -36,6 +36,7 @@ #include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/base/base.h" #include "orte/runtime/orte_globals.h" #include "orte/util/show_help.h" @@ -120,7 +121,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) directives_given = true; } } else if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) || - OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) { + OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { directives_given = true; }