diff --git a/orte/mca/ras/alps/ras_alps_module.c b/orte/mca/ras/alps/ras_alps_module.c index fa42c90ee9..b3b6a2a5fb 100644 --- a/orte/mca/ras/alps/ras_alps_module.c +++ b/orte/mca/ras/alps/ras_alps_module.c @@ -549,6 +549,7 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename, node->slots_inuse = 0; node->slots_max = 0; node->slots = 1; + node->state = ORTE_NODE_STATE_UP; /* need to order these node ids so the regex generator * can properly function */ @@ -585,6 +586,7 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename, node->slots_inuse = 0; node->slots_max = 0; node->slots = apNodes[ix].numPEs; + node->state = ORTE_NODE_STATE_UP; /* need to order these node ids so the regex generator * can properly function */ diff --git a/orte/mca/ras/lsf/ras_lsf_module.c b/orte/mca/ras/lsf/ras_lsf_module.c index bab7fa0bdb..589bd96042 100644 --- a/orte/mca/ras/lsf/ras_lsf_module.c +++ b/orte/mca/ras/lsf/ras_lsf_module.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved + * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,6 +39,7 @@ #include "orte/util/show_help.h" #include "orte/mca/ras/base/ras_private.h" +#include "orte/mca/ras/base/base.h" #include "ras_lsf.h" @@ -98,6 +100,8 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) if (NULL != node && 0 == strcmp(nodelist[i], node->name)) { /* it is a repeat - just bump the slot count */ ++node->slots; + opal_output_verbose(10, orte_ras_base_framework.framework_output, + "ras/lsf: +++ Node (%s) [slots=%d]", node->name, node->slots); continue; } @@ -107,7 +111,11 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) node->slots_inuse = 0; node->slots_max = 0; node->slots = 1; + node->state = ORTE_NODE_STATE_UP; opal_list_append(nodes, &node->super); + + opal_output_verbose(10, orte_ras_base_framework.framework_output, + "ras/lsf: New Node (%s) [slots=%d]", node->name, node->slots); } /* release the nodelist from lsf */ @@ -141,14 +149,20 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD); } - /* get the apps and set the hostfile attribute in each to point to - * the hostfile */ - for (i=0; i < jdata->apps->size; i++) { - if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { - continue; - } - orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, true, (void*)affinity_file, OPAL_STRING); + /* + * Do not set the hostfile attribute on each app_context since that + * would confuse the sequential mapper when it tries to assign bindings + * when running an MPMD job. + * Instead just overwrite the orte_default_hostfile so it will be + * general for all of the app_contexts. + */ + if( NULL != orte_default_hostfile ) { + free(orte_default_hostfile); + orte_default_hostfile = NULL; } + orte_default_hostfile = strdup(affinity_file); + opal_output_verbose(10, orte_ras_base_framework.framework_output, + "ras/lsf: Set default_hostfile to %s",orte_default_hostfile); return ORTE_SUCCESS; } diff --git a/orte/mca/ras/tm/ras_tm_module.c b/orte/mca/ras/tm/ras_tm_module.c index e58fdd4427..4daed28a49 100644 --- a/orte/mca/ras/tm/ras_tm_module.c +++ b/orte/mca/ras/tm/ras_tm_module.c @@ -212,6 +212,7 @@ static int discover(opal_list_t* nodelist, char *pbs_jobid) node->slots_inuse = 0; node->slots_max = 0; node->slots = ppn; + node->state = ORTE_NODE_STATE_UP; opal_list_append(nodelist, &node->super); } else {