NEWS (5 additions, 0 deletions)
@@ -65,6 +65,11 @@ Master (not on release branches yet)
   via --enable-mpi-cxx.
 - Removed embedded VampirTrace. It is in maintenance mode since 2013.
   Please consider Score-P (score-p.org) as an external replacement.
+- Add an MCA parameter, ras_base_launch_orted_on_hn, which allows MPI
+  processes on the node where mpirun is executing to be launched by a
+  separate orte daemon rather than by the mpirun process. This may be
+  useful under SLURM, as it improves interoperability with SLURM's signal
+  propagation tools. The default is false, except on Cray XC systems.
 
 3.0.0 -- July, 2017
 -------------------
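Usage note (not part of the diff): like any MCA parameter, this one can be set on the mpirun command line or through the environment. A minimal sketch, assuming an application ./a.out and standard Open MPI parameter-passing conventions:

    # override the non-Cray default of false at launch time
    mpirun --mca ras_base_launch_orted_on_hn 1 -np 4 ./a.out

    # equivalently, via the environment
    export OMPI_MCA_ras_base_launch_orted_on_hn=1
    mpirun -np 4 ./a.out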
orte/mca/ras/base/base.h (1 addition, 0 deletions)
@@ -51,6 +51,7 @@ typedef struct orte_ras_base_t {
     orte_ras_base_module_t *active_module;
     int total_slots_alloc;
     int multiplier;
+    bool launch_orted_on_hn;
 } orte_ras_base_t;
 
 ORTE_DECLSPEC extern orte_ras_base_t orte_ras_base;
orte/mca/ras/base/ras_base_frame.c (25 additions, 0 deletions)
@@ -59,6 +59,31 @@ static int ras_register(mca_base_register_flag_t flags)
                           NULL, 0, 0,
                           OPAL_INFO_LVL_9,
                           MCA_BASE_VAR_SCOPE_READONLY, &orte_ras_base.multiplier);
+#if SLURM_CRAY_ENV
+    /*
+     * If we are in a Cray-SLURM environment, then we cannot
+     * launch procs local to the HNP. The problem
+     * is the MPI processes launched on the head node (where the
+     * ORTE_PROC_IS_HNP evaluates to true) get launched by a daemon
+     * (mpirun) which is not a child of a slurmd daemon. This
+     * means that any RDMA credentials obtained via the odls/alps
+     * local launcher are incorrect. Test for this condition. If
+     * found, then take steps to ensure we launch a daemon on
+     * the same node as mpirun and that it gets used to fork
+     * local procs instead of mpirun so they get the proper
+     * credential */
+
+    orte_ras_base.launch_orted_on_hn = true;
+#else
+    orte_ras_base.launch_orted_on_hn = false;
+#endif
+
+    mca_base_var_register("orte", "ras", "base", "launch_orted_on_hn",
+                          "Launch an orte daemon on the head node",
+                          MCA_BASE_VAR_TYPE_BOOL,
+                          NULL, 0, 0,
+                          OPAL_INFO_LVL_9,
+                          MCA_BASE_VAR_SCOPE_READONLY, &orte_ras_base.launch_orted_on_hn);
     return ORTE_SUCCESS;
 }
 
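An observation on the pattern above (not part of the diff): the compiled-in default is written to orte_ras_base.launch_orted_on_hn first, and mca_base_var_register() then binds the variable to that storage, so a value supplied by the user at runtime overrides the default. Assuming a standard build with ompi_info on the PATH, the registered parameter and its default should be visible with:

    # show the ras framework's base-level parameters at info level 9
    ompi_info --param ras base --level 9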
orte/mca/ras/base/ras_base_node.c (16 additions, 25 deletions)
@@ -9,7 +9,7 @@
  *                         University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
- * Copyright (c) 2011-2012 Los Alamos National Security, LLC.  All rights
+ * Copyright (c) 2011-2017 Los Alamos National Security, LLC.  All rights
  *                         reserved.
  * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015      Research Organization for Information Science
@@ -78,33 +78,24 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
 
     /* get the hnp node's info */
     hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
-#if SLURM_CRAY_ENV
-    /* if we are in a Cray-SLURM environment, then we cannot
-     * launch procs local to the HNP. The problem
-     * is the MPI processes launched on the head node (where the
-     * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon
-     * (mpirun) which is not a child of a slurmd daemon. This
-     * means that any RDMA credentials obtained via the odls/alps
-     * local launcher are incorrect. Test for this condition. If
-     * found, then take steps to ensure we launch a daemon on
-     * the same node as mpirun and that it gets used to fork
-     * local procs instead of mpirun so they get the proper
-     * credential */
-    if (NULL != hnp_node) {
-        OPAL_LIST_FOREACH(node, nodes, orte_node_t) {
-            if (orte_ifislocal(node->name)) {
-                orte_hnp_is_allocated = true;
-                break;
-            }
-        }
-        if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) {
-            hnp_node->name = strdup("mpirun");
-            skiphnp = true;
-            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
-        }
-    }
-#endif
-
+
+    if ((orte_ras_base.launch_orted_on_hn == true) &&
+        (orte_managed_allocation)) {
+        if (NULL != hnp_node) {
+            OPAL_LIST_FOREACH(node, nodes, orte_node_t) {
+                if (orte_ifislocal(node->name)) {
+                    orte_hnp_is_allocated = true;
+                    break;
+                }
+            }
+            if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) &
+                ORTE_MAPPING_NO_USE_LOCAL)) {
+                hnp_node->name = strdup("mpirun");
+                skiphnp = true;
+                ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
+            }
+        }
+    }
 
     /* cycle through the list */
     while (NULL != (item = opal_list_remove_first(nodes))) {
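To make the new gating in orte_ras_base_node_insert() explicit: the whole relabeling block now runs only when the parameter is enabled and the allocation is managed by a resource manager, a second condition the old #if SLURM_CRAY_ENV code never checked. A standalone C sketch of that condition (illustrative only; the helper name is invented, and the two arguments stand in for the globals orte_ras_base.launch_orted_on_hn and orte_managed_allocation):

    #include <stdbool.h>
    #include <stdio.h>

    /* Mirrors the condition the patch adds in orte_ras_base_node_insert():
     * local MPI procs are diverted to a separate orted only when the MCA
     * parameter is enabled AND the allocation is managed (e.g. by SLURM). */
    static bool launch_daemon_on_head_node(bool launch_orted_on_hn,
                                           bool managed_allocation)
    {
        return launch_orted_on_hn && managed_allocation;
    }

    int main(void)
    {
        printf("%d\n", launch_daemon_on_head_node(true, true));  /* 1 */
        printf("%d\n", launch_daemon_on_head_node(true, false)); /* 0: unmanaged run, mpirun still forks local procs */
        return 0;
    }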