Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions opal/mca/hwloc/base/hwloc_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -1703,26 +1703,23 @@ static char *bitmap2rangestr(int bitmap)
static int build_map(int *num_sockets_arg, int *num_cores_arg,
hwloc_cpuset_t cpuset, int ***map, hwloc_topology_t topo)
{
static int num_sockets = -1, num_cores = -1;
int num_sockets, num_cores;
int socket_index, core_index, pu_index;
hwloc_obj_t socket, core, pu;
int **data;

/* Find out how many sockets we have (cached so that we don't have
to look this up every time) */
if (num_sockets < 0) {
num_sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET);
/* some systems (like the iMac) only have one
* socket and so don't report a socket
*/
if (0 == num_sockets) {
num_sockets = 1;
}
/* Lazy: take the total number of cores that we have in the
topology; that'll be more than the max number of cores
under any given socket */
num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE);
/* Find out how many sockets we have */
num_sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET);
/* some systems (like the iMac) only have one
* socket and so don't report a socket
*/
if (0 == num_sockets) {
num_sockets = 1;
}
/* Lazy: take the total number of cores that we have in the
topology; that'll be more than the max number of cores
under any given socket */
num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE);
*num_sockets_arg = num_sockets;
*num_cores_arg = num_cores;

Expand Down Expand Up @@ -1791,7 +1788,7 @@ int opal_hwloc_base_cset2str(char *str, int len,
int ret, socket_index, core_index;
char tmp[BUFSIZ];
const int stmp = sizeof(tmp) - 1;
int **map;
int **map=NULL;
hwloc_obj_t root;
opal_hwloc_topo_data_t *sum;

Expand Down Expand Up @@ -1819,7 +1816,6 @@ int opal_hwloc_base_cset2str(char *str, int len,
if (OPAL_SUCCESS != (ret = build_map(&num_sockets, &num_cores, cpuset, &map, topo))) {
return ret;
}

/* Iterate over the data matrix and build up the string */
first = true;
for (socket_index = 0; socket_index < num_sockets; ++socket_index) {
Expand All @@ -1837,8 +1833,12 @@ int opal_hwloc_base_cset2str(char *str, int len,
}
}
}
free(map[0]);
free(map);
if (NULL != map) {
if (NULL != map[0]) {
free(map[0]);
}
free(map);
}

return OPAL_SUCCESS;
}
Expand Down
2 changes: 2 additions & 0 deletions orte/mca/ras/simulator/ras_sim.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -22,6 +23,7 @@ struct orte_ras_sim_component_t {
char * slots;
char * slots_max;
char *topofiles;
char *topologies;
bool have_cpubind;
bool have_membind;
};
Expand Down
8 changes: 8 additions & 0 deletions orte/mca/ras/simulator/ras_sim_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -95,6 +96,13 @@ static int ras_sim_register(void)
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ras_simulator_component.topofiles);
mca_ras_simulator_component.topologies = NULL;
(void) mca_base_component_var_register (component, "topologies",
"Comma-separated list of topology descriptions for simulated nodes",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ras_simulator_component.topologies);
mca_ras_simulator_component.have_cpubind = true;
(void) mca_base_component_var_register (component, "have_cpubind",
"Topology supports binding to cpus",
Expand Down
124 changes: 103 additions & 21 deletions orte/mca/ras/simulator/ras_sim_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -56,6 +57,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
unsigned j, k;
struct hwloc_topology_support *support;
char **files=NULL;
char **topos = NULL;
bool use_local_topology = false;
#endif
char **node_cnt=NULL;
Expand All @@ -65,32 +67,39 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
char prefix[6];

node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ',');
slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ',');
max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ',');

/* backfill the slot_cnt as reqd so we don't have to
* specify slot_cnt for each set of nodes - we'll set
* */
tmp = slot_cnt[opal_argv_count(slot_cnt)-1];
for (n=opal_argv_count(slot_cnt); n < opal_argv_count(node_cnt); n++) {
opal_argv_append_nosize(&slot_cnt, tmp);
if (NULL != mca_ras_simulator_component.slots) {
slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ',');
/* backfile the slot_cnt so every topology has a cnt */
tmp = slot_cnt[opal_argv_count(slot_cnt)-1];
for (n=opal_argv_count(slot_cnt); n < opal_argv_count(node_cnt); n++) {
opal_argv_append_nosize(&slot_cnt, tmp);
}
}
/* backfill the max_slot_cnt as reqd */
tmp = max_slot_cnt[opal_argv_count(slot_cnt)-1];
for (n=opal_argv_count(max_slot_cnt); n < opal_argv_count(max_slot_cnt); n++) {
opal_argv_append_nosize(&max_slot_cnt, tmp);
if (NULL != mca_ras_simulator_component.slots_max) {
max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ',');
/* backfill the max_slot_cnt as reqd */
tmp = max_slot_cnt[opal_argv_count(slot_cnt)-1];
for (n=opal_argv_count(max_slot_cnt); n < opal_argv_count(max_slot_cnt); n++) {
opal_argv_append_nosize(&max_slot_cnt, tmp);
}
}

#if OPAL_HAVE_HWLOC
if (NULL == mca_ras_simulator_component.topofiles) {
/* use our topology */
use_local_topology = true;
} else {
if (NULL != mca_ras_simulator_component.topofiles) {
files = opal_argv_split(mca_ras_simulator_component.topofiles, ',');
if (opal_argv_count(files) != opal_argv_count(node_cnt)) {
orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
goto error_silent;
}
} else if (NULL != mca_ras_simulator_component.topologies) {
topos = opal_argv_split(mca_ras_simulator_component.topologies, ',');
if (opal_argv_count(topos) != opal_argv_count(node_cnt)) {
orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
goto error_silent;
}
} else {
/* use our topology */
use_local_topology = true;
}
#else
/* If we don't have hwloc and hwloc files were specified, then
Expand Down Expand Up @@ -123,7 +132,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
if (use_local_topology) {
/* use our topology */
topo = opal_hwloc_topology;
} else {
} else if (NULL != files) {
if (0 != hwloc_topology_init(&topo)) {
orte_show_help("help-ras-simulator.txt",
"hwloc API fail", true,
Expand Down Expand Up @@ -188,6 +197,69 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
t->topo = topo;
t->sig = opal_hwloc_base_get_topo_signature(topo);
opal_pointer_array_add(orte_node_topologies, t);
} else {
if (0 != hwloc_topology_init(&topo)) {
orte_show_help("help-ras-simulator.txt",
"hwloc API fail", true,
__FILE__, __LINE__, "hwloc_topology_init");
goto error_silent;
}
if (0 != hwloc_topology_set_synthetic(topo, topos[n])) {
orte_show_help("help-ras-simulator.txt",
"hwloc API fail", true,
__FILE__, __LINE__, "hwloc_topology_set_synthetic");
hwloc_topology_destroy(topo);
goto error_silent;
}
if (0 != hwloc_topology_load(topo)) {
orte_show_help("help-ras-simulator.txt",
"hwloc API fail", true,
__FILE__, __LINE__, "hwloc_topology_load");
hwloc_topology_destroy(topo);
goto error_silent;
}
if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) {
orte_show_help("help-ras-simulator.txt",
"hwloc API fail", true,
__FILE__, __LINE__, "opal_hwloc_base_filter_cpus");
hwloc_topology_destroy(topo);
goto error_silent;
}
/* remove the hostname from the topology. Unfortunately, hwloc
* decided to add the source hostname to the "topology", thus
* rendering it unusable as a pure topological description. So
* we remove that information here.
*/
obj = hwloc_get_root_obj(topo);
for (k=0; k < obj->infos_count; k++) {
if (NULL == obj->infos[k].name ||
NULL == obj->infos[k].value) {
continue;
}
if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
free(obj->infos[k].name);
free(obj->infos[k].value);
/* left justify the array */
for (j=k; j < obj->infos_count-1; j++) {
obj->infos[j] = obj->infos[j+1];
}
obj->infos[obj->infos_count-1].name = NULL;
obj->infos[obj->infos_count-1].value = NULL;
obj->infos_count--;
break;
}
}
/* unfortunately, hwloc does not include support info in its
* xml output :-(( To aid in debugging, we set it here
*/
support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;
/* add it to our array */
t = OBJ_NEW(orte_topology_t);
t->topo = topo;
t->sig = opal_hwloc_base_get_topo_signature(topo);
opal_pointer_array_add(orte_node_topologies, t);
}
#endif

Expand All @@ -196,9 +268,19 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
asprintf(&node->name, "%s%0*d", prefix, dig, i);
node->state = ORTE_NODE_STATE_UP;
node->slots_inuse = 0;
node->slots_max = (NULL == max_slot_cnt[n] ? 0 : atoi(max_slot_cnt[n]));
node->slots = (NULL == slot_cnt[n] ? 0 : atoi(slot_cnt[n]));
#if OPAL_HAVE_HWLOC
if (NULL == max_slot_cnt || NULL == max_slot_cnt[n]) {
node->slots_max = 0;
} else {
obj = hwloc_get_root_obj(topo);
node->slots_max = opal_hwloc_base_get_npus(topo, obj);
}
if (NULL == slot_cnt || NULL == slot_cnt[n]) {
node->slots = 0;
} else {
obj = hwloc_get_root_obj(topo);
node->slots = opal_hwloc_base_get_npus(topo, obj);
}
node->topology = topo;
#endif
opal_output_verbose(1, orte_ras_base_framework.framework_output,
Expand Down
5 changes: 3 additions & 2 deletions orte/mca/rmaps/base/rmaps_base_binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -277,7 +277,8 @@ static int bind_downwards(orte_job_t *jdata,
}
/* bozo check */
locale = NULL;
if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR) ||
NULL == locale) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-locale", true, ORTE_NAME_PRINT(&proc->name));
hwloc_bitmap_free(totalcpuset);
return ORTE_ERR_SILENT;
Expand Down
11 changes: 10 additions & 1 deletion orte/mca/rmaps/base/rmaps_base_map_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,16 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
nprocs = 0;
for (i=0; i < jdata->apps->size; i++) {
if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
nprocs += app->num_procs;
if (0 == app->num_procs) {
opal_list_t nodes;
orte_std_cntr_t slots;
OBJ_CONSTRUCT(&nodes, opal_list_t);
orte_rmaps_base_get_target_nodes(&nodes, &slots, app, ORTE_MAPPING_BYNODE, true, true);
OPAL_LIST_DESTRUCT(&nodes);
nprocs += slots;
} else {
nprocs += app->num_procs;
}
}
}
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
Expand Down
2 changes: 1 addition & 1 deletion orte/runtime/data_type_support/orte_dt_print_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_
if (orte_get_attribute(&src->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&loc, OPAL_PTR)) {
if (NULL != loc) {
if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(locale, sizeof(locale), src->node->topology, loc->cpuset)) {
strcpy(locale, "UNBOUND");
strcpy(locale, "NODE");
}
} else {
strcpy(locale, "UNKNOWN");
Expand Down