Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
Copyright (c) 2012 University of Houston. All rights reserved.
Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights
reserved.
$COPYRIGHT$
Expand Down Expand Up @@ -71,6 +71,7 @@ Master (not on release branches yet)
- Remove IB XRC support from the OpenIB BTL due to lack of support.
- Remove support for big endian PowerPC.
- Remove support for XL compilers older than v13.1.
- Fix rank-by algorithms to properly rank by object and span.

3.0.0 -- September, 2017
------------------------
Expand Down
4 changes: 2 additions & 2 deletions ompi/mca/osc/rdma/osc_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -568,7 +568,7 @@ static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync)

OPAL_THREAD_SCOPED_LOCK(&sync->lock,
OPAL_LIST_FOREACH_SAFE(aggregation, next, &sync->aggregations, ompi_osc_rdma_aggregation_t) {
fprintf (stderr, "Flushing aggregation %p, peeer %p\n", aggregation, aggregation->peer);
fprintf (stderr, "Flushing aggregation %p, peer %p\n", (void*)aggregation, (void*)aggregation->peer);
ompi_osc_rdma_peer_aggregate_flush (aggregation->peer);
});
}
Expand Down
22 changes: 9 additions & 13 deletions ompi/mca/osc/rdma/osc_rdma_accumulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* reserved.
* Copyright (c) 2016-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -50,6 +50,7 @@ struct ompi_osc_rdma_event_t {

typedef struct ompi_osc_rdma_event_t ompi_osc_rdma_event_t;

#if 0
static void *ompi_osc_rdma_event_put (int fd, int flags, void *context)
{
ompi_osc_rdma_event_t *event = (ompi_osc_rdma_event_t *) context;
Expand Down Expand Up @@ -112,7 +113,7 @@ static int ompi_osc_rdma_event_queue (ompi_osc_rdma_module_t *module, struct mca

return OMPI_SUCCESS;
}

#endif

static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count, ompi_datatype_t *source_datatype,
void *result_buffer, int result_count, ompi_datatype_t *result_datatype,
Expand Down Expand Up @@ -188,10 +189,7 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request)
{
ompi_osc_rdma_module_t *module = sync->module;
const size_t btl_alignment_mask = ALIGNMENT_MASK(module->selected_btl->btl_get_alignment);
unsigned long len = target_count * target_datatype->super.size;
ompi_osc_rdma_frag_t *frag = NULL;
volatile bool complete = false;
char *ptr = NULL;
int ret;

Expand Down Expand Up @@ -523,7 +521,7 @@ static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const
ompi_osc_rdma_module_t *module = sync->module;
int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
int ret, btl_op, flags;
int64_t origin, result;
int64_t origin;

if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
Expand Down Expand Up @@ -590,13 +588,13 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi
new_value = old_value;

if (&ompi_mpi_op_replace.op == op) {
memcpy ((void *)((intptr_t) &new_value) + offset, origin_addr, extent);
memcpy ((void *)((intptr_t) &new_value + offset), origin_addr, extent);
} else if (&ompi_mpi_op_no_op.op != op) {
ompi_op_reduce (op, (void *) origin_addr, (void *)((intptr_t) &new_value) + offset, 1, dt);
ompi_op_reduce (op, (void *) origin_addr, (void*)((intptr_t) &new_value + offset), 1, dt);
}

ret = ompi_osc_rdma_btl_cswap (module, peer->data_endpoint, address, target_handle,
old_value, new_value, 0, &new_value);
old_value, new_value, 0, (int64_t*)&new_value);
if (OPAL_SUCCESS != ret || new_value == old_value) {
break;
}
Expand All @@ -605,7 +603,7 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi
} while (1);

if (result_addr) {
memcpy (result_addr, (void *)((intptr_t) &new_value) + offset, extent);
memcpy (result_addr, (void *)((intptr_t) &new_value + offset), extent);
}

if (OPAL_SUCCESS == ret) {
Expand Down Expand Up @@ -696,11 +694,9 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
mca_btl_base_registration_handle_t *target_handle, bool lock_acquired)
{
ompi_osc_rdma_module_t *module = sync->module;
const size_t btl_alignment_mask = ALIGNMENT_MASK(module->selected_btl->btl_get_alignment);
unsigned long offset, aligned_len, len = datatype->super.size;
unsigned long len = datatype->super.size;
mca_btl_base_registration_handle_t *local_handle = NULL;
ompi_osc_rdma_frag_t *frag = NULL;
ompi_osc_rdma_request_t *request;
volatile bool complete = false;
/* drop the const. this code will not attempt to change the value */
char *ptr = (char *) source_addr;
Expand Down
5 changes: 2 additions & 3 deletions ompi/mca/osc/rdma/osc_rdma_active_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* Copyright (c) 2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -80,7 +80,7 @@ void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t *btl, struct mca_btl_b
{
ompi_osc_rdma_pending_op_t *pending_op = (ompi_osc_rdma_pending_op_t *) context;

OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "pending atomic %p complete with status %d", pending_op, status);
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "pending atomic %p complete with status %d", (void*)pending_op, status);

if (pending_op->op_result) {
memmove (pending_op->op_result, pending_op->op_buffer, pending_op->op_size);
Expand Down Expand Up @@ -296,7 +296,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win)
{
ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_peer_t **peers;
int my_rank = ompi_comm_rank (module->comm);
ompi_osc_rdma_state_t *state = module->state;
int ret = OMPI_SUCCESS;

Expand Down
6 changes: 5 additions & 1 deletion ompi/mca/osc/rdma/osc_rdma_comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
/*
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
Expand Down Expand Up @@ -492,6 +492,7 @@ static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_pee
return ret;
}

#if 0
static void ompi_osc_rdma_aggregate_append (ompi_osc_rdma_aggregation_t *aggregation, ompi_osc_rdma_request_t *request,
void *source_buffer, size_t size)
{
Expand Down Expand Up @@ -550,13 +551,16 @@ static int ompi_osc_rdma_aggregate_alloc (ompi_osc_rdma_sync_t *sync, ompi_osc_r

return OMPI_SUCCESS;
}
#endif

int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
mca_btl_base_registration_handle_t *target_handle, void *source_buffer, size_t size,
ompi_osc_rdma_request_t *request)
{
ompi_osc_rdma_module_t *module = sync->module;
#if 0
ompi_osc_rdma_aggregation_t *aggregation = peer->aggregate;
#endif
mca_btl_base_registration_handle_t *local_handle = NULL;
mca_btl_base_rdma_completion_fn_t cbfunc = NULL;
ompi_osc_rdma_frag_t *frag = NULL;
Expand Down
3 changes: 2 additions & 1 deletion ompi/mca/osc/rdma/osc_rdma_passive_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -202,7 +203,7 @@ int ompi_osc_rdma_demand_lock_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdm
} while (0);
);

return OMPI_SUCCESS;
return ret;
}

int ompi_osc_rdma_lock_atomic (int lock_type, int target, int assert, ompi_win_t *win)
Expand Down
14 changes: 11 additions & 3 deletions orte/mca/plm/base/plm_base_launch_support.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,17 @@ void orte_plm_base_allocation_complete(int fd, short args, void *cbdata)

ORTE_ACQUIRE_OBJECT(caddy);

/* move the state machine along */
caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
/* if we don't want to launch, then we at least want
* to map so we can see where the procs would have
* gone - so skip to the mapping state */
if (orte_do_not_launch) {
caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_MAP);
} else {
/* move the state machine along */
caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
}

/* cleanup */
OBJ_RELEASE(caddy);
Expand Down
22 changes: 21 additions & 1 deletion orte/mca/ras/base/ras_base_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -50,6 +50,8 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
bool hnp_alone = true, skiphnp = false;
orte_attribute_t *kv;
char **alias=NULL, **nalias;
orte_proc_t *daemon;
orte_job_t *djob;

/* get the number of nodes */
num_nodes = (orte_std_cntr_t)opal_list_get_size(nodes);
Expand All @@ -76,6 +78,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
return rc;
}

/* get the daemon job - it is only used below when we are not launching */
djob = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);

/* get the hnp node's info */
hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);

Expand Down Expand Up @@ -189,6 +194,21 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
ORTE_ERROR_LOG(rc);
return rc;
}
if (orte_do_not_launch) {
/* create a daemon for this node since we won't be launching
* and the mapper needs to see a daemon - this is used solely
* for testing the mappers */
daemon = OBJ_NEW(orte_proc_t);
daemon->name.jobid = ORTE_PROC_MY_NAME->jobid;
daemon->name.vpid = node->index;
daemon->state = ORTE_PROC_STATE_RUNNING;
OBJ_RETAIN(node);
daemon->node = node;
opal_pointer_array_set_item(djob->procs, daemon->name.vpid, daemon);
djob->num_procs++;
OBJ_RETAIN(daemon);
node->daemon = daemon;
}
/* update the total slots in the job */
orte_ras_base.total_slots_alloc += node->slots;
/* check if we have fqdn names in the allocation */
Expand Down
7 changes: 6 additions & 1 deletion orte/mca/ras/simulator/ras_sim_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
Expand All @@ -23,6 +23,7 @@
#include "opal/mca/hwloc/hwloc-internal.h"
#include "opal/util/argv.h"

#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"

Expand Down Expand Up @@ -179,6 +180,10 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;
/* pass it thru the filter so we create the summaries required by the mappers */
if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) {
ORTE_ERROR_LOG(ORTE_ERROR);
}
/* add it to our array */
t = OBJ_NEW(orte_topology_t);
t->topo = topo;
Expand Down
12 changes: 8 additions & 4 deletions orte/mca/rmaps/base/rmaps_base_binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -246,7 +246,7 @@ static int bind_downwards(orte_job_t *jdata,
hwloc_obj_type_t target,
unsigned cache_level)
{
int j;
int j, rc;
orte_job_map_t *map;
orte_proc_t *proc;
hwloc_obj_t trg_obj, nxt_obj;
Expand Down Expand Up @@ -367,7 +367,10 @@ static int bind_downwards(orte_job_t *jdata,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&proc->name), node->name);
} else {
opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology->topo, totalcpuset);
rc = opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology->topo, totalcpuset);
if (OPAL_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
opal_output(orte_rmaps_base_framework.framework_output,
"%s BOUND PROC %s[%s] TO %s: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
Expand Down Expand Up @@ -841,7 +844,8 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
continue;
}
if (!orte_no_vm && (int)ORTE_PROC_MY_NAME->vpid != node->index) {
if (!orte_no_vm && !orte_do_not_launch &&
(int)ORTE_PROC_MY_NAME->vpid != node->index) {
continue;
}
if (!orte_do_not_launch) {
Expand Down
33 changes: 32 additions & 1 deletion orte/mca/rmaps/base/rmaps_base_map_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,33 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
}

if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
if (orte_do_not_launch) {
/* compute the ranks and add the proc objects
* to the jdata->procs array */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
goto cleanup;
}
/* compute and save local ranks */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
goto cleanup;
}
/* compute and save location assignments */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
goto cleanup;
}
/* compute and save bindings */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
goto cleanup;
}
} else if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
/* compute and save location assignments */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
ORTE_ERROR_LOG(rc);
Expand Down Expand Up @@ -454,6 +480,11 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
}

if (orte_do_not_launch) {
/* display the devel map */
orte_rmaps_base_display_map(jdata);
}

/* set the job state to the next position */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE);

Expand Down
Loading