From 54f43b0986ce3ab99b486e81325084973a92530a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 20 Aug 2016 10:14:52 -0700 Subject: [PATCH] Restore the coll/sync module and provide a test to verify its operation (cherry picked from commit open-mpi/ompi@9888615e753e136d0043c917c1d3932e98014059) Update to match v2.x definitions Adjust priority downward Set the default value of the barrier counters to zero so the coll/sync component is off by default Fix typo in the COLL_SYNC macro --- .gitignore | 1 + ompi/mca/coll/sync/Makefile.am | 52 +++++ ompi/mca/coll/sync/coll_sync.h | 183 ++++++++++++++++++ ompi/mca/coll/sync/coll_sync_bcast.c | 47 +++++ ompi/mca/coll/sync/coll_sync_component.c | 104 ++++++++++ ompi/mca/coll/sync/coll_sync_exscan.c | 47 +++++ ompi/mca/coll/sync/coll_sync_gather.c | 50 +++++ ompi/mca/coll/sync/coll_sync_gatherv.c | 51 +++++ ompi/mca/coll/sync/coll_sync_module.c | 178 +++++++++++++++++ ompi/mca/coll/sync/coll_sync_reduce.c | 47 +++++ ompi/mca/coll/sync/coll_sync_reduce_scatter.c | 50 +++++ ompi/mca/coll/sync/coll_sync_scan.c | 46 +++++ ompi/mca/coll/sync/coll_sync_scatter.c | 50 +++++ ompi/mca/coll/sync/coll_sync_scatterv.c | 50 +++++ ompi/mca/coll/sync/help-coll-sync.txt | 22 +++ ompi/mca/coll/sync/owner.txt | 7 + orte/test/mpi/Makefile | 2 +- orte/test/mpi/badcoll.c | 28 +++ 18 files changed, 1014 insertions(+), 1 deletion(-) create mode 100644 ompi/mca/coll/sync/Makefile.am create mode 100644 ompi/mca/coll/sync/coll_sync.h create mode 100644 ompi/mca/coll/sync/coll_sync_bcast.c create mode 100644 ompi/mca/coll/sync/coll_sync_component.c create mode 100644 ompi/mca/coll/sync/coll_sync_exscan.c create mode 100644 ompi/mca/coll/sync/coll_sync_gather.c create mode 100644 ompi/mca/coll/sync/coll_sync_gatherv.c create mode 100644 ompi/mca/coll/sync/coll_sync_module.c create mode 100644 ompi/mca/coll/sync/coll_sync_reduce.c create mode 100644 ompi/mca/coll/sync/coll_sync_reduce_scatter.c create mode 100644 ompi/mca/coll/sync/coll_sync_scan.c 
create mode 100644 ompi/mca/coll/sync/coll_sync_scatter.c create mode 100644 ompi/mca/coll/sync/coll_sync_scatterv.c create mode 100644 ompi/mca/coll/sync/help-coll-sync.txt create mode 100644 ompi/mca/coll/sync/owner.txt create mode 100644 orte/test/mpi/badcoll.c diff --git a/.gitignore b/.gitignore index 6c2b71960f..b3524bc3ff 100644 --- a/.gitignore +++ b/.gitignore @@ -395,6 +395,7 @@ orte/test/mpi/pconnect orte/test/mpi/thread_init orte/test/mpi/memcached-dummy orte/test/mpi/coll_test +orte/test/mpi/badcoll orte/test/system/radix orte/test/system/sigusr_trap diff --git a/ompi/mca/coll/sync/Makefile.am b/ompi/mca/coll/sync/Makefile.am new file mode 100644 index 0000000000..61c2437e96 --- /dev/null +++ b/ompi/mca/coll/sync/Makefile.am @@ -0,0 +1,52 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Intel, Inc. 
All rights reserved
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+dist_ompidata_DATA = help-coll-sync.txt
+
+sources = \
+        coll_sync.h \
+        coll_sync_component.c \
+        coll_sync_module.c \
+        coll_sync_bcast.c \
+        coll_sync_exscan.c \
+        coll_sync_gather.c \
+        coll_sync_gatherv.c \
+        coll_sync_reduce.c \
+        coll_sync_reduce_scatter.c \
+        coll_sync_scan.c \
+        coll_sync_scatter.c \
+        coll_sync_scatterv.c
+
+if MCA_BUILD_ompi_coll_sync_DSO
+component_noinst =
+component_install = mca_coll_sync.la
+else
+component_noinst = libmca_coll_sync.la
+component_install =
+endif
+
+mcacomponentdir = $(ompilibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_coll_sync_la_SOURCES = $(sources)
+mca_coll_sync_la_LDFLAGS = -module -avoid-version
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_coll_sync_la_SOURCES =$(sources)
+libmca_coll_sync_la_LDFLAGS = -module -avoid-version
diff --git a/ompi/mca/coll/sync/coll_sync.h b/ompi/mca/coll/sync/coll_sync.h
new file mode 100644
index 0000000000..1bf6120711
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef MCA_COLL_SYNC_EXPORT_H
+#define MCA_COLL_SYNC_EXPORT_H
+
+#include "ompi_config.h"
+
+#include "mpi.h"
+
+#include "opal/class/opal_object.h"
+#include "opal/mca/mca.h"
+#include "opal/util/output.h"
+
+#include "ompi/constants.h"
+#include "ompi/mca/coll/coll.h"
+#include "ompi/mca/coll/base/base.h"
+#include "ompi/communicator/communicator.h"
+
+BEGIN_C_DECLS
+
+/* API functions: component query entry points, module enable, and the
+ * wrappers that comm_query installs as this module's collectives. */
+
+int mca_coll_sync_init_query(bool enable_progress_threads,
+                             bool enable_mpi_threads);
+mca_coll_base_module_t
+*mca_coll_sync_comm_query(struct ompi_communicator_t *comm,
+                          int *priority);
+
+int mca_coll_sync_module_enable(mca_coll_base_module_t *module,
+                                struct ompi_communicator_t *comm);
+
+/* NOTE(review): declared, but no definition appears in this patch and
+ * comm_query leaves coll_barrier NULL -- confirm it is still needed. */
+int mca_coll_sync_barrier(struct ompi_communicator_t *comm,
+                          mca_coll_base_module_t *module);
+
+int mca_coll_sync_bcast(void *buff, int count,
+                        struct ompi_datatype_t *datatype,
+                        int root,
+                        struct ompi_communicator_t *comm,
+                        mca_coll_base_module_t *module);
+
+int mca_coll_sync_exscan(const void *sbuf, void *rbuf, int count,
+                         struct ompi_datatype_t *dtype,
+                         struct ompi_op_t *op,
+                         struct ompi_communicator_t *comm,
+                         mca_coll_base_module_t *module);
+
+int mca_coll_sync_gather(const void *sbuf, int scount,
+                         struct ompi_datatype_t *sdtype,
+                         void *rbuf, int rcount,
+                         struct ompi_datatype_t *rdtype,
+                         int root,
+                         struct ompi_communicator_t *comm,
+                         mca_coll_base_module_t *module);
+
+int mca_coll_sync_gatherv(const void *sbuf, int scount,
+                          struct ompi_datatype_t *sdtype,
+                          void *rbuf, int *rcounts, int *disps,
+                          struct ompi_datatype_t *rdtype,
+                          int root,
+                          struct ompi_communicator_t *comm,
+                          mca_coll_base_module_t *module);
+
+int mca_coll_sync_reduce(const void *sbuf, void *rbuf, int count,
+                         struct ompi_datatype_t *dtype,
+                         struct ompi_op_t *op,
+                         int root,
+                         struct ompi_communicator_t *comm,
+                         mca_coll_base_module_t *module);
+
+int mca_coll_sync_reduce_scatter(const void *sbuf, void *rbuf,
+                                 int *rcounts,
+                                 struct ompi_datatype_t *dtype,
+                                 struct ompi_op_t *op,
+                                 struct ompi_communicator_t *comm,
+                                 mca_coll_base_module_t *module);
+
+int mca_coll_sync_scan(const void *sbuf, void *rbuf, int count,
+                       struct ompi_datatype_t *dtype,
+                       struct ompi_op_t *op,
+                       struct ompi_communicator_t *comm,
+                       mca_coll_base_module_t *module);
+
+int mca_coll_sync_scatter(const void *sbuf, int scount,
+                          struct ompi_datatype_t *sdtype,
+                          void *rbuf, int rcount,
+                          struct ompi_datatype_t *rdtype,
+                          int root,
+                          struct ompi_communicator_t *comm,
+                          mca_coll_base_module_t *module);
+
+int mca_coll_sync_scatterv(const void *sbuf, int *scounts, int *disps,
+                           struct ompi_datatype_t *sdtype,
+                           void *rbuf, int rcount,
+                           struct ompi_datatype_t *rdtype,
+                           int root,
+                           struct ompi_communicator_t *comm,
+                           mca_coll_base_module_t *module);
+
+
+/* Types */
+/* Module */
+
+typedef struct mca_coll_sync_module_t {
+    mca_coll_base_module_t super;
+
+    /* Pointers to all the "real" collective functions */
+    mca_coll_base_comm_coll_t c_coll;
+
+    /* How many ops we've executed */
+    int before_num_operations;
+
+    /* How many ops we've executed (it's easier to have 2) */
+    int after_num_operations;
+
+    /* Avoid recursion of syncs */
+    bool in_operation;
+} mca_coll_sync_module_t;
+
+OBJ_CLASS_DECLARATION(mca_coll_sync_module_t);
+
+/* Component */
+
+typedef struct mca_coll_sync_component_t {
+    mca_coll_base_component_2_0_0_t super;
+
+    /* Priority of this component */
+    int priority;
+
+    /* Do a sync *before* each Nth collective */
+    int barrier_before_nops;
+
+    /* Do a sync *after* each Nth collective */
+    int barrier_after_nops;
+} mca_coll_sync_component_t;
+
+/* Globally exported variables */
+
+OMPI_MODULE_DECLSPEC extern mca_coll_sync_component_t mca_coll_sync_component;
+
+/*
+ * Macro used in most of the collectives.
+ *
+ * Runs "op" (the call into the saved underlying collective) between the
+ * optional barriers, then RETURNS from the enclosing function with
+ * the resulting error code.  A variable named "comm" must be in scope
+ * at the expansion site.
+ *
+ * A barrier count <= 0 disables that barrier and leaves its counter
+ * untouched, so it cannot overflow.  Counters are compared with >= so
+ * an after-barrier skipped because "op" failed fires on the next
+ * successful call instead of never firing again.
+ */
+
+#define COLL_SYNC(m, op) \
+do { \
+    int err = MPI_SUCCESS; \
+    (m)->in_operation = true; \
+    if (mca_coll_sync_component.barrier_before_nops > 0 && \
+        OPAL_UNLIKELY(++((m)->before_num_operations) >= \
+                      mca_coll_sync_component.barrier_before_nops)) { \
+        (m)->before_num_operations = 0; \
+        err = (m)->c_coll.coll_barrier(comm, (m)->c_coll.coll_barrier_module); \
+    } \
+    if (OPAL_LIKELY(MPI_SUCCESS == err)) { \
+        err = (op); \
+    } \
+    if (mca_coll_sync_component.barrier_after_nops > 0 && \
+        OPAL_UNLIKELY(++((m)->after_num_operations) >= \
+                      mca_coll_sync_component.barrier_after_nops) && \
+        OPAL_LIKELY(MPI_SUCCESS == err)) { \
+        (m)->after_num_operations = 0; \
+        err = (m)->c_coll.coll_barrier(comm, (m)->c_coll.coll_barrier_module); \
+    } \
+    (m)->in_operation = false; \
+    return err; \
+} while(0)
+
+END_C_DECLS
+
+#endif /* MCA_COLL_SYNC_EXPORT_H */
diff --git a/ompi/mca/coll/sync/coll_sync_bcast.c b/ompi/mca/coll/sync/coll_sync_bcast.c
new file mode 100644
index 0000000000..696f78dbc6
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_bcast.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "mpi.h"
+#include "coll_sync.h"
+
+
+/*
+ * bcast
+ *
+ * Function: - broadcast, wrapped by the optional sync barriers
+ * Accepts: - same arguments as MPI_Bcast()
+ * Returns: - MPI_SUCCESS or error code
+ */
+int mca_coll_sync_bcast(void *buff, int count,
+                        struct ompi_datatype_t *datatype, int root,
+                        struct ompi_communicator_t *comm,
+                        mca_coll_base_module_t *module)
+{
+    mca_coll_sync_module_t *smod = (mca_coll_sync_module_t *) module;
+
+    if (!smod->in_operation) {
+        /* Outermost call: COLL_SYNC adds the barriers and returns */
+        COLL_SYNC(smod, smod->c_coll.coll_bcast(buff, count, datatype, root,
+                                                comm, smod->c_coll.coll_bcast_module));
+    }
+    return smod->c_coll.coll_bcast(buff, count, datatype, root, comm,
+                                   smod->c_coll.coll_bcast_module);
+}
diff --git a/ompi/mca/coll/sync/coll_sync_component.c b/ompi/mca/coll/sync/coll_sync_component.c
new file mode 100644
index 0000000000..46243f0c91
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_component.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include <string.h>
+
+#include "opal/util/output.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "coll_sync.h"
+
+/*
+ * Public string showing the coll ompi_sync component version number
+ */
+const char *mca_coll_sync_component_version_string =
+    "Open MPI sync collective MCA component version " OMPI_VERSION;
+
+/*
+ * Local function
+ */
+static int sync_register(void);
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+
+mca_coll_sync_component_t mca_coll_sync_component = {
+    {
+        /* First, the mca_component_t struct containing meta information
+         * about the component itself */
+
+        .collm_version = {
+            MCA_COLL_BASE_VERSION_2_0_0,
+
+            /* Component name and version */
+            .mca_component_name = "sync",
+            MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
+                                  OMPI_RELEASE_VERSION),
+
+            /* MCA parameter registration function (no open/close hooks) */
+            .mca_register_component_params = sync_register
+        },
+        .collm_data = {
+            /* The component is checkpoint ready */
+            MCA_BASE_METADATA_PARAM_CHECKPOINT
+        },
+
+        /* Initialization / querying functions */
+
+        .collm_init_query = mca_coll_sync_init_query,
+        .collm_comm_query = mca_coll_sync_comm_query
+    },
+};
+
+
+/*
+ * Register the component's MCA variables: priority, barrier_before
+ * and barrier_after.  Both barrier counts default to 0, and
+ * comm_query disqualifies the component while both are 0, so sync
+ * stays off unless at least one of them is set.
+ */
+static int sync_register(void)
+{
+    mca_base_component_t *c = &mca_coll_sync_component.super.collm_version;
+
+    mca_coll_sync_component.priority = 50;
+    (void) mca_base_component_var_register(c, "priority",
+                                           "Priority of the sync coll component; only relevant if barrier_before or barrier_after is > 0",
+                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                           OPAL_INFO_LVL_9,
+                                           MCA_BASE_VAR_SCOPE_READONLY,
+                                           &mca_coll_sync_component.priority);
+
+    mca_coll_sync_component.barrier_before_nops = 0;
+    (void) mca_base_component_var_register(c, "barrier_before",
+                                           "Do a synchronization before each Nth collective",
+                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                           OPAL_INFO_LVL_9,
+                                           MCA_BASE_VAR_SCOPE_READONLY,
+                                           &mca_coll_sync_component.barrier_before_nops);
+
+    mca_coll_sync_component.barrier_after_nops = 0;
+    (void) mca_base_component_var_register(c, "barrier_after",
+                                           "Do a synchronization after each Nth collective",
+                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                           OPAL_INFO_LVL_9,
+                                           MCA_BASE_VAR_SCOPE_READONLY,
+                                           &mca_coll_sync_component.barrier_after_nops);
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/coll/sync/coll_sync_exscan.c b/ompi/mca/coll/sync/coll_sync_exscan.c
new file mode 100644
index 0000000000..3759c8ea9b
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_exscan.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "coll_sync.h"
+
+
+/*
+ * exscan
+ *
+ * Function: - exclusive scan, wrapped by the optional sync barriers
+ * Accepts: - same arguments as MPI_Exscan()
+ * Returns: - MPI_SUCCESS or error code
+ */
+int mca_coll_sync_exscan(const void *sbuf, void *rbuf, int count,
+                         struct ompi_datatype_t *dtype,
+                         struct ompi_op_t *op,
+                         struct ompi_communicator_t *comm,
+                         mca_coll_base_module_t *module)
+{
+    mca_coll_sync_module_t *smod = (mca_coll_sync_module_t *) module;
+
+    if (!smod->in_operation) {
+        /* Outermost call: COLL_SYNC adds the barriers and returns */
+        COLL_SYNC(smod, smod->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op,
+                                                 comm, smod->c_coll.coll_exscan_module));
+    }
+    return smod->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm,
+                                    smod->c_coll.coll_exscan_module);
+}
diff --git a/ompi/mca/coll/sync/coll_sync_gather.c b/ompi/mca/coll/sync/coll_sync_gather.c
new file mode 100644
index 0000000000..2a49d93a93
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_gather.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "coll_sync.h"
+
+
+/*
+ * gather
+ *
+ * Function: - gather, wrapped by the optional sync barriers
+ * Accepts: - same arguments as MPI_Gather()
+ * Returns: - MPI_SUCCESS or error code
+ */
+int mca_coll_sync_gather(const void *sbuf, int scount,
+                         struct ompi_datatype_t *sdtype,
+                         void *rbuf, int rcount,
+                         struct ompi_datatype_t *rdtype,
+                         int root, struct ompi_communicator_t *comm,
+                         mca_coll_base_module_t *module)
+{
+    mca_coll_sync_module_t *smod = (mca_coll_sync_module_t *) module;
+
+    if (!smod->in_operation) {
+        /* Outermost call: COLL_SYNC adds the barriers and returns */
+        COLL_SYNC(smod, smod->c_coll.coll_gather(sbuf, scount, sdtype,
+                                                 rbuf, rcount, rdtype, root, comm,
+                                                 smod->c_coll.coll_gather_module));
+    }
+    return smod->c_coll.coll_gather(sbuf, scount, sdtype,
+                                    rbuf, rcount, rdtype, root, comm,
+                                    smod->c_coll.coll_gather_module);
+}
diff --git a/ompi/mca/coll/sync/coll_sync_gatherv.c b/ompi/mca/coll/sync/coll_sync_gatherv.c
new file mode 100644
index 0000000000..486ad0ce89
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_gatherv.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "coll_sync.h"
+
+
+/*
+ * gatherv
+ *
+ * Function: - gatherv, wrapped by the optional sync barriers
+ * Accepts: - same arguments as MPI_Gatherv()
+ * Returns: - MPI_SUCCESS or error code
+ */
+int mca_coll_sync_gatherv(const void *sbuf, int scount,
+                          struct ompi_datatype_t *sdtype,
+                          void *rbuf, int *rcounts, int *disps,
+                          struct ompi_datatype_t *rdtype, int root,
+                          struct ompi_communicator_t *comm,
+                          mca_coll_base_module_t *module)
+{
+    mca_coll_sync_module_t *smod = (mca_coll_sync_module_t *) module;
+
+    if (!smod->in_operation) {
+        /* Outermost call: COLL_SYNC adds the barriers and returns */
+        COLL_SYNC(smod, smod->c_coll.coll_gatherv(sbuf, scount, sdtype,
+                                                  rbuf, rcounts, disps, rdtype,
+                                                  root, comm,
+                                                  smod->c_coll.coll_gatherv_module));
+    }
+    return smod->c_coll.coll_gatherv(sbuf, scount, sdtype,
+                                     rbuf, rcounts, disps, rdtype, root, comm,
+                                     smod->c_coll.coll_gatherv_module);
+}
diff --git a/ompi/mca/coll/sync/coll_sync_module.c b/ompi/mca/coll/sync/coll_sync_module.c
new file mode 100644
index 0000000000..f2b8298059
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_module.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#include <stdio.h>
+
+#include "coll_sync.h"
+
+#include "mpi.h"
+
+#include "orte/util/show_help.h"
+#include "orte/util/proc_info.h"
+
+#include "ompi/constants.h"
+#include "ompi/communicator/communicator.h"
+#include "ompi/mca/coll/coll.h"
+#include "ompi/mca/coll/base/base.h"
+#include "coll_sync.h"
+
+
+/*
+ * Constructor: start with no saved collectives (every slot NULL) and
+ * zeroed operation counters.
+ */
+static void mca_coll_sync_module_construct(mca_coll_sync_module_t *module)
+{
+    memset(&(module->c_coll), 0, sizeof(module->c_coll));
+    module->before_num_operations = 0;
+    module->after_num_operations = 0;
+    module->in_operation = false;
+}
+
+/*
+ * Release one saved module slot if it holds a module.  Slots are NULL
+ * when module_enable never ran or could not find that collective, so
+ * each one is checked rather than assuming OBJ_RELEASE accepts NULL.
+ */
+#define RELEASE_IF_SET(m, name)                                   \
+    do {                                                          \
+        if (NULL != (m)->c_coll.coll_ ## name ## _module) {       \
+            OBJ_RELEASE((m)->c_coll.coll_ ## name ## _module);    \
+        }                                                         \
+    } while (0)
+
+static void mca_coll_sync_module_destruct(mca_coll_sync_module_t *module)
+{
+    RELEASE_IF_SET(module, bcast);
+    RELEASE_IF_SET(module, gather);
+    RELEASE_IF_SET(module, gatherv);
+    RELEASE_IF_SET(module, reduce);
+    RELEASE_IF_SET(module, reduce_scatter);
+    RELEASE_IF_SET(module, scatter);
+    RELEASE_IF_SET(module, scatterv);
+    /* If the exscan module is not NULL, then this was an
+       intracommunicator, and therefore scan will normally have a
+       module as well (still checked, in case enable failed on it). */
+    if (NULL != module->c_coll.coll_exscan_module) {
+        OBJ_RELEASE(module->c_coll.coll_exscan_module);
+        RELEASE_IF_SET(module, scan);
+    }
+}
+#undef RELEASE_IF_SET
+
+OBJ_CLASS_INSTANCE(mca_coll_sync_module_t, mca_coll_base_module_t,
+                   mca_coll_sync_module_construct,
+                   mca_coll_sync_module_destruct);
+
+
+/*
+ * Initial query function that is invoked during MPI_INIT, allowing
+ * this component to disqualify itself if it doesn't support the
+ * required level of thread support.
+ */
+int mca_coll_sync_init_query(bool enable_progress_threads,
+                             bool enable_mpi_threads)
+{
+    /* Nothing to do */
+    return OMPI_SUCCESS;
+}
+
+
+/*
+ * Invoked when there's a new communicator that has been created.
+ * Look at the communicator and decide which set of functions and
+ * priority we want to return.
+ */
+mca_coll_base_module_t *
+mca_coll_sync_comm_query(struct ompi_communicator_t *comm,
+                         int *priority)
+{
+    mca_coll_sync_module_t *sync_module;
+
+    /* If both MCA params are 0, then disqualify us.  Do this before
+       allocating so a disqualified query does not leak a module. */
+    if (0 == mca_coll_sync_component.barrier_before_nops &&
+        0 == mca_coll_sync_component.barrier_after_nops) {
+        return NULL;
+    }
+
+    sync_module = OBJ_NEW(mca_coll_sync_module_t);
+    if (NULL == sync_module) {
+        return NULL;
+    }
+    *priority = mca_coll_sync_component.priority;
+
+    /* Choose whether to use [intra|inter] */
+    sync_module->super.coll_module_enable = mca_coll_sync_module_enable;
+
+    /* The "all" versions are already synchronous. So no need for an
+       additional barrier there. */
+    sync_module->super.coll_allgather = NULL;
+    sync_module->super.coll_allgatherv = NULL;
+    sync_module->super.coll_allreduce = NULL;
+    sync_module->super.coll_alltoall = NULL;
+    sync_module->super.coll_alltoallv = NULL;
+    sync_module->super.coll_alltoallw = NULL;
+    sync_module->super.coll_barrier = NULL;
+    sync_module->super.coll_bcast = mca_coll_sync_bcast;
+    sync_module->super.coll_exscan = mca_coll_sync_exscan;
+    sync_module->super.coll_gather = mca_coll_sync_gather;
+    sync_module->super.coll_gatherv = mca_coll_sync_gatherv;
+    sync_module->super.coll_reduce = mca_coll_sync_reduce;
+    sync_module->super.coll_reduce_scatter = mca_coll_sync_reduce_scatter;
+    sync_module->super.coll_scan = mca_coll_sync_scan;
+    sync_module->super.coll_scatter = mca_coll_sync_scatter;
+    sync_module->super.coll_scatterv = mca_coll_sync_scatterv;
+
+    return &(sync_module->super);
+}
+
+
+/*
+ * Init module on the communicator
+ */
+int mca_coll_sync_module_enable(mca_coll_base_module_t *module,
+                                struct ompi_communicator_t *comm)
+{
+    bool good = true;
+    char *msg = NULL;
+    mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
+
+    /* Save the prior layer of coll functions */
+    s->c_coll = comm->c_coll;
+
+    /* Retain each underlying module we depend on.  Only the FIRST
+       missing collective is recorded, since that is what the help
+       message reports.  Once one is missing nothing further is
+       retained, so later slots are cleared to keep the destructor
+       from releasing references this module never took. */
+#define CHECK_AND_RETAIN(name)                                  \
+    do {                                                        \
+        if (NULL == s->c_coll.coll_ ## name ## _module) {       \
+            if (good) {                                         \
+                good = false;                                   \
+                msg = #name;                                    \
+            }                                                   \
+        } else if (good) {                                      \
+            OBJ_RETAIN(s->c_coll.coll_ ## name ## _module);     \
+        } else {                                                \
+            s->c_coll.coll_ ## name ## _module = NULL;          \
+        }                                                       \
+    } while (0)
+
+    CHECK_AND_RETAIN(bcast);
+    CHECK_AND_RETAIN(gather);
+    CHECK_AND_RETAIN(gatherv);
+    CHECK_AND_RETAIN(reduce);
+    CHECK_AND_RETAIN(reduce_scatter);
+    CHECK_AND_RETAIN(scatter);
+    CHECK_AND_RETAIN(scatterv);
+    if (!OMPI_COMM_IS_INTER(comm)) {
+        /* MPI does not define scan/exscan on intercommunicators */
+        CHECK_AND_RETAIN(exscan);
+        CHECK_AND_RETAIN(scan);
+    }
+#undef CHECK_AND_RETAIN
+
+    /* All done */
+    if (good) {
+        return OMPI_SUCCESS;
+    } else {
+        orte_show_help("help-coll-sync.txt", "missing collective", true,
+                       orte_process_info.nodename,
+                       mca_coll_sync_component.priority, msg);
+        return OMPI_ERR_NOT_FOUND;
+    }
+}
+
diff --git a/ompi/mca/coll/sync/coll_sync_reduce.c b/ompi/mca/coll/sync/coll_sync_reduce.c
new file mode 100644
index 0000000000..eec178c305
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_reduce.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "coll_sync.h"
+
+
+/*
+ * reduce
+ *
+ * Function: - reduce, wrapped by the optional sync barriers
+ * Accepts: - same as MPI_Reduce()
+ * Returns: - MPI_SUCCESS or error code
+ */
+int mca_coll_sync_reduce(const void *sbuf, void *rbuf, int count,
+                         struct ompi_datatype_t *dtype,
+                         struct ompi_op_t *op,
+                         int root, struct ompi_communicator_t *comm,
+                         mca_coll_base_module_t *module)
+{
+    mca_coll_sync_module_t *smod = (mca_coll_sync_module_t *) module;
+
+    if (!smod->in_operation) {
+        /* Outermost call: COLL_SYNC adds the barriers and returns */
+        COLL_SYNC(smod, smod->c_coll.coll_reduce(sbuf, rbuf, count, dtype,
+                                                 op, root, comm,
+                                                 smod->c_coll.coll_reduce_module));
+    }
+    return smod->c_coll.coll_reduce(sbuf, rbuf, count, dtype, op, root, comm,
+                                    smod->c_coll.coll_reduce_module);
+}
diff --git a/ompi/mca/coll/sync/coll_sync_reduce_scatter.c b/ompi/mca/coll/sync/coll_sync_reduce_scatter.c
new file mode 100644
index 0000000000..9643054fa6
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_reduce_scatter.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "coll_sync.h"
+
+
+/*
+ * reduce_scatter
+ *
+ * Function: - reduce then scatter, wrapped by the optional sync barriers
+ * Accepts: - same as MPI_Reduce_scatter()
+ * Returns: - MPI_SUCCESS or error code
+ */
+int mca_coll_sync_reduce_scatter(const void *sbuf, void *rbuf, int *rcounts,
+                                 struct ompi_datatype_t *dtype,
+                                 struct ompi_op_t *op,
+                                 struct ompi_communicator_t *comm,
+                                 mca_coll_base_module_t *module)
+{
+    mca_coll_sync_module_t *smod = (mca_coll_sync_module_t *) module;
+
+    if (!smod->in_operation) {
+        /* Outermost call: COLL_SYNC adds the barriers and returns */
+        COLL_SYNC(smod, smod->c_coll.coll_reduce_scatter(sbuf, rbuf, rcounts,
+                                                         dtype, op, comm,
+                                                         smod->c_coll.coll_reduce_scatter_module));
+    }
+    return smod->c_coll.coll_reduce_scatter(sbuf, rbuf, rcounts,
+                                            dtype, op, comm,
+                                            smod->c_coll.coll_reduce_scatter_module);
+}
diff --git a/ompi/mca/coll/sync/coll_sync_scan.c b/ompi/mca/coll/sync/coll_sync_scan.c
new file mode 100644
index 0000000000..9608bc7e83
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_scan.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "coll_sync.h"
+
+
+/*
+ * scan
+ *
+ * Function: - inclusive scan, wrapped by the optional sync barriers
+ * Accepts: - same arguments as MPI_Scan()
+ * Returns: - MPI_SUCCESS or error code
+ */
+int mca_coll_sync_scan(const void *sbuf, void *rbuf, int count,
+                       struct ompi_datatype_t *dtype,
+                       struct ompi_op_t *op,
+                       struct ompi_communicator_t *comm,
+                       mca_coll_base_module_t *module)
+{
+    mca_coll_sync_module_t *smod = (mca_coll_sync_module_t *) module;
+
+    if (!smod->in_operation) {
+        /* Outermost call: COLL_SYNC adds the barriers and returns */
+        COLL_SYNC(smod, smod->c_coll.coll_scan(sbuf, rbuf, count, dtype, op,
+                                               comm, smod->c_coll.coll_scan_module));
+    }
+    return smod->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm,
+                                  smod->c_coll.coll_scan_module);
+}
diff --git a/ompi/mca/coll/sync/coll_sync_scatter.c b/ompi/mca/coll/sync/coll_sync_scatter.c
new file mode 100644
index 0000000000..3c093d8674
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_scatter.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "coll_sync.h"
+
+
+/*
+ * scatter
+ *
+ * Function: - scatter, wrapped by the optional sync barriers
+ * Accepts: - same arguments as MPI_Scatter()
+ * Returns: - MPI_SUCCESS or error code
+ */
+int mca_coll_sync_scatter(const void *sbuf, int scount,
+                          struct ompi_datatype_t *sdtype,
+                          void *rbuf, int rcount,
+                          struct ompi_datatype_t *rdtype,
+                          int root, struct ompi_communicator_t *comm,
+                          mca_coll_base_module_t *module)
+{
+    mca_coll_sync_module_t *smod = (mca_coll_sync_module_t *) module;
+
+    if (!smod->in_operation) {
+        /* Outermost call: COLL_SYNC adds the barriers and returns */
+        COLL_SYNC(smod, smod->c_coll.coll_scatter(sbuf, scount, sdtype,
+                                                  rbuf, rcount, rdtype, root, comm,
+                                                  smod->c_coll.coll_scatter_module));
+    }
+    return smod->c_coll.coll_scatter(sbuf, scount, sdtype,
+                                     rbuf, rcount, rdtype, root, comm,
+                                     smod->c_coll.coll_scatter_module);
+}
diff --git a/ompi/mca/coll/sync/coll_sync_scatterv.c b/ompi/mca/coll/sync/coll_sync_scatterv.c
new file mode 100644
index 0000000000..369bdcf548
--- /dev/null
+++ b/ompi/mca/coll/sync/coll_sync_scatterv.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "coll_sync.h"
+
+
+/*
+ * scatterv
+ *
+ * Function: - scatterv, wrapped by the optional sync barriers
+ * Accepts: - same arguments as MPI_Scatterv()
+ * Returns: - MPI_SUCCESS or error code
+ */
+int mca_coll_sync_scatterv(const void *sbuf, int *scounts,
+                           int *disps, struct ompi_datatype_t *sdtype,
+                           void *rbuf, int rcount,
+                           struct ompi_datatype_t *rdtype, int root,
+                           struct ompi_communicator_t *comm,
+                           mca_coll_base_module_t *module)
+{
+    mca_coll_sync_module_t *smod = (mca_coll_sync_module_t *) module;
+
+    if (!smod->in_operation) {
+        /* Outermost call: COLL_SYNC adds the barriers and returns */
+        COLL_SYNC(smod, smod->c_coll.coll_scatterv(sbuf, scounts, disps, sdtype,
+                                                   rbuf, rcount, rdtype, root, comm,
+                                                   smod->c_coll.coll_scatterv_module));
+    }
+    return smod->c_coll.coll_scatterv(sbuf, scounts, disps, sdtype,
+                                      rbuf, rcount, rdtype, root, comm,
+                                      smod->c_coll.coll_scatterv_module);
+}
diff --git a/ompi/mca/coll/sync/help-coll-sync.txt b/ompi/mca/coll/sync/help-coll-sync.txt
new file mode 100644
index 0000000000..4a5c871207
--- /dev/null
+++ b/ompi/mca/coll/sync/help-coll-sync.txt
@@ -0,0 +1,22 @@
+# -*- text -*-
+#
+# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+# This is the US/English general help file for Open MPI's sync
+# collective component.
+#
+[missing collective]
+The sync collective component in Open MPI was activated on a
+communicator where it did not find an underlying collective operation
+defined. This usually means that the sync collective module's
+priority was not set high enough. Please try increasing sync's
+priority.
+
+  Local host: %s
+  Sync coll module priority: %d
+  First discovered missing collective: %s
diff --git a/ompi/mca/coll/sync/owner.txt b/ompi/mca/coll/sync/owner.txt
new file mode 100644
index 0000000000..f6e2c96b06
--- /dev/null
+++ b/ompi/mca/coll/sync/owner.txt
@@ -0,0 +1,7 @@
+#
+# owner/status file
+# owner: institution that is responsible for this package
+# status: e.g. active, maintenance, unmaintained
+#
+owner: Intel
+status: maintenance
diff --git a/orte/test/mpi/Makefile b/orte/test/mpi/Makefile
index 0488a2033d..8dd29b0c1c 100644
--- a/orte/test/mpi/Makefile
+++ b/orte/test/mpi/Makefile
@@ -1,4 +1,4 @@
-PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info info_spawn server client paccept pconnect
+PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info info_spawn server client paccept pconnect ring hello.sapp binding badcoll
 
 all: $(PROGS)
 
diff --git a/orte/test/mpi/badcoll.c b/orte/test/mpi/badcoll.c
new file mode 100644
index 0000000000..0b4bfdc22a
--- /dev/null
+++ b/orte/test/mpi/badcoll.c
@@ -0,0 +1,35 @@
+#include <stdio.h>
+#include <unistd.h>
+#include "mpi.h"
+
+/*
+ * Deliberately unbalanced broadcast loop: every rank calls MPI_Bcast
+ * 1000 times with rank 0 as root, and every rank except 0 sleeps for
+ * a second after each call.  Meant for exercising the coll/sync
+ * component (run with its barrier_before/barrier_after set).
+ */
+
+const int count = 1234;
+int buffer[1234] = {0};
+
+int main(int argc, char *argv[])
+{
+    int rank, size, i;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+
+    for (i=0; i < 1000; i++) {
+        fprintf(stderr, "%d: Executing Bcast #%d\n", rank, i);
+        MPI_Bcast(buffer, count, MPI_INT, 0, MPI_COMM_WORLD);
+        if (0 != rank) {
+            /* fall behind the root on purpose */
+            sleep(1);
+        }
+    }
+
+    MPI_Finalize();
+    return 0;
+}
+