Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ompi/mca/coll/portals4/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ local_sources = \
coll_portals4_barrier.c \
coll_portals4_bcast.c \
coll_portals4_reduce.c \
coll_portals4_gather.c \
coll_portals4_request.h \
coll_portals4_request.c

Expand Down
96 changes: 96 additions & 0 deletions ompi/mca/coll/portals4/coll_portals4.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,18 @@
#include "ompi/datatype/ompi_datatype_internal.h"
#include "ompi/op/op.h"
#include "ompi/mca/mca.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/request/request.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"

#include "ompi/mca/mtl/portals4/mtl_portals4.h"

#define MAXTREEFANOUT 32

BEGIN_C_DECLS

#define COLL_PORTALS4_NO_OP ((ptl_op_t)-1)
Expand Down Expand Up @@ -61,10 +66,27 @@ struct mca_coll_portals4_component_t {

ptl_ni_limits_t ni_limits;

int use_binomial_gather_algorithm;

};
typedef struct mca_coll_portals4_component_t mca_coll_portals4_component_t;
OMPI_MODULE_DECLSPEC extern mca_coll_portals4_component_t mca_coll_portals4_component;


/*
 * Borrowed with thanks from the coll-tuned component, then modified for Portals4.
 *
 * Per-process description of a communication tree.  A pointer to one of
 * these is cached in the module (cached_in_order_bmtree) and rebuilt by
 * COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE when the requested root changes.
 *
 * NOTE(review): the per-field meanings below are inferred from the field
 * names and the coll-tuned heritage; confirm against the tree builder.
 */
typedef struct ompi_coll_portals4_tree_t {
/* presumably the rank at the root of the tree -- TODO confirm */
int32_t tree_root;
/* presumably the maximum number of children per node -- TODO confirm */
int32_t tree_fanout;
/* presumably a flag/marker for binomial trees -- TODO confirm */
int32_t tree_bmtree;
/* presumably the parent ("previous") rank of this process -- TODO confirm */
int32_t tree_prev;
/* fixed-size array of MAXTREEFANOUT entries; presumably the child ranks */
int32_t tree_next[MAXTREEFANOUT];
/* presumably the number of valid entries in tree_next -- TODO confirm */
int32_t tree_nextsize;
/* presumably the number of ranks below this process in the tree -- TODO confirm */
int32_t tree_numdescendants;
} ompi_coll_portals4_tree_t;


struct mca_coll_portals4_module_t {
mca_coll_base_module_t super;
size_t coll_count;
Expand All @@ -79,6 +101,13 @@ struct mca_coll_portals4_module_t {
mca_coll_base_module_t *previous_allreduce_module;
mca_coll_base_module_iallreduce_fn_t previous_iallreduce;
mca_coll_base_module_t *previous_iallreduce_module;

/* binomial tree */
ompi_coll_portals4_tree_t *cached_in_order_bmtree;
int cached_in_order_bmtree_root;

size_t barrier_count;
size_t gather_count;
};
typedef struct mca_coll_portals4_module_t mca_coll_portals4_module_t;
OBJ_CLASS_DECLARATION(mca_coll_portals4_module_t);
Expand Down Expand Up @@ -135,6 +164,22 @@ int
opal_stderr(const char *msg, const char *file,
const int line, const int ret);

/*
 * Borrowed with thanks from the coll-tuned component.
 *
 * Ensure that PORTALS4_MODULE caches the in-order binomial tree rooted at
 * ROOT for communicator OMPI_COMM:
 *   - a tree is cached and its recorded root equals ROOT: nothing to do;
 *   - a tree is cached for a different root: destroy it, then rebuild;
 *   - no tree is cached: build one.
 * After a (re)build, ROOT is recorded as the cached root.
 *
 * PORTALS4_MODULE and ROOT are expanded several times, so callers should
 * pass expressions without side effects.
 */
#define COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, PORTALS4_MODULE, ROOT )                 \
do {                                                                                             \
    if( (PORTALS4_MODULE)->cached_in_order_bmtree ) {                                            \
        if( (PORTALS4_MODULE)->cached_in_order_bmtree_root == (ROOT) ) {                         \
            break; /* cache hit: leaves only this do/while(0) wrapper */                         \
        }                                                                                        \
        /* stale tree built for another root: release it before rebuilding */                    \
        ompi_coll_portals4_destroy_tree( &((PORTALS4_MODULE)->cached_in_order_bmtree) );         \
    }                                                                                            \
    (PORTALS4_MODULE)->cached_in_order_bmtree =                                                  \
        ompi_coll_portals4_build_in_order_bmtree( (OMPI_COMM), (ROOT) );                         \
    (PORTALS4_MODULE)->cached_in_order_bmtree_root = (ROOT);                                     \
} while (0)


int ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
Expand Down Expand Up @@ -177,6 +222,20 @@ int ompi_coll_portals4_iallreduce_intra(const void* sendbuf, void* recvbuf, int
int
ompi_coll_portals4_iallreduce_intra_fini(struct ompi_coll_portals4_request_t *request);

/*
 * Gather entry point for this component; portals4_comm_query() installs it
 * as the module's coll_gather function.
 *
 * sbuf/scount/sdtype describe the send side, rbuf/rcount/rdtype the receive
 * side, and root selects the gathering rank (presumably with the usual
 * MPI_Gather semantics -- confirm against the implementation file).
 */
int ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
/*
 * Igather entry point; portals4_comm_query() installs it as the module's
 * coll_igather function.  Takes the same arguments as the gather entry
 * point plus `request` (presumably an output handle for the operation's
 * request -- confirm against the implementation file).
 */
int ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
ompi_request_t **request,
mca_coll_base_module_t *module);
/*
 * Finalization step for an igather request; portals4_progress() calls it
 * for requests whose type is OMPI_COLL_PORTALS4_TYPE_GATHER.
 */
int ompi_coll_portals4_igather_intra_fini(struct ompi_coll_portals4_request_t *request);


static inline ptl_process_t
ompi_coll_portals4_get_peer(struct ompi_communicator_t *comm, int rank)
{
Expand Down Expand Up @@ -357,6 +416,43 @@ void get_k_ary_tree(const unsigned int k_ary,
return;
}


/*
 * Construct `converter` in place and prepare it for receiving `count`
 * elements of `datatype` into the buffer at `target`.
 *
 * The new convertor is initialized as a copy of the convertor attached to
 * `proc` (proc->super.proc_convertor), with the flags argument set to 0.
 * The result of the prepare call is not reported to the caller.
 */
static inline void
ompi_coll_portals4_create_recv_converter (opal_convertor_t *converter,
                                          void *target,
                                          ompi_proc_t *proc,
                                          int count,
                                          ompi_datatype_t *datatype)
{
    /* convertor of the peer process, used as the template for the copy */
    opal_convertor_t *peer_convertor = proc->super.proc_convertor;
    const int no_flags = 0;

    /* the object must be constructed before it can be prepared */
    OBJ_CONSTRUCT(converter, opal_convertor_t);

    opal_convertor_copy_and_prepare_for_recv(peer_convertor, &datatype->super,
                                             count, target,
                                             no_flags, converter);
}

/*
 * Construct `converter` in place and prepare it for sending `count`
 * elements of `datatype` from the buffer at `source`.
 *
 * The new convertor is initialized as a copy of the convertor attached to
 * `proc` (proc->super.proc_convertor), with the flags argument set to 0.
 * The result of the prepare call is not reported to the caller.
 */
static inline void
ompi_coll_portals4_create_send_converter (opal_convertor_t *converter,
                                          const void *source,
                                          ompi_proc_t *proc,
                                          int count,
                                          ompi_datatype_t *datatype)
{
    /* convertor of the peer process, used as the template for the copy */
    opal_convertor_t *peer_convertor = proc->super.proc_convertor;
    const int no_flags = 0;

    /* the object must be constructed before it can be prepared */
    OBJ_CONSTRUCT(converter, opal_convertor_t);

    opal_convertor_copy_and_prepare_for_send(peer_convertor, &datatype->super,
                                             count, source,
                                             no_flags, converter);
}

END_C_DECLS

#endif /* MCA_COLL_PORTALS4_EXPORT_H */
27 changes: 23 additions & 4 deletions ompi/mca/coll/portals4/coll_portals4_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,14 @@ portals4_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_portals4_priority);

mca_coll_portals4_component.use_binomial_gather_algorithm = 0;
(void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version, "use_binomial_gather_algorithm",
"if 1 use a binomial tree algorithm for gather, otherwise use linear",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_portals4_component.use_binomial_gather_algorithm);

return OMPI_SUCCESS;
}

Expand Down Expand Up @@ -463,7 +471,7 @@ portals4_init_query(bool enable_progress_threads,
__FILE__, __LINE__, ret);
return OMPI_ERROR;
}
OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%x\n", md.start, md.length));
OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%lx\n", md.start, md.length));

/* setup finish ack ME */
me.start = NULL;
Expand All @@ -472,7 +480,7 @@ portals4_init_query(bool enable_progress_threads,
me.min_free = 0;
me.uid = mca_coll_portals4_component.uid;
me.options = PTL_ME_OP_PUT |
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
me.match_bits = 0;
Expand Down Expand Up @@ -584,6 +592,12 @@ portals4_comm_query(struct ompi_communicator_t *comm,
portals4_module->super.coll_barrier = ompi_coll_portals4_barrier_intra;
portals4_module->super.coll_ibarrier = ompi_coll_portals4_ibarrier_intra;

portals4_module->super.coll_gather = ompi_coll_portals4_gather_intra;
portals4_module->super.coll_igather = ompi_coll_portals4_igather_intra;

portals4_module->cached_in_order_bmtree=NULL;
portals4_module->cached_in_order_bmtree_root=-1;

portals4_module->super.coll_bcast = ompi_coll_portals4_bcast_intra;
portals4_module->super.coll_ibcast = ompi_coll_portals4_ibcast_intra;

Expand All @@ -593,6 +607,9 @@ portals4_comm_query(struct ompi_communicator_t *comm,
portals4_module->super.coll_reduce = ompi_coll_portals4_reduce_intra;
portals4_module->super.coll_ireduce = ompi_coll_portals4_ireduce_intra;

portals4_module->barrier_count = 0;
portals4_module->gather_count = 0;

return &(portals4_module->super);
}

Expand Down Expand Up @@ -689,9 +706,11 @@ portals4_progress(void)
ompi_coll_portals4_iallreduce_intra_fini(ptl_request);
break;
case OMPI_COLL_PORTALS4_TYPE_SCATTER:
case OMPI_COLL_PORTALS4_TYPE_GATHER:
opal_output(ompi_coll_base_framework.framework_output,
"allreduce is not supported yet\n");
"scatter is not supported yet\n");
break;
case OMPI_COLL_PORTALS4_TYPE_GATHER:
ompi_coll_portals4_igather_intra_fini(ptl_request);
break;
}
}
Expand Down
Loading