Skip to content

Commit df59d6c

Browse files
committed
coll-portals4: Correct and simplify how the data are cut into segment_nb segments (bcast)
1 parent 274f8d6 commit df59d6c

File tree

3 files changed

+38
-22
lines changed

3 files changed

+38
-22
lines changed

ompi/mca/coll/portals4/coll_portals4.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ is_reduce_optimizable(struct ompi_datatype_t *dtype, size_t length, struct ompi_
314314
}
315315

316316
*ptl_dtype = ompi_coll_portals4_atomic_datatype[dtype->id];
317-
if (*ptl_dtype == COLL_PORTALS4_NO_DTYPE){
317+
if (*ptl_dtype == COLL_PORTALS4_NO_DTYPE) {
318318
opal_output_verbose(50, ompi_coll_base_framework.framework_output,
319319
"datatype %d not supported\n",
320320
dtype->id);

ompi/mca/coll/portals4/coll_portals4_bcast.c

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,9 @@ bcast_kary_tree_top(void *buff, int count,
137137
mca_coll_portals4_module_t *portals4_module)
138138
{
139139
bool is_sync = request->is_sync;
140-
int ret, seg;
141-
unsigned int i;
142-
int segment_nb = request->u.bcast.segment_nb;
140+
int ret;
141+
unsigned int i, seg, seg_size, nb_long;
142+
unsigned int segment_nb = request->u.bcast.segment_nb;
143143
unsigned int child_nb;
144144
int size = ompi_comm_size(comm);
145145
int rank = ompi_comm_rank(comm);
@@ -201,15 +201,22 @@ bcast_kary_tree_top(void *buff, int count,
201201
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0,
202202
COLL_PORTALS4_BCAST, 0, internal_count);
203203

204+
/* The data will be cut in segment_nb segments.
205+
* nb_long segments will have a size of (seg_size + 1)
206+
* and (segment_nb - nb_long) segments will have a size of seg_size
207+
*/
208+
seg_size = request->u.bcast.tmpsize / segment_nb;
209+
nb_long = request->u.bcast.tmpsize % segment_nb;
210+
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "seg_size=%d nb_long=%d segment_nb=%d", seg_size, nb_long, segment_nb);
211+
204212
if (rank != root) {
205213
for (seg = 1, offset = 0, length = 0 ;
206214
seg <= segment_nb ;
207215
seg++, offset += length) {
208216

209217
/* Divide buffer into segments */
210-
length = (seg < segment_nb) ?
211-
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
212-
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
218+
if (seg <= nb_long) length = seg_size + 1;
219+
else length = seg_size;
213220

214221
/*
215222
** Prepare Data ME
@@ -352,9 +359,10 @@ bcast_kary_tree_top(void *buff, int count,
352359
seg++, offset += length) {
353360

354361
/* Divide buffer into segments */
355-
length = (seg < segment_nb) ?
356-
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
357-
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
362+
if (seg <= nb_long) length = seg_size + 1;
363+
else length = seg_size;
364+
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
365+
"bcast with k-ary tree : segment of size %ld", length);
358366

359367
/* compute the triggering threshold to send data to the children */
360368
trig_thr = (rank == root) ? (segment_nb) :
@@ -440,8 +448,9 @@ bcast_pipeline_top(void *buff, int count,
440448
mca_coll_portals4_module_t *portals4_module)
441449
{
442450
bool is_sync = request->is_sync;
443-
int ret, seg;
444-
int segment_nb = request->u.bcast.segment_nb;
451+
int ret;
452+
unsigned int seg, seg_size, nb_long;
453+
unsigned int segment_nb = request->u.bcast.segment_nb;
445454
int size = ompi_comm_size(comm);
446455
int rank = ompi_comm_rank(comm);
447456
ptl_rank_t parent, child;
@@ -492,16 +501,22 @@ bcast_pipeline_top(void *buff, int count,
492501

493502
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0,
494503
COLL_PORTALS4_BCAST, 0, internal_count);
504+
/* The data will be cut in segment_nb segments.
505+
* nb_long segments will have a size of (seg_size + 1)
506+
* and (segment_nb - nb_long) segments will have a size of seg_size
507+
*/
508+
seg_size = request->u.bcast.tmpsize / segment_nb;
509+
nb_long = request->u.bcast.tmpsize % segment_nb;
510+
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "seg_size=%d nb_long=%d", seg_size, nb_long);
495511

496512
if (rank != root) {
497513
for (seg = 1, offset = 0, length = 0 ;
498514
seg <= segment_nb ;
499515
seg++, offset += length) {
500516

501517
/* Divide buffer into segments */
502-
length = (seg < segment_nb) ?
503-
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
504-
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
518+
if (seg <= nb_long) length = seg_size + 1;
519+
else length = seg_size;
505520

506521
/*
507522
** Prepare Data ME
@@ -642,9 +657,10 @@ bcast_pipeline_top(void *buff, int count,
642657
seg++, offset += length) {
643658

644659
/* Divide buffer into segments */
645-
length = (seg < segment_nb) ?
646-
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
647-
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
660+
if (seg <= nb_long) length = seg_size + 1;
661+
else length = seg_size;
662+
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
663+
"bcast with pipeline : segment of size %ld \n", length);
648664

649665
/* compute the triggering threshold to send data to the children */
650666
trig_thr = (rank == root) ? (segment_nb) :

ompi/mca/coll/portals4/coll_portals4_reduce.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -385,10 +385,10 @@ ompi_coll_portals4_reduce_intra(const void *sendbuf, void *recvbuf, int count,
385385
ret = reduce_kary_tree_top(sendbuf, recvbuf, count,
386386
dtype, op, root, comm, request, portals4_module);
387387
if (OMPI_SUCCESS != ret)
388-
return ret;
388+
return ret;
389389
ret = reduce_kary_tree_bottom(request);
390390
if (OMPI_SUCCESS != ret)
391-
return ret;
391+
return ret;
392392

393393
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
394394
return (OMPI_SUCCESS);
@@ -422,7 +422,7 @@ ompi_coll_portals4_ireduce_intra(const void* sendbuf, void* recvbuf, int count,
422422
ret = reduce_kary_tree_top(sendbuf, recvbuf, count,
423423
dtype, op, root, comm, request, portals4_module);
424424
if (OMPI_SUCCESS != ret)
425-
return ret;
425+
return ret;
426426

427427
if (!request->u.reduce.is_optim) {
428428
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
@@ -439,7 +439,7 @@ ompi_coll_portals4_ireduce_intra_fini(ompi_coll_portals4_request_t *request)
439439

440440
ret = reduce_kary_tree_bottom(request);
441441
if (OMPI_SUCCESS != ret)
442-
return ret;
442+
return ret;
443443

444444
OPAL_THREAD_LOCK(&ompi_request_lock);
445445
ompi_request_complete(&request->super, true);

0 commit comments

Comments
 (0)