@@ -137,9 +137,9 @@ bcast_kary_tree_top(void *buff, int count,
137137 mca_coll_portals4_module_t * portals4_module )
138138{
139139 bool is_sync = request -> is_sync ;
140- int ret , seg ;
141- unsigned int i ;
142- int segment_nb = request -> u .bcast .segment_nb ;
140+ int ret ;
141+ unsigned int i , seg , seg_size , nb_long ;
142+ unsigned int segment_nb = request -> u .bcast .segment_nb ;
143143 unsigned int child_nb ;
144144 int size = ompi_comm_size (comm );
145145 int rank = ompi_comm_rank (comm );
@@ -201,15 +201,22 @@ bcast_kary_tree_top(void *buff, int count,
201201 COLL_PORTALS4_SET_BITS (match_bits , ompi_comm_get_cid (comm ), 0 , 0 ,
202202 COLL_PORTALS4_BCAST , 0 , internal_count );
203203
204+ /* The data will be cut in segment_nb segments.
205+ * nb_long segments will have a size of (seg_size + 1)
206+ * and (segment_nb - nb_long) segments will have a size of seg_size
207+ */
208+ seg_size = request -> u .bcast .tmpsize / segment_nb ;
209+ nb_long = request -> u .bcast .tmpsize % segment_nb ;
210+ opal_output_verbose (10 , ompi_coll_base_framework .framework_output , "seg_size=%d nb_long=%d segment_nb=%d" , seg_size , nb_long , segment_nb );
211+
204212 if (rank != root ) {
205213 for (seg = 1 , offset = 0 , length = 0 ;
206214 seg <= segment_nb ;
207215 seg ++ , offset += length ) {
208216
209217 /* Divide buffer into segments */
210- length = (seg < segment_nb ) ?
211- (request -> u .bcast .tmpsize + segment_nb - 1 ) / segment_nb :
212- request -> u .bcast .tmpsize - ((request -> u .bcast .tmpsize + segment_nb - 1 ) / segment_nb ) * (segment_nb - 1 );
218+ if (seg <= nb_long ) length = seg_size + 1 ;
219+ else length = seg_size ;
213220
214221 /*
215222 ** Prepare Data ME
@@ -352,9 +359,10 @@ bcast_kary_tree_top(void *buff, int count,
352359 seg ++ , offset += length ) {
353360
354361 /* Divide buffer into segments */
355- length = (seg < segment_nb ) ?
356- (request -> u .bcast .tmpsize + segment_nb - 1 ) / segment_nb :
357- request -> u .bcast .tmpsize - ((request -> u .bcast .tmpsize + segment_nb - 1 ) / segment_nb ) * (segment_nb - 1 );
362+ if (seg <= nb_long ) length = seg_size + 1 ;
363+ else length = seg_size ;
364+ opal_output_verbose (10 , ompi_coll_base_framework .framework_output ,
365+ "bcast with k-ary tree : segment of size %ld" , length );
358366
359367 /* compute the triggering threshold to send data to the children */
360368 trig_thr = (rank == root ) ? (segment_nb ) :
@@ -440,8 +448,9 @@ bcast_pipeline_top(void *buff, int count,
440448 mca_coll_portals4_module_t * portals4_module )
441449{
442450 bool is_sync = request -> is_sync ;
443- int ret , seg ;
444- int segment_nb = request -> u .bcast .segment_nb ;
451+ int ret ;
452+ unsigned int seg , seg_size , nb_long ;
453+ unsigned int segment_nb = request -> u .bcast .segment_nb ;
445454 int size = ompi_comm_size (comm );
446455 int rank = ompi_comm_rank (comm );
447456 ptl_rank_t parent , child ;
@@ -492,16 +501,22 @@ bcast_pipeline_top(void *buff, int count,
492501
493502 COLL_PORTALS4_SET_BITS (match_bits , ompi_comm_get_cid (comm ), 0 , 0 ,
494503 COLL_PORTALS4_BCAST , 0 , internal_count );
504+ /* The data will be cut in segment_nb segments.
505+ * nb_long segments will have a size of (seg_size + 1)
506+ * and (segment_nb - nb_long) segments will have a size of seg_size
507+ */
508+ seg_size = request -> u .bcast .tmpsize / segment_nb ;
509+ nb_long = request -> u .bcast .tmpsize % segment_nb ;
510+ opal_output_verbose (10 , ompi_coll_base_framework .framework_output , "seg_size=%d nb_long=%d" , seg_size , nb_long );
495511
496512 if (rank != root ) {
497513 for (seg = 1 , offset = 0 , length = 0 ;
498514 seg <= segment_nb ;
499515 seg ++ , offset += length ) {
500516
501517 /* Divide buffer into segments */
502- length = (seg < segment_nb ) ?
503- (request -> u .bcast .tmpsize + segment_nb - 1 ) / segment_nb :
504- request -> u .bcast .tmpsize - ((request -> u .bcast .tmpsize + segment_nb - 1 ) / segment_nb ) * (segment_nb - 1 );
518+ if (seg <= nb_long ) length = seg_size + 1 ;
519+ else length = seg_size ;
505520
506521 /*
507522 ** Prepare Data ME
@@ -642,9 +657,10 @@ bcast_pipeline_top(void *buff, int count,
642657 seg ++ , offset += length ) {
643658
644659 /* Divide buffer into segments */
645- length = (seg < segment_nb ) ?
646- (request -> u .bcast .tmpsize + segment_nb - 1 ) / segment_nb :
647- request -> u .bcast .tmpsize - ((request -> u .bcast .tmpsize + segment_nb - 1 ) / segment_nb ) * (segment_nb - 1 );
660+ if (seg <= nb_long ) length = seg_size + 1 ;
661+ else length = seg_size ;
662+ opal_output_verbose (10 , ompi_coll_base_framework .framework_output ,
663+ "bcast with pipeline : segment of size %ld \n" , length );
648664
649665 /* compute the triggering threshold to send data to the children */
650666 trig_thr = (rank == root ) ? (segment_nb ) :
0 commit comments