2121#include "coll_portals4.h"
2222#include "coll_portals4_request.h"
2323
24+ #include <string.h> // included for ffs in get_tree_numdescendants_of
2425
2526#undef RTR_USES_TRIGGERED_PUT
2627
5556 * |
5657 * 15
5758 */
59+
/*
 * Return how many ranks sit below virtual rank `vrank` in the gather tree
 * spanning `comm` (the node itself is not counted).
 *
 * comm   communicator whose size bounds the tree
 * vrank  virtual rank of the node (0 is the tree root)
 *
 * NOTE(review): this assumes the in-order binomial tree layout, in which the
 * subtree rooted at a non-root `vrank` covers the contiguous virtual ranks
 * [vrank, vrank + lowbit(vrank)), lowbit() being the value of the least
 * significant set bit -- confirm against the tree builder.  The clamp
 * against `size` below handles subtrees that are cut short because the
 * communicator size is not a power of two.
 */
static int32_t get_tree_numdescendants_of(struct ompi_communicator_t* comm,
                                          int vrank)
{
    int max;
    int size = ompi_comm_size(comm);

    if (0 == vrank) {
        /* the root is the ancestor of every other rank */
        return size - 1;
    }

    /*
     * ffs() returns the 1-based index of the least significant set bit, so
     * the nominal subtree size is 1 << (ffs(vrank) - 1).  The previous
     * expression, 1 << ffs(vrank - 1), misplaced the parenthesis and gave
     * wrong sizes (e.g. 2 instead of 4 for vrank 4, 4 instead of 1 for
     * vrank 3), which made callers miscount the expected fragments.
     */
    max = 1 << (ffs(vrank) - 1);

    /* truncate the subtree at the end of the communicator, then drop the node itself */
    return ((vrank + max <= size) ? max : size - vrank) - 1;
}
74+
5875static ompi_coll_portals4_tree_t *
5976ompi_coll_portals4_build_in_order_bmtree ( struct ompi_communicator_t * comm ,
6077 int root )
@@ -506,8 +523,10 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
506523 int32_t expected_ops = 0 ;
507524 int32_t expected_acks = 0 ;
508525
526+ ptl_size_t number_of_fragment_gathered = 0 ;
527+ ptl_size_t number_of_fragment_send = 1 ;
509528
510- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
529+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
511530 "coll:portals4:gather_intra_binomial_top enter rank %d" , request -> u .gather .my_rank ));
512531
513532 request -> type = OMPI_COLL_PORTALS4_TYPE_GATHER ;
@@ -579,6 +598,23 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
579598 ret = setup_sync_handles (comm , request , portals4_module );
580599 if (MPI_SUCCESS != ret ) { line = __LINE__ ; goto err_hdlr ; }
581600
601+ OPAL_OUTPUT_VERBOSE ((10 , ompi_coll_base_framework .framework_output ,
602+ "%s:%d: packed_size=%lu, fragment_size=%lu" ,
603+ __FILE__ , __LINE__ , request -> u .gather .packed_size , mca_coll_portals4_component .ni_limits .max_msg_size ));
604+
605+ for (int i = 0 ; i < bmtree -> tree_nextsize ; i ++ ) {
606+ int child_vrank = VRANK (bmtree -> tree_next [i ], request -> u .gather .root_rank , request -> u .gather .size );
607+ int sub_tree_size = get_tree_numdescendants_of (comm , child_vrank ) + 1 ;
608+ ptl_size_t local_number_of_fragment = ((sub_tree_size * request -> u .gather .packed_size ) + mca_coll_portals4_component .ni_limits .max_msg_size - 1 ) / mca_coll_portals4_component .ni_limits .max_msg_size ;
609+
610+ OPAL_OUTPUT_VERBOSE ((10 , ompi_coll_base_framework .framework_output ,
611+ "%s:%d: %d is child of %d(%d) with %d descendants (nb_frag += %lu)" ,
612+ __FILE__ , __LINE__ , bmtree -> tree_next [i ], vrank , request -> u .gather .root_rank , sub_tree_size , local_number_of_fragment ));
613+ number_of_fragment_gathered += local_number_of_fragment ;
614+ }
615+
616+ number_of_fragment_send = (request -> u .gather .gather_bytes + mca_coll_portals4_component .ni_limits .max_msg_size - 1 ) / mca_coll_portals4_component .ni_limits .max_msg_size ;
617+
582618 /***********************************************/
583619 /* Chain the RTR and Recv-ACK to the Gather CT */
584620 /***********************************************/
@@ -603,7 +639,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
603639 if (vrank == 0 ) {
604640 /* root, so do nothing */
605641
606- expected_ops = bmtree -> tree_nextsize ; /* gather put from each child */
642+ expected_ops = number_of_fragment_gathered ; /* gather put from each child */
607643 expected_acks = 0 ;
608644
609645 } else {
@@ -617,22 +653,32 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
617653 __FILE__ , __LINE__ , vrank ,
618654 remote_offset , vrank , vparent , request -> u .gather .packed_size );
619655
620- expected_ops = bmtree -> tree_nextsize + 1 ; /* gather put from each child + a chained RTR */
656+ expected_ops = number_of_fragment_gathered + 1 ; /* gather puts from each child + a chained RTR */
621657 expected_acks = 1 ; /* Recv-ACK from parent */
622658
623- ret = PtlTriggeredPut (request -> u .gather .gather_mdh ,
624- request -> u .gather .gather_offset ,
625- request -> u .gather .gather_bytes ,
659+ ptl_size_t size_sent = 0 ;
660+ ptl_size_t size_left = request -> u .gather .gather_bytes ;
661+
662+ for (ptl_size_t i = 0 ; i < number_of_fragment_send ; i ++ ) {
663+ ptl_size_t frag_size = (size_left > mca_coll_portals4_component .ni_limits .max_msg_size ) ?
664+ mca_coll_portals4_component .ni_limits .max_msg_size :
665+ size_left ;
666+ ret = PtlTriggeredPut (request -> u .gather .gather_mdh ,
667+ request -> u .gather .gather_offset + size_sent ,
668+ frag_size ,
626669 PTL_NO_ACK_REQ ,
627670 ompi_coll_portals4_get_peer (comm , parent ),
628671 mca_coll_portals4_component .pt_idx ,
629672 request -> u .gather .gather_match_bits ,
630- remote_offset ,
673+ remote_offset + size_sent ,
631674 NULL ,
632675 0 ,
633676 request -> u .gather .gather_cth ,
634677 expected_ops );
635- if (PTL_OK != ret ) { ret = OMPI_ERROR ; line = __LINE__ ; goto err_hdlr ; }
678+ if (PTL_OK != ret ) { ret = OMPI_ERROR ; line = __LINE__ ; goto err_hdlr ; }
679+ size_left -= frag_size ;
680+ size_sent += frag_size ;
681+ }
636682 }
637683
638684 /************************************/
@@ -734,7 +780,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
734780
735781 ompi_coll_portals4_destroy_tree (& (portals4_module -> cached_in_order_bmtree ));
736782
737- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
783+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
738784 "coll:portals4:gather_intra_binomial_top exit rank %d" , request -> u .gather .my_rank ));
739785
740786 return OMPI_SUCCESS ;
@@ -773,8 +819,9 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
773819 int32_t expected_ops = 0 ;
774820 int32_t expected_acks = 0 ;
775821
822+ ptl_size_t number_of_fragment = 1 ;
776823
777- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
824+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
778825 "coll:portals4:gather_intra_linear_top enter rank %d" , request -> u .gather .my_rank ));
779826
780827 request -> type = OMPI_COLL_PORTALS4_TYPE_GATHER ;
@@ -843,6 +890,13 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
843890 ret = setup_sync_handles (comm , request , portals4_module );
844891 if (MPI_SUCCESS != ret ) { line = __LINE__ ; goto err_hdlr ; }
845892
893+ number_of_fragment = (request -> u .gather .packed_size > mca_coll_portals4_component .ni_limits .max_msg_size ) ?
894+ (request -> u .gather .packed_size + mca_coll_portals4_component .ni_limits .max_msg_size - 1 ) / mca_coll_portals4_component .ni_limits .max_msg_size :
895+ 1 ;
896+ opal_output_verbose (90 , ompi_coll_base_framework .framework_output ,
897+ "%s:%d:rank %d:number_of_fragment = %lu" ,
898+ __FILE__ , __LINE__ , request -> u .gather .my_rank , number_of_fragment );
899+
846900 /***********************************************/
847901 /* Chain the RTR and Recv-ACK to the Gather CT */
848902 /***********************************************/
@@ -867,11 +921,13 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
867921 if (i_am_root ) {
868922 /* root, so do nothing */
869923
870- expected_ops = request -> u .gather .size - 1 ; /* gather put from all other ranks */
924+ expected_ops = ( request -> u .gather .size - 1 ) * number_of_fragment ; /* gather put from all other ranks */
871925 expected_acks = 0 ;
872926
873927 } else {
874928 ptl_size_t remote_offset = request -> u .gather .my_rank * request -> u .gather .packed_size ;
929+ ptl_size_t split_offset = 0 ;
930+ ptl_size_t size_left = request -> u .gather .gather_bytes ;
875931
876932 opal_output_verbose (30 , ompi_coll_base_framework .framework_output ,
877933 "%s:%d:rank(%d): remote_offset(%lu)=rank(%d) * packed_size(%ld)" ,
@@ -881,19 +937,34 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
881937 expected_ops = 1 ; /* chained RTR */
882938 expected_acks = 1 ; /* Recv-ACK from root */
883939
884- ret = PtlTriggeredPut (request -> u .gather .gather_mdh ,
885- request -> u .gather .gather_offset ,
886- request -> u .gather .gather_bytes ,
940+ for (ptl_size_t j = 0 ; j < number_of_fragment ; j ++ ) {
941+
942+ ptl_size_t frag_size = (size_left > mca_coll_portals4_component .ni_limits .max_msg_size ) ?
943+ mca_coll_portals4_component .ni_limits .max_msg_size :
944+ size_left ;
945+
946+ opal_output_verbose (10 , ompi_coll_base_framework .framework_output ,
947+ "%s:%d:rank(%d): frag(%lu),offset_frag (%lu) frag_size(%lu)" ,
948+ __FILE__ , __LINE__ , request -> u .gather .my_rank ,
949+ j , split_offset , frag_size );
950+
951+ ret = PtlTriggeredPut (request -> u .gather .gather_mdh ,
952+ request -> u .gather .gather_offset + split_offset ,
953+ frag_size ,
887954 PTL_NO_ACK_REQ ,
888955 ompi_coll_portals4_get_peer (comm , request -> u .gather .root_rank ),
889956 mca_coll_portals4_component .pt_idx ,
890957 request -> u .gather .gather_match_bits ,
891- remote_offset ,
958+ remote_offset + split_offset ,
892959 NULL ,
893960 0 ,
894961 request -> u .gather .gather_cth ,
895962 expected_ops );
896- if (PTL_OK != ret ) { ret = OMPI_ERROR ; line = __LINE__ ; goto err_hdlr ; }
963+ if (PTL_OK != ret ) { ret = OMPI_ERROR ; line = __LINE__ ; goto err_hdlr ; }
964+
965+ size_left -= frag_size ;
966+ split_offset += frag_size ;
967+ }
897968 }
898969
899970 /*****************************************/
@@ -997,7 +1068,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
9971068 "completed CTWait(expected_ops=%d)\n" , expected_ops );
9981069 }
9991070
1000- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1071+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
10011072 "coll:portals4:gather_intra_linear_top exit rank %d" , request -> u .gather .my_rank ));
10021073
10031074 return OMPI_SUCCESS ;
@@ -1020,7 +1091,7 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm
10201091 int ret , line ;
10211092 int i ;
10221093
1023- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1094+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
10241095 "coll:portals4:gather_intra_binomial_bottom enter rank %d" , request -> u .gather .my_rank ));
10251096
10261097 ret = cleanup_gather_handles (request );
@@ -1065,7 +1136,7 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm
10651136 ompi_request_complete (& request -> super , true);
10661137 OPAL_THREAD_UNLOCK (& ompi_request_lock );
10671138
1068- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1139+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
10691140 "coll:portals4:gather_intra_binomial_bottom exit rank %d" , request -> u .gather .my_rank ));
10701141
10711142 return OMPI_SUCCESS ;
@@ -1090,7 +1161,7 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm,
10901161 int ret , line ;
10911162 int i ;
10921163
1093- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1164+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
10941165 "coll:portals4:gather_intra_linear_bottom enter rank %d" , request -> u .gather .my_rank ));
10951166
10961167 ret = cleanup_gather_handles (request );
@@ -1128,7 +1199,7 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm,
11281199 ompi_request_complete (& request -> super , true);
11291200 OPAL_THREAD_UNLOCK (& ompi_request_lock );
11301201
1131- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1202+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
11321203 "coll:portals4:gather_intra_linear_bottom exit rank %d" , request -> u .gather .my_rank ));
11331204
11341205 return OMPI_SUCCESS ;
@@ -1157,7 +1228,7 @@ ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_dataty
11571228
11581229 ompi_coll_portals4_request_t * request ;
11591230
1160- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1231+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
11611232 "coll:portals4:gather_intra enter rank %d" , ompi_comm_rank (comm )));
11621233
11631234 /*
@@ -1204,7 +1275,7 @@ ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_dataty
12041275 */
12051276 OMPI_COLL_PORTALS4_REQUEST_RETURN (request );
12061277
1207- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1278+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
12081279 "coll:portals4:gather_intra exit rank %d" , request -> u .gather .my_rank ));
12091280
12101281 return OMPI_SUCCESS ;
@@ -1230,7 +1301,7 @@ ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datat
12301301
12311302 ompi_coll_portals4_request_t * request ;
12321303
1233- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1304+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
12341305 "coll:portals4:igather_intra enter rank %d" , ompi_comm_rank (comm )));
12351306
12361307 /*
@@ -1267,7 +1338,7 @@ ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datat
12671338 if (MPI_SUCCESS != ret ) { line = __LINE__ ; goto err_hdlr ; }
12681339 }
12691340
1270- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1341+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
12711342 "coll:portals4:igather_intra exit rank %d" , request -> u .gather .my_rank ));
12721343
12731344 return OMPI_SUCCESS ;
@@ -1286,7 +1357,7 @@ ompi_coll_portals4_igather_intra_fini(ompi_coll_portals4_request_t *request)
12861357{
12871358 int ret , line ;
12881359
1289- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1360+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
12901361 "coll:portals4:igather_intra_fini enter rank %d" , request -> u .gather .my_rank ));
12911362
12921363 /*
@@ -1300,7 +1371,7 @@ ompi_coll_portals4_igather_intra_fini(ompi_coll_portals4_request_t *request)
13001371 if (MPI_SUCCESS != ret ) { line = __LINE__ ; goto err_hdlr ; }
13011372 }
13021373
1303- OPAL_OUTPUT (( ompi_coll_base_framework .framework_output ,
1374+ OPAL_OUTPUT_VERBOSE (( 10 , ompi_coll_base_framework .framework_output ,
13041375 "coll:portals4:igather_intra_fini exit rank %d" , request -> u .gather .my_rank ));
13051376
13061377 return OMPI_SUCCESS ;
0 commit comments