@@ -178,6 +178,97 @@ ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt)
178178    return  0 ;
179179}
180180
181+ static   ptl_size_t 
182+ number_of_fragment (ptl_size_t  length , ptl_size_t  maxlength )
183+ {
184+     ptl_size_t  nb_frag  =  length  ==  0  ? 1  : (length  -  1 ) / maxlength  +  1 ;
185+     OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
186+                          "%s,%d : %ld fragment(s)" , __FUNCTION__ , __LINE__ , nb_frag ));
187+     return  nb_frag ;
188+ }
189+ 
190+ static  int 
191+ splittedPtlPut (ptl_handle_md_t  md_h ,
192+             ptl_size_t  loc_offset ,
193+             ptl_size_t  length ,
194+             ptl_ack_req_t  ack_req ,
195+             ptl_process_t  target_id ,
196+             ptl_pt_index_t  pt_index ,
197+             ptl_match_bits_t  match_b ,
198+             ptl_size_t  rem_offset ,
199+             void  * usr_ptr ,
200+             ptl_hdr_data_t  hdr_data )
201+ {
202+     ptl_size_t  length_sent  =  0 ;
203+     do  {
204+         ptl_size_t  length_frag ;
205+         int  ret ;
206+ 
207+         length_frag  =  (length  >  mca_osc_portals4_component .ptl_max_msg_size ) ?
208+             mca_osc_portals4_component .ptl_max_msg_size  :
209+             length ;
210+         OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
211+                              "Put size : %lu/%lu, offset:%lu" , length_frag , length , length_sent ));
212+         ret  =  PtlPut (md_h ,
213+                      loc_offset  +  length_sent ,
214+                      length_frag ,
215+                      ack_req ,
216+                      target_id ,
217+                      pt_index ,
218+                      match_b ,
219+                      rem_offset  +  length_sent ,
220+                      usr_ptr ,
221+                      hdr_data );
222+         if  (PTL_OK  !=  ret ) {
223+             opal_output_verbose (1 , ompi_osc_base_framework .framework_output ,
224+                                  "%s:%d PtlPut failed with return value %d" ,
225+                                  __FUNCTION__ , __LINE__ , ret );
226+             return  ret ;
227+         }
228+         length  -=  length_frag ;
229+         length_sent  +=  length_frag ;
230+     } while  (length );
231+     return  PTL_OK ;
232+ }
233+ 
234+ static  int 
235+ splittedPtlGet (ptl_handle_md_t  md_h ,
236+                ptl_size_t  loc_offset ,
237+                ptl_size_t  length ,
238+                ptl_process_t  target_id ,
239+                ptl_pt_index_t  pt_index ,
240+                ptl_match_bits_t  match_b ,
241+                ptl_size_t  rem_offset ,
242+                void  * usr_ptr )
243+ {
244+     ptl_size_t  length_submitted  =  0 ;
245+     OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output , "Get" ));
246+ 
247+     do  {
248+         ptl_size_t  length_frag ;
249+         int  ret ;
250+         length_frag  =  (length  >  mca_osc_portals4_component .ptl_max_msg_size ) ?
251+             mca_osc_portals4_component .ptl_max_msg_size  :
252+             length ;
253+         ret  =  PtlGet (md_h ,
254+                      (ptl_size_t ) loc_offset  +  length_submitted ,
255+                      length_frag ,
256+                      target_id ,
257+                      pt_index ,
258+                      match_b ,
259+                      rem_offset  +  length_submitted ,
260+                      usr_ptr );
261+         if  (PTL_OK  !=  ret ) {
262+             opal_output_verbose (1 , ompi_osc_base_framework .framework_output ,
263+                                  "%s:%d PtlGet failed with return value %d" ,
264+                                  __FUNCTION__ , __LINE__ , ret );
265+             return  ret ;
266+         }
267+         length  -=  length_frag ;
268+         length_submitted  +=  length_frag ;
269+     } while  (length );
270+     return  PTL_OK ;
271+ }
181272
182273int 
183274ompi_osc_portals4_rput (const  void  * origin_addr ,
@@ -218,15 +309,18 @@ ompi_osc_portals4_rput(const void *origin_addr,
218309                    "MPI_Rput: transfer of non-contiguous memory is not currently supported.\n" );
219310        return  OMPI_ERR_NOT_SUPPORTED ;
220311    } else  {
221-         (void )opal_atomic_add_64 (& module -> opcount , 1 );
222-         request -> ops_expected  =  1 ;
223312        ret  =  ompi_datatype_type_size (origin_dt , & length );
224313        if  (OMPI_SUCCESS  !=  ret ) {
225314            OMPI_OSC_PORTALS4_REQUEST_RETURN (request );
226315            return  ret ;
227316        }
228317        length  *= origin_count ;
229-         ret  =  PtlPut (module -> req_md_h ,
318+         request -> ops_expected  =  number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size );
319+         opal_atomic_add_64 (& module -> opcount , request -> ops_expected );
320+         OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
321+                              "%s,%d Put" , __FUNCTION__ , __LINE__ ));
322+ 
323+         ret  =  splittedPtlPut (module -> req_md_h ,
230324                     (ptl_size_t ) origin_addr ,
231325                     length ,
232326                     PTL_ACK_REQ ,
@@ -285,15 +379,17 @@ ompi_osc_portals4_rget(void *origin_addr,
285379                    "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n" );
286380        return  OMPI_ERR_NOT_SUPPORTED ;
287381    } else  {
288-         (void )opal_atomic_add_64 (& module -> opcount , 1 );
289-         request -> ops_expected  =  1 ;
290382        ret  =  ompi_datatype_type_size (origin_dt , & length );
291383        if  (OMPI_SUCCESS  !=  ret ) {
292384            OMPI_OSC_PORTALS4_REQUEST_RETURN (request );
293385            return  ret ;
294386        }
295387        length  *= origin_count ;
296-         ret  =  PtlGet (module -> req_md_h ,
388+         request -> ops_expected  =  number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size );
389+         opal_atomic_add_64 (& module -> opcount , request -> ops_expected );
390+         OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
391+                              "%s,%d Get" , __FUNCTION__ , __LINE__ ));
392+         ret  =  splittedPtlGet (module -> req_md_h ,
297393                     (ptl_size_t ) origin_addr ,
298394                     length ,
299395                     peer ,
@@ -368,11 +464,13 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
368464
369465        do  {
370466            size_t  msg_length  =  MIN (module -> atomic_max , length  -  sent );
371-             (void )opal_atomic_add_64 (& module -> opcount , 1 );
372-             request -> ops_expected ++ ;
373467
374468            if  (MPI_REPLACE  ==  op ) {
375-                 ret  =  PtlPut (module -> req_md_h ,
469+                 request -> ops_expected  +=  number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size );
470+                 opal_atomic_add_64 (& module -> opcount , number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size ));
471+                 OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
472+                              "%s,%d Put" , __FUNCTION__ , __LINE__ ));
473+                 ret  =  splittedPtlPut (module -> req_md_h ,
376474                             md_offset  +  sent ,
377475                             msg_length ,
378476                             PTL_ACK_REQ ,
@@ -383,6 +481,8 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
383481                             request ,
384482                             0 );
385483            } else  {
484+                 request -> ops_expected ++ ;
485+                 opal_atomic_add_64 (& module -> opcount , 1 );
386486                ret  =  ompi_osc_portals4_get_dt (origin_dt , & ptl_dt );
387487                if  (OMPI_SUCCESS  !=  ret ) {
388488                    opal_output (ompi_osc_base_framework .framework_output ,
@@ -396,7 +496,8 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
396496                            "MPI_Raccumulate: operation is not currently supported" );
397497                    return  OMPI_ERR_NOT_SUPPORTED ;
398498                }
399- 
499+                 OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
500+                                       "%s,%d Atomic" , __FUNCTION__ , __LINE__ ));
400501                ret  =  PtlAtomic (module -> req_md_h ,
401502                                offset  +  sent ,
402503                                msg_length ,
@@ -499,6 +600,8 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
499600                (void )opal_atomic_add_64 (& module -> opcount , 1 );
500601                request -> ops_expected ++ ;
501602
603+                 OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
604+                                       "%s,%d Swap" , __FUNCTION__ , __LINE__ ));
502605                ret  =  PtlSwap (module -> req_md_h ,
503606                              result_md_offset  +  sent ,
504607                              module -> md_h ,
@@ -530,10 +633,11 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
530633            do  {
531634                size_t  msg_length  =  MIN (module -> fetch_atomic_max , length  -  sent );
532635
533-                 (void )opal_atomic_add_64 (& module -> opcount , 1 );
534-                 request -> ops_expected ++ ;
535- 
536-                 ret  =  PtlGet (module -> req_md_h ,
636+                 opal_atomic_add_64 (& module -> opcount , number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size ));
637+                 request -> ops_expected  +=  number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size );
638+                 OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
639+                                      "%s,%d Get" , __FUNCTION__ , __LINE__ ));
640+                 ret  =  splittedPtlGet (module -> req_md_h ,
537641                             md_offset  +  sent ,
538642                             msg_length ,
539643                             peer ,
@@ -558,14 +662,14 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
558662
559663            ret  =  ompi_osc_portals4_get_dt (origin_dt , & ptl_dt );
560664            if  (OMPI_SUCCESS  !=  ret ) {
561-                   opal_output (ompi_osc_base_framework .framework_output ,
665+                 opal_output (ompi_osc_base_framework .framework_output ,
562666                        "MPI_Rget_accumulate: datatype is not currently supported" );
563667                return  OMPI_ERR_NOT_SUPPORTED ;
564668            }
565669
566670            ret  =  ompi_osc_portals4_get_op (op , & ptl_op );
567671            if  (OMPI_SUCCESS  !=  ret ) {
568-                   opal_output (ompi_osc_base_framework .framework_output ,
672+                 opal_output (ompi_osc_base_framework .framework_output ,
569673                        "MPI_Rget_accumulate: operation is not currently supported" );
570674                return  OMPI_ERR_NOT_SUPPORTED ;
571675            }
@@ -576,6 +680,8 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
576680                (void )opal_atomic_add_64 (& module -> opcount , 1 );
577681                request -> ops_expected ++ ;
578682
683+                 OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
684+                                       "%s,%d FetchAtomic" , __FUNCTION__ , __LINE__ ));
579685                ret  =  PtlFetchAtomic (module -> req_md_h ,
580686                                     result_md_offset  +  sent ,
581687                                     module -> md_h ,
@@ -634,13 +740,15 @@ ompi_osc_portals4_put(const void *origin_addr,
634740                    "MPI_Put: transfer of non-contiguous memory is not currently supported.\n" );
635741        return  OMPI_ERR_NOT_SUPPORTED ;
636742    } else  {
637-         (void )opal_atomic_add_64 (& module -> opcount , 1 );
638743        ret  =  ompi_datatype_type_size (origin_dt , & length );
639744        if  (OMPI_SUCCESS  !=  ret ) {
640745            return  ret ;
641746        }
642747        length  *= origin_count ;
643-         ret  =  PtlPut (module -> md_h ,
748+         opal_atomic_add_64 (& module -> opcount , number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size ));
749+         OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
750+                              "%s,%d Put" , __FUNCTION__ , __LINE__ ));
751+         ret  =  splittedPtlPut (module -> md_h ,
644752                     (ptl_size_t ) origin_addr ,
645753                     length ,
646754                     PTL_ACK_REQ ,
@@ -691,13 +799,15 @@ ompi_osc_portals4_get(void *origin_addr,
691799                    "MPI_Get: transfer of non-contiguous memory is not currently supported.\n" );
692800        return  OMPI_ERR_NOT_SUPPORTED ;
693801    } else  {
694-         (void )opal_atomic_add_64 (& module -> opcount , 1 );
695802        ret  =  ompi_datatype_type_size (origin_dt , & length );
696803        if  (OMPI_SUCCESS  !=  ret ) {
697804            return  ret ;
698805        }
699806        length  *= origin_count ;
700-         ret  =  PtlGet (module -> md_h ,
807+         opal_atomic_add_64 (& module -> opcount , number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size ));
808+         OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
809+                               "%s,%d Get" , __FUNCTION__ , __LINE__ ));
810+         ret  =  splittedPtlGet (module -> md_h ,
701811                     (ptl_size_t ) origin_addr ,
702812                     length ,
703813                     peer ,
@@ -763,10 +873,12 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
763873
764874        do  {
765875            size_t  msg_length  =  MIN (module -> atomic_max , length  -  sent );
766-             (void )opal_atomic_add_64 (& module -> opcount , 1 );
767876
768877            if  (MPI_REPLACE  ==  op ) {
769-                 ret  =  PtlPut (module -> md_h ,
878+                 opal_atomic_add_64 (& module -> opcount , number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size ));
879+                 OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
880+                                      "%s,%d Put" , __FUNCTION__ , __LINE__ ));
881+                 ret  =  splittedPtlPut (module -> md_h ,
770882                             md_offset  +  sent ,
771883                             msg_length ,
772884                             PTL_ACK_REQ ,
@@ -777,6 +889,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
777889                             NULL ,
778890                             0 );
779891            } else  {
892+                 (void )opal_atomic_add_64 (& module -> opcount , 1 );
780893                ret  =  ompi_osc_portals4_get_dt (origin_dt , & ptl_dt );
781894                if  (OMPI_SUCCESS  !=  ret ) {
782895                    opal_output (ompi_osc_base_framework .framework_output ,
@@ -791,6 +904,8 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
791904                    return  OMPI_ERR_NOT_SUPPORTED ;
792905                }
793906
907+                 OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
908+                              "%s,%d Atomic" , __FUNCTION__ , __LINE__ ));
794909                ret  =  PtlAtomic (module -> md_h ,
795910                                md_offset  +  sent ,
796911                                msg_length ,
@@ -882,6 +997,8 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
882997
883998                (void )opal_atomic_add_64 (& module -> opcount , 1 );
884999
1000+                 OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1001+                                       "%s,%d Swap" , __FUNCTION__ , __LINE__ ));
8851002                ret  =  PtlSwap (module -> md_h ,
8861003                              result_md_offset  +  sent ,
8871004                              module -> md_h ,
@@ -912,9 +1029,10 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
9121029            do  {
9131030                size_t  msg_length  =  MIN (module -> fetch_atomic_max , length  -  sent );
9141031
915-                 (void )opal_atomic_add_64 (& module -> opcount , 1 );
916- 
917-                 ret  =  PtlGet (module -> md_h ,
1032+                 opal_atomic_add_64 (& module -> opcount , number_of_fragment (msg_length , mca_osc_portals4_component .ptl_max_msg_size ));
1033+                 OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1034+                                      "%s,%d Get" , __FUNCTION__ , __LINE__ ));
1035+                 ret  =  splittedPtlGet (module -> md_h ,
9181036                             md_offset  +  sent ,
9191037                             msg_length ,
9201038                             peer ,
@@ -955,6 +1073,8 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
9551073
9561074                (void )opal_atomic_add_64 (& module -> opcount , 1 );
9571075
1076+                 OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1077+                                       "%s,%d FetchAtomic" , __FUNCTION__ , __LINE__ ));
9581078                ret  =  PtlFetchAtomic (module -> md_h ,
9591079                                     result_md_offset  +  sent ,
9601080                                     module -> md_h ,
@@ -1025,6 +1145,8 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr,
10251145
10261146    (void )opal_atomic_add_64 (& module -> opcount , 1 );
10271147
1148+     OPAL_OUTPUT_VERBOSE ((90 ,ompi_osc_base_framework .framework_output ,
1149+                          "%s,%d Swap" , __FUNCTION__ , __LINE__ ));
10281150    ret  =  PtlSwap (module -> md_h ,
10291151                  result_md_offset ,
10301152                  module -> md_h ,
@@ -1087,14 +1209,15 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
10871209
10881210    assert (length  <= module -> fetch_atomic_max );
10891211
1090-     (void )opal_atomic_add_64 (& module -> opcount , 1 );
1091- 
10921212    if  (MPI_REPLACE  ==  op ) {
10931213        ptl_size_t  result_md_offset , origin_md_offset ;
10941214
10951215        result_md_offset  =  (ptl_size_t ) result_addr ;
10961216        origin_md_offset  =  (ptl_size_t ) origin_addr ;
10971217
1218+         (void )opal_atomic_add_64 (& module -> opcount , 1 );
1219+         OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1220+                              "%s,%d Swap" , __FUNCTION__ , __LINE__ ));
10981221        ret  =  PtlSwap (module -> md_h ,
10991222                      result_md_offset ,
11001223                      module -> md_h ,
@@ -1114,7 +1237,10 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
11141237
11151238        md_offset  =  (ptl_size_t ) result_addr ;
11161239
1117-         ret  =  PtlGet (module -> md_h ,
1240+         opal_atomic_add_64 (& module -> opcount , number_of_fragment (length , mca_osc_portals4_component .ptl_max_msg_size ));
1241+         OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1242+                              "%s,%d Get" , __FUNCTION__ , __LINE__ ));
1243+         ret  =  splittedPtlGet (module -> md_h ,
11181244                     md_offset ,
11191245                     length ,
11201246                     peer ,
@@ -1124,6 +1250,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
11241250                     NULL );
11251251    } else  {
11261252        ptl_size_t  result_md_offset , origin_md_offset ;
1253+         (void )opal_atomic_add_64 (& module -> opcount , 1 );
11271254
11281255        ret  =  ompi_osc_portals4_get_op (op , & ptl_op );
11291256        if  (OMPI_SUCCESS  !=  ret ) {
@@ -1135,6 +1262,8 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
11351262        result_md_offset  =  (ptl_size_t ) result_addr ;
11361263        origin_md_offset  =  (ptl_size_t ) origin_addr ;
11371264
1265+         OPAL_OUTPUT_VERBOSE ((90 , ompi_osc_base_framework .framework_output ,
1266+                              "%s,%d FetchAtomic" , __FUNCTION__ , __LINE__ ));
11381267        ret  =  PtlFetchAtomic (module -> md_h ,
11391268                             result_md_offset ,
11401269                             module -> md_h ,
0 commit comments