Skip to content

Commit 7aaf16e

Browse files
committed
osc/portals4: Put/Get splitting because Portals4 may restrict sizes
1 parent 025201b commit 7aaf16e

File tree

3 files changed

+175
-28
lines changed

3 files changed

+175
-28
lines changed

ompi/mca/osc/portals4/osc_portals4.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ struct ompi_osc_portals4_component_t {
5353
ptl_size_t matching_atomic_max;
5454
ptl_size_t matching_fetch_atomic_max;
5555
ptl_size_t matching_atomic_ordered_size;
56+
ptl_size_t ptl_max_msg_size; /* max size given by portals (cf PtlNIInit) */
5657
ptl_uid_t uid;
5758
opal_mutex_t lock;
5859
opal_condition_t cond;

ompi/mca/osc/portals4/osc_portals4_comm.c

Lines changed: 157 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,97 @@ ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt)
178178
return 0;
179179
}
180180

181+
static ptl_size_t
182+
number_of_fragment(ptl_size_t length, ptl_size_t maxlength)
183+
{
184+
ptl_size_t nb_frag = length == 0 ? 1 : (length - 1) / maxlength + 1;
185+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
186+
"%s,%d : %ld fragment(s)", __FUNCTION__, __LINE__, nb_frag));
187+
return nb_frag;
188+
}
189+
190+
static int
191+
splittedPtlPut(ptl_handle_md_t md_h,
192+
ptl_size_t loc_offset,
193+
ptl_size_t length,
194+
ptl_ack_req_t ack_req,
195+
ptl_process_t target_id,
196+
ptl_pt_index_t pt_index,
197+
ptl_match_bits_t match_b,
198+
ptl_size_t rem_offset,
199+
void *usr_ptr,
200+
ptl_hdr_data_t hdr_data)
201+
{
202+
ptl_size_t length_sent = 0;
203+
do {
204+
ptl_size_t length_frag;
205+
int ret;
206+
207+
length_frag = (length > mca_osc_portals4_component.ptl_max_msg_size) ?
208+
mca_osc_portals4_component.ptl_max_msg_size :
209+
length;
210+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
211+
"Put size : %lu/%lu, offset:%lu", length_frag, length, length_sent));
212+
ret = PtlPut(md_h,
213+
loc_offset + length_sent,
214+
length_frag,
215+
ack_req,
216+
target_id,
217+
pt_index,
218+
match_b,
219+
rem_offset + length_sent,
220+
usr_ptr,
221+
hdr_data);
222+
if (PTL_OK != ret) {
223+
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
224+
"%s:%d PtlPut failed with return value %d",
225+
__FUNCTION__, __LINE__, ret);
226+
return ret;
227+
}
228+
length -= length_frag;
229+
length_sent += length_frag;
230+
} while (length);
231+
return PTL_OK;
232+
}
233+
234+
static int
235+
splittedPtlGet(ptl_handle_md_t md_h,
236+
ptl_size_t loc_offset,
237+
ptl_size_t length,
238+
ptl_process_t target_id,
239+
ptl_pt_index_t pt_index,
240+
ptl_match_bits_t match_b,
241+
ptl_size_t rem_offset,
242+
void *usr_ptr)
243+
{
244+
ptl_size_t length_submitted = 0;
245+
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, "Get"));
246+
247+
do {
248+
ptl_size_t length_frag;
249+
int ret;
250+
length_frag = (length > mca_osc_portals4_component.ptl_max_msg_size) ?
251+
mca_osc_portals4_component.ptl_max_msg_size :
252+
length;
253+
ret = PtlGet(md_h,
254+
(ptl_size_t) loc_offset + length_submitted,
255+
length_frag,
256+
target_id,
257+
pt_index,
258+
match_b,
259+
rem_offset + length_submitted,
260+
usr_ptr);
261+
if (PTL_OK != ret) {
262+
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
263+
"%s:%d PtlGet failed with return value %d",
264+
__FUNCTION__, __LINE__, ret);
265+
return ret;
266+
}
267+
length -= length_frag;
268+
length_submitted += length_frag;
269+
} while (length);
270+
return PTL_OK;
271+
}
181272

182273
int
183274
ompi_osc_portals4_rput(const void *origin_addr,
@@ -218,15 +309,18 @@ ompi_osc_portals4_rput(const void *origin_addr,
218309
"MPI_Rput: transfer of non-contiguous memory is not currently supported.\n");
219310
return OMPI_ERR_NOT_SUPPORTED;
220311
} else {
221-
(void)opal_atomic_add_64(&module->opcount, 1);
222-
request->ops_expected = 1;
223312
ret = ompi_datatype_type_size(origin_dt, &length);
224313
if (OMPI_SUCCESS != ret) {
225314
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
226315
return ret;
227316
}
228317
length *= origin_count;
229-
ret = PtlPut(module->req_md_h,
318+
request->ops_expected = number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size);
319+
opal_atomic_add_64(&module->opcount, request->ops_expected);
320+
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
321+
"%s,%d Put", __FUNCTION__, __LINE__));
322+
323+
ret = splittedPtlPut(module->req_md_h,
230324
(ptl_size_t) origin_addr,
231325
length,
232326
PTL_ACK_REQ,
@@ -285,15 +379,17 @@ ompi_osc_portals4_rget(void *origin_addr,
285379
"MPI_Rget: transfer of non-contiguous memory is not currently supported.\n");
286380
return OMPI_ERR_NOT_SUPPORTED;
287381
} else {
288-
(void)opal_atomic_add_64(&module->opcount, 1);
289-
request->ops_expected = 1;
290382
ret = ompi_datatype_type_size(origin_dt, &length);
291383
if (OMPI_SUCCESS != ret) {
292384
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
293385
return ret;
294386
}
295387
length *= origin_count;
296-
ret = PtlGet(module->req_md_h,
388+
request->ops_expected = number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size);
389+
opal_atomic_add_64(&module->opcount, request->ops_expected);
390+
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
391+
"%s,%d Get", __FUNCTION__, __LINE__));
392+
ret = splittedPtlGet(module->req_md_h,
297393
(ptl_size_t) origin_addr,
298394
length,
299395
peer,
@@ -368,11 +464,13 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
368464

369465
do {
370466
size_t msg_length = MIN(module->atomic_max, length - sent);
371-
(void)opal_atomic_add_64(&module->opcount, 1);
372-
request->ops_expected++;
373467

374468
if (MPI_REPLACE == op) {
375-
ret = PtlPut(module->req_md_h,
469+
request->ops_expected += number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size);
470+
opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size));
471+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
472+
"%s,%d Put", __FUNCTION__, __LINE__));
473+
ret = splittedPtlPut(module->req_md_h,
376474
md_offset + sent,
377475
msg_length,
378476
PTL_ACK_REQ,
@@ -383,6 +481,8 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
383481
request,
384482
0);
385483
} else {
484+
request->ops_expected++;
485+
opal_atomic_add_64(&module->opcount, 1);
386486
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
387487
if (OMPI_SUCCESS != ret) {
388488
opal_output(ompi_osc_base_framework.framework_output,
@@ -396,7 +496,8 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
396496
"MPI_Raccumulate: operation is not currently supported");
397497
return OMPI_ERR_NOT_SUPPORTED;
398498
}
399-
499+
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
500+
"%s,%d Atomic", __FUNCTION__, __LINE__));
400501
ret = PtlAtomic(module->req_md_h,
401502
offset + sent,
402503
msg_length,
@@ -499,6 +600,8 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
499600
(void)opal_atomic_add_64(&module->opcount, 1);
500601
request->ops_expected++;
501602

603+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
604+
"%s,%d Swap", __FUNCTION__, __LINE__));
502605
ret = PtlSwap(module->req_md_h,
503606
result_md_offset + sent,
504607
module->md_h,
@@ -530,10 +633,11 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
530633
do {
531634
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
532635

533-
(void)opal_atomic_add_64(&module->opcount, 1);
534-
request->ops_expected++;
535-
536-
ret = PtlGet(module->req_md_h,
636+
opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size));
637+
request->ops_expected += number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size);
638+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
639+
"%s,%d Get", __FUNCTION__, __LINE__));
640+
ret = splittedPtlGet(module->req_md_h,
537641
md_offset + sent,
538642
msg_length,
539643
peer,
@@ -558,14 +662,14 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
558662

559663
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
560664
if (OMPI_SUCCESS != ret) {
561-
opal_output(ompi_osc_base_framework.framework_output,
665+
opal_output(ompi_osc_base_framework.framework_output,
562666
"MPI_Rget_accumulate: datatype is not currently supported");
563667
return OMPI_ERR_NOT_SUPPORTED;
564668
}
565669

566670
ret = ompi_osc_portals4_get_op(op, &ptl_op);
567671
if (OMPI_SUCCESS != ret) {
568-
opal_output(ompi_osc_base_framework.framework_output,
672+
opal_output(ompi_osc_base_framework.framework_output,
569673
"MPI_Rget_accumulate: operation is not currently supported");
570674
return OMPI_ERR_NOT_SUPPORTED;
571675
}
@@ -576,6 +680,8 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
576680
(void)opal_atomic_add_64(&module->opcount, 1);
577681
request->ops_expected++;
578682

683+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
684+
"%s,%d FetchAtomic", __FUNCTION__, __LINE__));
579685
ret = PtlFetchAtomic(module->req_md_h,
580686
result_md_offset + sent,
581687
module->md_h,
@@ -634,13 +740,15 @@ ompi_osc_portals4_put(const void *origin_addr,
634740
"MPI_Put: transfer of non-contiguous memory is not currently supported.\n");
635741
return OMPI_ERR_NOT_SUPPORTED;
636742
} else {
637-
(void)opal_atomic_add_64(&module->opcount, 1);
638743
ret = ompi_datatype_type_size(origin_dt, &length);
639744
if (OMPI_SUCCESS != ret) {
640745
return ret;
641746
}
642747
length *= origin_count;
643-
ret = PtlPut(module->md_h,
748+
opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size));
749+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
750+
"%s,%d Put", __FUNCTION__, __LINE__));
751+
ret = splittedPtlPut(module->md_h,
644752
(ptl_size_t) origin_addr,
645753
length,
646754
PTL_ACK_REQ,
@@ -691,13 +799,15 @@ ompi_osc_portals4_get(void *origin_addr,
691799
"MPI_Get: transfer of non-contiguous memory is not currently supported.\n");
692800
return OMPI_ERR_NOT_SUPPORTED;
693801
} else {
694-
(void)opal_atomic_add_64(&module->opcount, 1);
695802
ret = ompi_datatype_type_size(origin_dt, &length);
696803
if (OMPI_SUCCESS != ret) {
697804
return ret;
698805
}
699806
length *= origin_count;
700-
ret = PtlGet(module->md_h,
807+
opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size));
808+
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
809+
"%s,%d Get", __FUNCTION__, __LINE__));
810+
ret = splittedPtlGet(module->md_h,
701811
(ptl_size_t) origin_addr,
702812
length,
703813
peer,
@@ -763,10 +873,12 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
763873

764874
do {
765875
size_t msg_length = MIN(module->atomic_max, length - sent);
766-
(void)opal_atomic_add_64(&module->opcount, 1);
767876

768877
if (MPI_REPLACE == op) {
769-
ret = PtlPut(module->md_h,
878+
opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size));
879+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
880+
"%s,%d Put", __FUNCTION__, __LINE__));
881+
ret = splittedPtlPut(module->md_h,
770882
md_offset + sent,
771883
msg_length,
772884
PTL_ACK_REQ,
@@ -777,6 +889,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
777889
NULL,
778890
0);
779891
} else {
892+
(void)opal_atomic_add_64(&module->opcount, 1);
780893
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
781894
if (OMPI_SUCCESS != ret) {
782895
opal_output(ompi_osc_base_framework.framework_output,
@@ -791,6 +904,8 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
791904
return OMPI_ERR_NOT_SUPPORTED;
792905
}
793906

907+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
908+
"%s,%d Atomic", __FUNCTION__, __LINE__));
794909
ret = PtlAtomic(module->md_h,
795910
md_offset + sent,
796911
msg_length,
@@ -882,6 +997,8 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
882997

883998
(void)opal_atomic_add_64(&module->opcount, 1);
884999

1000+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1001+
"%s,%d Swap", __FUNCTION__, __LINE__));
8851002
ret = PtlSwap(module->md_h,
8861003
result_md_offset + sent,
8871004
module->md_h,
@@ -912,9 +1029,10 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
9121029
do {
9131030
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
9141031

915-
(void)opal_atomic_add_64(&module->opcount, 1);
916-
917-
ret = PtlGet(module->md_h,
1032+
opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size));
1033+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1034+
"%s,%d Get", __FUNCTION__, __LINE__));
1035+
ret = splittedPtlGet(module->md_h,
9181036
md_offset + sent,
9191037
msg_length,
9201038
peer,
@@ -955,6 +1073,8 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
9551073

9561074
(void)opal_atomic_add_64(&module->opcount, 1);
9571075

1076+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1077+
"%s,%d FetchAtomic", __FUNCTION__, __LINE__));
9581078
ret = PtlFetchAtomic(module->md_h,
9591079
result_md_offset + sent,
9601080
module->md_h,
@@ -1025,6 +1145,8 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr,
10251145

10261146
(void)opal_atomic_add_64(&module->opcount, 1);
10271147

1148+
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
1149+
"%s,%d Swap", __FUNCTION__, __LINE__));
10281150
ret = PtlSwap(module->md_h,
10291151
result_md_offset,
10301152
module->md_h,
@@ -1087,14 +1209,15 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
10871209

10881210
assert(length <= module->fetch_atomic_max);
10891211

1090-
(void)opal_atomic_add_64(&module->opcount, 1);
1091-
10921212
if (MPI_REPLACE == op) {
10931213
ptl_size_t result_md_offset, origin_md_offset;
10941214

10951215
result_md_offset = (ptl_size_t) result_addr;
10961216
origin_md_offset = (ptl_size_t) origin_addr;
10971217

1218+
(void)opal_atomic_add_64(&module->opcount, 1);
1219+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1220+
"%s,%d Swap", __FUNCTION__, __LINE__));
10981221
ret = PtlSwap(module->md_h,
10991222
result_md_offset,
11001223
module->md_h,
@@ -1114,7 +1237,10 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
11141237

11151238
md_offset = (ptl_size_t) result_addr;
11161239

1117-
ret = PtlGet(module->md_h,
1240+
opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size));
1241+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1242+
"%s,%d Get", __FUNCTION__, __LINE__));
1243+
ret = splittedPtlGet(module->md_h,
11181244
md_offset,
11191245
length,
11201246
peer,
@@ -1124,6 +1250,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
11241250
NULL);
11251251
} else {
11261252
ptl_size_t result_md_offset, origin_md_offset;
1253+
(void)opal_atomic_add_64(&module->opcount, 1);
11271254

11281255
ret = ompi_osc_portals4_get_op(op, &ptl_op);
11291256
if (OMPI_SUCCESS != ret) {
@@ -1135,6 +1262,8 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
11351262
result_md_offset = (ptl_size_t) result_addr;
11361263
origin_md_offset = (ptl_size_t) origin_addr;
11371264

1265+
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1266+
"%s,%d FetchAtomic", __FUNCTION__, __LINE__));
11381267
ret = PtlFetchAtomic(module->md_h,
11391268
result_md_offset,
11401269
module->md_h,

0 commit comments

Comments
 (0)