Skip to content

Commit c418190

Browse files
committed
osc/portals4: Be sure that the MEs are operational (wait for the PTL_EVENT_LINK)
1 parent e99e7d0 commit c418190

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

ompi/mca/osc/portals4/osc_portals4.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ struct ompi_osc_portals4_component_t {
5454
ptl_size_t matching_fetch_atomic_max;
5555
ptl_size_t matching_atomic_ordered_size;
5656
ptl_uid_t uid;
57+
opal_mutex_t lock;
58+
opal_condition_t cond;
5759

5860
opal_free_list_t requests; /* request free list for the r* communication variants */
5961
};
@@ -83,6 +85,7 @@ struct ompi_osc_portals4_module_t {
8385
ptl_handle_ni_t ni_h; /* network interface used by this window */
8486
ptl_pt_index_t pt_idx; /* portal table index used by this window (this will be same across window) */
8587
ptl_handle_ct_t ct_h; /* Counting event handle used for completion in this window */
88+
int ct_link; /* PTL_EVENT_LINK flag */
8689
ptl_handle_md_t md_h; /* memory descriptor describing all of memory used by this window */
8790
ptl_handle_md_t req_md_h; /* memory descriptor with event completion used by this window */
8891
ptl_handle_me_t data_me_h; /* data match list entry (MB are CID | OSC_PORTALS4_MB_DATA) */

ompi/mca/osc/portals4/osc_portals4_component.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,13 @@ progress_callback(void)
218218
count++;
219219

220220
if (NULL != ev.user_ptr) {
221+
/* be sure that we receive the PTL_EVENT_LINK */
222+
if (ev.type == PTL_EVENT_LINK) {
223+
*(int *)ev.user_ptr = *(int *)ev.user_ptr + 1;
224+
opal_condition_broadcast(&mca_osc_portals4_component.cond);
225+
continue;
226+
}
227+
221228
req = (ompi_osc_portals4_request_t*) ev.user_ptr;
222229
opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength);
223230
ops = opal_atomic_add_32(&req->ops_committed, 1);
@@ -503,7 +510,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
503510
module->pt_idx,
504511
&me,
505512
PTL_PRIORITY_LIST,
506-
NULL,
513+
&module->ct_link,
507514
&module->data_me_h);
508515
if (PTL_OK != ret) {
509516
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
@@ -526,7 +533,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
526533
module->pt_idx,
527534
&me,
528535
PTL_PRIORITY_LIST,
529-
NULL,
536+
&module->ct_link,
530537
&module->control_me_h);
531538
if (PTL_OK != ret) {
532539
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
@@ -574,6 +581,13 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
574581
PtlAtomicSync();
575582

576583
/* Make sure that everyone's ready to receive. */
584+
OPAL_THREAD_LOCK(&mca_osc_portals4_component.lock);
585+
while (module->ct_link != 2) {
586+
opal_condition_wait(&mca_osc_portals4_component.cond,
587+
&mca_osc_portals4_component.lock);
588+
}
589+
OPAL_THREAD_UNLOCK(&mca_osc_portals4_component.lock);
590+
577591
module->comm->c_coll.coll_barrier(module->comm,
578592
module->comm->c_coll.coll_barrier_module);
579593

@@ -618,6 +632,7 @@ ompi_osc_portals4_free(struct ompi_win_t *win)
618632
module->comm->c_coll.coll_barrier_module);
619633

620634
/* cleanup */
635+
PtlMEUnlink(module->control_me_h);
621636
PtlMEUnlink(module->data_me_h);
622637
PtlMDRelease(module->md_h);
623638
PtlMDRelease(module->req_md_h);

0 commit comments

Comments
 (0)