diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c index 586ec0da21..9e2b884a6f 100644 --- a/opal/mca/btl/vader/btl_vader_component.c +++ b/opal/mca/btl/vader/btl_vader_component.c @@ -633,22 +633,21 @@ static void mca_btl_vader_progress_waiting (mca_btl_base_endpoint_t *ep) return; } - OPAL_THREAD_LOCK(&ep->lock); + OPAL_THREAD_LOCK(&ep->pending_frags_lock); OPAL_LIST_FOREACH_SAFE(frag, next, &ep->pending_frags, mca_btl_vader_frag_t) { - OPAL_THREAD_UNLOCK(&ep->lock); ret = vader_fifo_write_ep (frag->hdr, ep); if (!ret) { + OPAL_THREAD_UNLOCK(&ep->pending_frags_lock); return; } - OPAL_THREAD_LOCK(&ep->lock); (void) opal_list_remove_first (&ep->pending_frags); } ep->waiting = false; opal_list_remove_item (&mca_btl_vader_component.pending_endpoints, &ep->super); - OPAL_THREAD_UNLOCK(&ep->lock); + OPAL_THREAD_UNLOCK(&ep->pending_frags_lock); } /** diff --git a/opal/mca/btl/vader/btl_vader_endpoint.h b/opal/mca/btl/vader/btl_vader_endpoint.h index bc57d2d95a..2fd957dfbb 100644 --- a/opal/mca/btl/vader/btl_vader_endpoint.h +++ b/opal/mca/btl/vader/btl_vader_endpoint.h @@ -62,7 +62,7 @@ typedef struct mca_btl_base_endpoint_t { int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing * SMP specfic data structures. */ - uint32_t send_count; /**< number of fragments sent to this peer */ + volatile uint64_t send_count; /**< number of fragments sent to this peer */ char *segment_base; /**< start of the peer's segment (in the address space * of this process) */ @@ -84,6 +84,7 @@ typedef struct mca_btl_base_endpoint_t { } other; } segment_data; + opal_mutex_t pending_frags_lock; /**< protect pending_frags */ opal_list_t pending_frags; /**< fragments pending fast box space */ bool waiting; /**< endpoint is on the component wait list */ } mca_btl_base_endpoint_t; @@ -94,15 +95,15 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_endpoint_t); static inline void mca_btl_vader_endpoint_setup_fbox_recv (struct mca_btl_base_endpoint_t *endpoint, void *base) { - endpoint->fbox_in.buffer = base; endpoint->fbox_in.startp = (uint32_t *) base; endpoint->fbox_in.start = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_in.seq = 0; + opal_atomic_wmb (); + endpoint->fbox_in.buffer = base; } static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_endpoint_t *endpoint, void *base) { - endpoint->fbox_out.buffer = base; endpoint->fbox_out.start = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.end = MCA_BTL_VADER_FBOX_ALIGNMENT; endpoint->fbox_out.startp = (uint32_t *) base; @@ -111,6 +112,9 @@ static inline void mca_btl_vader_endpoint_setup_fbox_send (struct mca_btl_base_e /* zero out the first header in the fast box */ memset ((char *) base + MCA_BTL_VADER_FBOX_ALIGNMENT, 0, MCA_BTL_VADER_FBOX_ALIGNMENT); + + opal_atomic_wmb (); + endpoint->fbox_out.buffer = base; } #endif /* MCA_BTL_VADER_ENDPOINT_H */ diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h index d646263054..dfef75bb38 100644 --- a/opal/mca/btl/vader/btl_vader_fbox.h +++ b/opal/mca/btl/vader/btl_vader_fbox.h @@ -117,6 +117,7 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t if (OPAL_UNLIKELY(buffer_free < size)) { ep->fbox_out.end = (hbs << 31) | end; + opal_atomic_wmb (); OPAL_THREAD_UNLOCK(&ep->lock); return NULL; } @@ -141,6 +142,7 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t /* align the buffer */ ep->fbox_out.end = ((uint32_t) hbs << 31) | end; + opal_atomic_wmb (); OPAL_THREAD_UNLOCK(&ep->lock); return dst + sizeof (mca_btl_vader_fbox_hdr_t); @@ -247,6 +249,7 @@ static inline bool mca_btl_vader_check_fboxes (void) /* save where we left off */ /* let the sender know where we stopped */ + opal_atomic_mb (); ep->fbox_in.start = ep->fbox_in.startp[0] = ((uint32_t) hbs << 31) | start; processed = true; } @@ -258,8 +261,7 @@ static inline bool mca_btl_vader_check_fboxes (void) static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr) { - if (NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == ++ep->send_count) { - + if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD64 ((volatile int64_t *) &ep->send_count, 1))) { /* protect access to mca_btl_vader_component.segment_offset */ OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); diff --git a/opal/mca/btl/vader/btl_vader_module.c b/opal/mca/btl/vader/btl_vader_module.c index 094eb505f1..1464d007b4 100644 --- a/opal/mca/btl/vader/btl_vader_module.c +++ b/opal/mca/btl/vader/btl_vader_module.c @@ -524,12 +524,14 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_ static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep) { OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t); + OBJ_CONSTRUCT(&ep->pending_frags_lock, opal_mutex_t); ep->fifo = NULL; } static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep) { OBJ_DESTRUCT(&ep->pending_frags); + OBJ_DESTRUCT(&ep->pending_frags_lock); #if OPAL_BTL_VADER_HAVE_XPMEM if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { diff --git a/opal/mca/btl/vader/btl_vader_send.c b/opal/mca/btl/vader/btl_vader_send.c index 59a10c366a..08bfa5a623 100644 --- a/opal/mca/btl/vader/btl_vader_send.c +++ b/opal/mca/btl/vader/btl_vader_send.c @@ -57,7 +57,7 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl, /* post the relative address of the descriptor into the peer's fifo */ if (opal_list_get_size (&endpoint->pending_frags) || !vader_fifo_write_ep (frag->hdr, endpoint)) { frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - OPAL_THREAD_LOCK(&endpoint->lock); + OPAL_THREAD_LOCK(&endpoint->pending_frags_lock); opal_list_append (&endpoint->pending_frags, (opal_list_item_t *) frag); if (!endpoint->waiting) { OPAL_THREAD_LOCK(&mca_btl_vader_component.lock); @@ -65,7 +65,7 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl, OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock); endpoint->waiting = true; } - OPAL_THREAD_UNLOCK(&endpoint->lock); + OPAL_THREAD_UNLOCK(&endpoint->pending_frags_lock); return OPAL_SUCCESS; }