From cb70954896622789cc89fc1057d160ca4d8f5fd3 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 21 Sep 2016 07:42:48 -0400 Subject: [PATCH] Fix MT wait-sync. Prevent a race condition between a thread checking count and then going in cond_wait, and another thread setting the count to 0 and signaling the condition. Thanks to Pascal Deveze for catching the bug and for the initial patch. (cherry picked from commit 131fe42db8d760a3c218bb2ea092d0a9256ea1e8) --- opal/threads/wait_sync.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/opal/threads/wait_sync.c b/opal/threads/wait_sync.c index c9b91372442..31361c6964c 100644 --- a/opal/threads/wait_sync.c +++ b/opal/threads/wait_sync.c @@ -25,12 +25,24 @@ static ompi_wait_sync_t* wait_sync_list = NULL; int sync_wait_mt(ompi_wait_sync_t *sync) { + /* Don't stop if the waiting synchronization is completed. We avoid the + * race condition around the release of the synchronization using the + * signaling field. + */ if(sync->count <= 0) return (0 == sync->status) ? OPAL_SUCCESS : OPAL_ERROR; /* lock so nobody can signal us during the list updating */ pthread_mutex_lock(&sync->lock); + /* Now that we hold the lock make sure another thread has not already + * call cond_signal. + */ + if(sync->count <= 0) { + pthread_mutex_unlock(&sync->lock); + return (0 == sync->status) ? OPAL_SUCCESS : OPAL_ERROR; + } + /* Insert sync on the list of pending synchronization constructs */ OPAL_THREAD_LOCK(&wait_sync_lock); if( NULL == wait_sync_list ) {