diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index e0c8cf24104407..a53920436901f2 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -4301,6 +4301,39 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
 
   TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
 
+#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
+  // suppress race conditions detection on synchronization flags in debug mode
+  // this helps to analyze library internals eliminating false positives
+  __itt_suppress_mark_range(
+      __itt_suppress_range, __itt_suppress_threading_errors,
+      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
+  __itt_suppress_mark_range(
+      __itt_suppress_range, __itt_suppress_threading_errors,
+      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
+#if KMP_OS_WINDOWS
+  __itt_suppress_mark_range(
+      __itt_suppress_range, __itt_suppress_threading_errors,
+      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
+#else
+  __itt_suppress_mark_range(__itt_suppress_range,
+                            __itt_suppress_threading_errors,
+                            &new_thr->th.th_suspend_init_count,
+                            sizeof(new_thr->th.th_suspend_init_count));
+#endif
+  // TODO: check if we need to also suppress b_arrived flags
+  __itt_suppress_mark_range(__itt_suppress_range,
+                            __itt_suppress_threading_errors,
+                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
+                            sizeof(new_thr->th.th_bar[0].bb.b_go));
+  __itt_suppress_mark_range(__itt_suppress_range,
+                            __itt_suppress_threading_errors,
+                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
+                            sizeof(new_thr->th.th_bar[1].bb.b_go));
+  __itt_suppress_mark_range(__itt_suppress_range,
+                            __itt_suppress_threading_errors,
+                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
+                            sizeof(new_thr->th.th_bar[2].bb.b_go));
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
   if (__kmp_storage_map) {
     __kmp_print_thread_storage_map(new_thr, new_gtid);
   }
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 2ddc2e7a6fd7da..c5a3744ad27ba7 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -420,7 +420,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
       (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
   TCW_4(thread_data->td.td_deque_ntasks,
         TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
-
+  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
+  KMP_FSYNC_RELEASING(taskdata); // releasing child
   KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                 "task=%p ntasks=%d head=%u tail=%u\n",
                 gtid, taskdata, thread_data->td.td_deque_ntasks,
@@ -1560,6 +1561,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
     else
       kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
   }
+  KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
 #endif
 
 #ifdef KMP_GOMP_COMPAT
@@ -1577,11 +1579,12 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
       // Barrier imbalance - adjust arrive time with the task duration
       thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
     }
+    KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
+    KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
 #endif
   }
-
   // Proxy tasks are not handled by the runtime
   if (taskdata->td_flags.proxy != TASK_PROXY) {
     ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
@@ -1883,6 +1886,7 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
 #if USE_ITT_BUILD
     if (itt_sync_obj != NULL)
       __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
+    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children
 #endif /* USE_ITT_BUILD */
 
     // Debugger: The taskwait is completed. Location remains, but thread is
@@ -2521,6 +2525,7 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
 #if USE_ITT_BUILD
     if (itt_sync_obj != NULL)
       __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
+    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with descendants
 #endif /* USE_ITT_BUILD */
   }
   KMP_DEBUG_ASSERT(taskgroup->count == 0);
@@ -3341,15 +3346,25 @@ static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
     KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                   "task team for team %p\n",
                   __kmp_gtid_from_thread(thread), team));
-    // Allocate a new task team if one is not available.
-    // Cannot use __kmp_thread_malloc() because threads not around for
-    // kmp_reap_task_team( ).
+    // Allocate a new task team if one is not available. Cannot use
+    // __kmp_thread_malloc because threads not around for kmp_reap_task_team.
     task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
     __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
-    // AC: __kmp_allocate zeroes returned memory
-    // task_team -> tt.tt_threads_data = NULL;
-    // task_team -> tt.tt_max_threads = 0;
-    // task_team -> tt.tt_next = NULL;
+#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
+    // suppress race conditions detection on synchronization flags in debug mode
+    // this helps to analyze library internals eliminating false positives
+    __itt_suppress_mark_range(
+        __itt_suppress_range, __itt_suppress_threading_errors,
+        &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
+    __itt_suppress_mark_range(__itt_suppress_range,
+                              __itt_suppress_threading_errors,
+                              CCAST(kmp_uint32 *, &task_team->tt.tt_active),
+                              sizeof(task_team->tt.tt_active));
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
+    // Note: __kmp_allocate zeroes returned memory, othewise we would need:
+    // task_team->tt.tt_threads_data = NULL;
+    // task_team->tt.tt_max_threads = 0;
+    // task_team->tt.tt_next = NULL;
   }
 
   TCW_4(task_team->tt.tt_found_tasks, FALSE);
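
Note (editor's sketch, not part of the patch): __itt_suppress_mark_range(mode, mask, addr, size) is the public ITT API from ittnotify.h that the hunks above rely on; it asks Inspector-style checkers to stop reporting the given error class on that byte range, which is why the runtime applies it to its internal sync flags in debug builds. A minimal standalone illustration, assuming ittnotify.h and the ITT notify static library are available at build time, and using a hypothetical demo_flag variable in place of the runtime's internal fields:

    #include "ittnotify.h"
    #include <thread>

    // Hypothetical flag for illustration only; in the runtime the suppressed
    // ranges are fields of kmp_info_t and kmp_task_team_t.
    static volatile int demo_flag = 0;

    int main() {
      // Ask the analysis tool to ignore threading errors on this range.
      __itt_suppress_mark_range(__itt_suppress_range,
                                __itt_suppress_threading_errors,
                                (void *)&demo_flag, sizeof(demo_flag));
      std::thread t([] { demo_flag = 1; }); // intentionally unsynchronized write
      while (demo_flag == 0) {
      } // unsynchronized read; the race report is suppressed, not fixed
      t.join();
      return 0;
    }

The KMP_FSYNC_ACQUIRED/RELEASING/CANCEL calls added in kmp_tasking.cpp are the runtime's wrappers over the corresponding __itt_fsync_* notifications, telling the same tools where the task-queue synchronization points are so they are not flagged as races.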