Skip to content

Commit

Permalink
Block library shutdown until unreaped threads finish spin-waiting
Browse files Browse the repository at this point in the history
This change fixes a possible invalid access to internal data structures during
library shutdown.  In a heavily oversubscribed situation, the library shutdown
sequence can reach the point where resources are deallocated while threads
still remain in their final spin-wait loop.  The loop added to
__kmp_internal_end() checks whether any such busy-waiting threads exist and
blocks the shutdown sequence until they are done.  Two versions of
kmp_wait_template() are now used to minimize the performance impact.

Patch by Hansang Bae

Differential Revision: https://reviews.llvm.org/D49452

llvm-svn: 337486
  • Loading branch information
jpeyton52 committed Jul 19, 2018
1 parent a57d713 commit a764af6
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 11 deletions.
3 changes: 3 additions & 0 deletions openmp/runtime/src/kmp.h
Expand Up @@ -2558,6 +2558,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
#if KMP_STATS_ENABLED
kmp_stats_list *th_stats;
#endif
#if KMP_OS_UNIX
std::atomic<bool> th_blocking;
#endif
} kmp_base_info_t;

typedef union KMP_ALIGN_CACHE kmp_info {
Expand Down
15 changes: 15 additions & 0 deletions openmp/runtime/src/kmp_runtime.cpp
Expand Up @@ -4334,6 +4334,9 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,

new_thr->th.th_spin_here = FALSE;
new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX
new_thr->th.th_blocking = false;
#endif

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
Expand Down Expand Up @@ -5961,6 +5964,18 @@ static void __kmp_internal_end(void) {

__kmp_reap_task_teams();

#if KMP_OS_UNIX
// Threads that are not reaped should not access any resources since they
// are going to be deallocated soon, so the shutdown sequence should wait
// until all threads either exit the final spin-waiting loop or begin
// sleeping after the given blocktime.
for (i = 0; i < __kmp_threads_capacity; i++) {
kmp_info_t *thr = __kmp_threads[i];
while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
KMP_CPU_PAUSE();
}
#endif

for (i = 0; i < __kmp_threads_capacity; ++i) {
// TBD: Add some checking...
// Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
Expand Down
8 changes: 6 additions & 2 deletions openmp/runtime/src/kmp_wait_release.cpp
Expand Up @@ -15,8 +15,12 @@

// Spin-wait on a 64-bit flag until it is released.
// Dispatches to the wait template instantiated with final_spin as a
// compile-time template parameter (the commit splits the template into two
// versions to minimize the runtime performance impact of the new
// th_blocking bookkeeping done on the final-spin path).
// NOTE(review): the rendered diff retained the removed runtime-argument call
// above the new dispatch; only the post-commit dispatch is kept here.
void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
                   int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  if (final_spin)
    __kmp_wait_template<kmp_flag_64, TRUE>(
        this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj));
  else
    __kmp_wait_template<kmp_flag_64, FALSE>(
        this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj));
}

// Thin wrapper: signal a 64-bit flag via the common release template.
void __kmp_release_64(kmp_flag_64 *flag) { __kmp_release_template(flag); }
46 changes: 37 additions & 9 deletions openmp/runtime/src/kmp_wait_release.h
Expand Up @@ -154,10 +154,10 @@ static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
/* Spin wait loop that first does pause, then yield, then sleep. A thread that
calls __kmp_wait_* must make certain that another thread calls __kmp_release
to wake it back up to prevent deadlocks! */
template <class C>
template <class C, int final_spin>
static inline void
__kmp_wait_template(kmp_info_t *this_thr, C *flag,
int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
__kmp_wait_template(kmp_info_t *this_thr,
C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
// NOTE: We may not belong to a team at this point.
volatile void *spin = flag->get();
kmp_uint32 spins;
Expand All @@ -176,6 +176,10 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
return;
}
th_gtid = this_thr->th.th_info.ds.ds_gtid;
#if KMP_OS_UNIX
if (final_spin)
KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
KA_TRACE(20,
("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
Expand Down Expand Up @@ -409,7 +413,15 @@ final_spin=FALSE)
#endif

KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
if (final_spin)
KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
flag->suspend(th_gtid);
#if KMP_OS_UNIX
if (final_spin)
KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

if (TCR_4(__kmp_global.g.g_done)) {
if (__kmp_global.g.g_abort)
Expand Down Expand Up @@ -450,6 +462,10 @@ final_spin=FALSE)
}
#endif

#if KMP_OS_UNIX
if (final_spin)
KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
}

Expand Down Expand Up @@ -732,8 +748,12 @@ class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
}
// Wait until this 32-bit flag is released, selecting at compile time the
// final_spin variant of the wait template (TRUE = the thread's last spin
// before it may be reaped; that variant maintains th_blocking so shutdown
// can wait for spinners — see __kmp_internal_end()).
// NOTE(review): the rendered diff retained the removed runtime-argument call
// above the new dispatch; only the post-commit dispatch is kept here.
void wait(kmp_info_t *this_thr,
          int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  if (final_spin)
    __kmp_wait_template<kmp_flag_32, TRUE>(
        this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  else
    __kmp_wait_template<kmp_flag_32, FALSE>(
        this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}
// Signal this flag, waking any waiter, via the common release template.
void release() { __kmp_release_template(this); }
// Runtime type tag identifying this as a 32-bit flag.
flag_type get_ptr_type() { return flag32; }
Expand All @@ -757,8 +777,12 @@ class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
}
// Wait until this 64-bit flag is released, selecting at compile time the
// final_spin variant of the wait template (TRUE = the thread's last spin
// before it may be reaped; that variant maintains th_blocking so shutdown
// can wait for spinners — see __kmp_internal_end()).
// NOTE(review): the rendered diff retained the removed runtime-argument call
// above the new dispatch; only the post-commit dispatch is kept here.
void wait(kmp_info_t *this_thr,
          int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  if (final_spin)
    __kmp_wait_template<kmp_flag_64, TRUE>(
        this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  else
    __kmp_wait_template<kmp_flag_64, FALSE>(
        this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}
// Signal this flag, waking any waiter, via the common release template.
void release() { __kmp_release_template(this); }
// Runtime type tag identifying this as a 64-bit flag.
flag_type get_ptr_type() { return flag64; }
Expand Down Expand Up @@ -845,8 +869,12 @@ class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
// True if the sleep bit is set in the current flag value.
bool is_sleeping() { return is_sleeping_val(*get()); }
// Identical check to is_sleeping() for this flag type.
bool is_any_sleeping() { return is_sleeping_val(*get()); }
// Wait on the oncore (hierarchical) flag, selecting at compile time the
// final_spin variant of the wait template. itt_sync_obj is not a parameter
// here — presumably a class member; verify against the full class body.
// NOTE(review): the rendered diff retained the removed runtime-argument call
// above the new dispatch; only the post-commit dispatch is kept here.
void wait(kmp_info_t *this_thr, int final_spin) {
  if (final_spin)
    __kmp_wait_template<kmp_flag_oncore, TRUE>(
        this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  else
    __kmp_wait_template<kmp_flag_oncore, FALSE>(
        this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}
// Signal this flag, waking any waiter, via the common release template.
void release() { __kmp_release_template(this); }
// Put the calling thread to sleep on this oncore flag.
void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
Expand Down

0 comments on commit a764af6

Please sign in to comment.