Skip to content

Commit

Permalink
[nfc][libomptarget] Move named_sync() into target_impl
Browse files Browse the repository at this point in the history
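Summary: [nfc][libomptarget] Move named_sync() out of support.h/supporti.h into target_impl.h, renaming it to __kmpc_impl_named_sync() and updating its callers in loop.cu, reduction.cu and sync.cu. No functional change intended.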

Reviewers: ABataev, jdoerfert, grokos

Reviewed By: ABataev

Subscribers: openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D69487
  • Loading branch information
JonChesterfield committed Oct 30, 2019
1 parent db8dad2 commit 8548e2f
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 20 deletions.
2 changes: 1 addition & 1 deletion openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
Expand Up @@ -765,7 +765,7 @@ INLINE void syncWorkersInGenericMode(uint32_t NumThreads) {
// is started, so we don't need a barrier.
if (NumThreads > 1) {
#endif
named_sync(L1_BARRIER, WARPSIZE * NumWarps);
__kmpc_impl_named_sync(L1_BARRIER, WARPSIZE * NumWarps);
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
Expand Up @@ -256,7 +256,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
// If we guard this barrier as follows it leads to deadlock, probably
// because of a compiler bug: if (!IsGenericMode()) __syncthreads();
uint16_t SyncWarps = (NumThreads + WARPSIZE - 1) / WARPSIZE;
named_sync(L1_BARRIER, SyncWarps * WARPSIZE);
__kmpc_impl_named_sync(L1_BARRIER, SyncWarps * WARPSIZE);

// If this team is not the last, quit.
if (/* Volatile read by all threads */ !IsLastTeam)
Expand Down
5 changes: 0 additions & 5 deletions openmp/libomptarget/deviceRTLs/nvptx/src/support.h
Expand Up @@ -83,11 +83,6 @@ INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment);
#define SUB_BYTES(_addr, _bytes) \
((void *)((char *)((void *)(_addr)) - (_bytes)))

////////////////////////////////////////////////////////////////////////////////
// Named Barrier Routines
////////////////////////////////////////////////////////////////////////////////
// Forward declaration of the named-barrier helper. `barrier` identifies the
// barrier and `num_threads` is the thread count handed to it; the body lives
// in the matching inline-implementation header (supporti.h).
INLINE void named_sync(const int barrier, const int num_threads);

////////////////////////////////////////////////////////////////////////////////
// Teams Reduction Scratchpad Helpers
////////////////////////////////////////////////////////////////////////////////
Expand Down
11 changes: 0 additions & 11 deletions openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
Expand Up @@ -268,17 +268,6 @@ INLINE void *SafeFree(void *ptr, const char *msg) {
return NULL;
}

////////////////////////////////////////////////////////////////////////////////
// Named Barrier Routines
////////////////////////////////////////////////////////////////////////////////

// Emits a single PTX `bar.sync` instruction, passing `barrier` as its first
// operand and `num_threads` as its second. Returns nothing.
//
// barrier:     value substituted for %0 (the barrier operand of bar.sync).
// num_threads: value substituted for %1 (the thread-count operand of bar.sync).
//
// NOTE(review): the PTX ISA places restrictions on both operands (valid
// barrier ids; thread count a multiple of the warp size). Nothing is checked
// here, so callers are presumably responsible — confirm against call sites.
INLINE void named_sync(const int barrier, const int num_threads) {
// Both arguments are register ("r") inputs and there are no outputs.
// `volatile` keeps the compiler from dropping the statement, and the
// "memory" clobber stops it from keeping memory values cached in registers
// or moving loads/stores across the barrier.
asm volatile("bar.sync %0, %1;"
:
: "r"(barrier), "r"(num_threads)
: "memory");
}

////////////////////////////////////////////////////////////////////////////////
// Teams Reduction Scratchpad Helpers
////////////////////////////////////////////////////////////////////////////////
Expand Down
4 changes: 2 additions & 2 deletions openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
Expand Up @@ -61,7 +61,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
"call kmpc_barrier with %d omp threads, sync parameter %d\n",
(int)numberOfActiveOMPThreads, (int)threads);
// Barrier #1 is for synchronization among active threads.
named_sync(L1_BARRIER, threads);
__kmpc_impl_named_sync(L1_BARRIER, threads);
}
} else {
// Still need to flush the memory per the standard.
Expand Down
Expand Up @@ -92,7 +92,7 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
"%d\n",
(int)numberOfActiveOMPThreads, (int)threads);
// Barrier #1 is for synchronization among active threads.
named_sync(L1_BARRIER, threads);
__kmpc_impl_named_sync(L1_BARRIER, threads);
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n");
}

Expand Down
7 changes: 7 additions & 0 deletions openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Expand Up @@ -153,4 +153,11 @@ INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
#endif // CUDA_VERSION
}

// Target-specific named barrier: emits a single PTX `bar.sync` instruction,
// passing `barrier` as its first operand and `num_threads` as its second.
// Returns nothing.
//
// barrier:     value substituted for %0 (the barrier operand of bar.sync).
// num_threads: value substituted for %1 (the thread-count operand of bar.sync).
//
// NOTE(review): the PTX ISA places restrictions on both operands (valid
// barrier ids; thread count a multiple of the warp size). Nothing is checked
// here, so callers are presumably responsible — confirm against call sites.
INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
// Both arguments are register ("r") inputs and there are no outputs.
// `volatile` keeps the compiler from dropping the statement, and the
// "memory" clobber stops it from keeping memory values cached in registers
// or moving loads/stores across the barrier.
asm volatile("bar.sync %0, %1;"
:
: "r"(barrier), "r"(num_threads)
: "memory");
}

#endif

0 comments on commit 8548e2f

Please sign in to comment.