Skip to content

Commit

Permalink
[OpenMP] Add ompx wrappers for __syncthreads
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D156729
  • Loading branch information
jdoerfert committed Jul 31, 2023
1 parent daef6d3 commit deb0ea3
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 0 deletions.
10 changes: 10 additions & 0 deletions openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,16 @@ void omp_set_lock(omp_lock_t *Lock) { impl::setLock(Lock); }
void omp_unset_lock(omp_lock_t *Lock) { impl::unsetLock(Lock); }

int omp_test_lock(omp_lock_t *Lock) { return impl::testLock(Lock); }

void ompx_sync_block(int Ordering) {
impl::syncThreadsAligned(atomic::OrderingTy(Ordering));
}
void ompx_sync_block_acq_rel() {
impl::syncThreadsAligned(atomic::OrderingTy::acq_rel);
}
void ompx_sync_block_divergent(int Ordering) {
impl::syncThreads(atomic::OrderingTy(Ordering));
}
} // extern "C"

#pragma omp end declare target
42 changes: 42 additions & 0 deletions openmp/libomptarget/test/api/ompx_sync.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// RUN: %libomptarget-compile-run-and-check-generic

#include <omp.h>
#include <ompx.h>
#include <stdio.h>

void foo(int device) {
int X;
// clang-format off
#pragma omp target teams map(from: X) device(device) thread_limit(2) num_teams(1)
#pragma omp parallel
// clang-format on
{
int tid = ompx_thread_id_x();
int bid = ompx_block_id_x();
if (tid == 1 && bid == 0) {
X = 42;
ompx_sync_block_divergent(3);
} else {
ompx_sync_block_divergent(1);
}
if (tid == 0 && bid == 0)
X++;
ompx_sync_block(ompx_seq_cst);
if (tid == 1 && bid == 0)
X++;
ompx_sync_block_acq_rel();
if (tid == 0 && bid == 0)
X++;
ompx_sync_block(ompx_release);
if (tid == 0 && bid == 0)
X++;
}
// CHECK: X: 46
// CHECK: X: 46
printf("X: %i\n", X);
}

int main() {
foo(omp_get_default_device());
foo(omp_get_initial_device());
}
42 changes: 42 additions & 0 deletions openmp/libomptarget/test/api/ompx_sync.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// RUN: %libomptarget-compilexx-run-and-check-generic

#include <omp.h>
#include <ompx.h>
#include <stdio.h>

void foo(int device) {
int X;
// clang-format off
#pragma omp target teams map(from: X) device(device) thread_limit(2) num_teams(1)
#pragma omp parallel
// clang-format on
{
int tid = ompx::thread_id_x();
int bid = ompx::block_id_x();
if (tid == 1 && bid == 0) {
X = 42;
ompx::sync_block_divergent(3);
} else {
ompx::sync_block_divergent();
}
if (tid == 0 && bid == 0)
X++;
ompx::sync_block(ompx::seq_cst);
if (tid == 1 && bid == 0)
X++;
ompx::sync_block();
if (tid == 0 && bid == 0)
X++;
ompx_sync_block(ompx_release);
if (tid == 0 && bid == 0)
X++;
}
// CHECK: X: 46
// CHECK: X: 46
printf("X: %i\n", X);
}

int main() {
foo(omp_get_default_device());
foo(omp_get_initial_device());
}
55 changes: 55 additions & 0 deletions openmp/runtime/src/include/ompx.h.var
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ int omp_get_team_size(int);
extern "C" {
#endif

enum {
ompx_relaxed = __ATOMIC_RELAXED,
ompx_aquire = __ATOMIC_ACQUIRE,
ompx_release = __ATOMIC_RELEASE,
ompx_acq_rel = __ATOMIC_ACQ_REL,
ompx_seq_cst = __ATOMIC_SEQ_CST,
};

enum {
ompx_dim_x = 0,
ompx_dim_y = 1,
Expand All @@ -56,8 +64,33 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_dim, 1)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C
///}

/// ompx_{sync_block}_{,divergent}
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RETTY, NAME, ARGS, BODY) \
static inline RETTY ompx_##NAME(ARGS) { BODY; }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
_Pragma("omp barrier"));
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
ompx_sync_block(ompx_acq_rel));
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
ompx_sync_block(Ordering));
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
///}

#pragma omp end declare variant

/// ompx_{sync_block}_{,divergent}
///{
#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS) \
RETTY ompx_##NAME(ARGS);

_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering);
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void);
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering);
#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
///}

/// ompx_{thread,block}_{id,dim}_{x,y,z}
///{
#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME) \
Expand Down Expand Up @@ -87,6 +120,14 @@ enum {
dim_z = ompx_dim_z,
};

enum {
relaxed = ompx_relaxed ,
aquire = ompx_aquire,
release = ompx_release,
acc_rel = ompx_acq_rel,
seq_cst = ompx_seq_cst,
};

/// ompx::{thread,block}_{id,dim}_{,x,y,z}
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME) \
Expand All @@ -102,6 +143,20 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
///}

/// ompx_{sync_block}_{,divergent}
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS) \
static inline RETTY NAME(ARGS) { \
return ompx_##NAME(CALL_ARGS); \
}

_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
Ordering);
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
int Ordering = acc_rel, Ordering);
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
///}

} // namespace ompx
#endif

Expand Down

0 comments on commit deb0ea3

Please sign in to comment.