Skip to content

Commit

Permalink
[OpenMP][NFC] Reorganize the ompx::mapping layer in the GPU runtime
Browse files Browse the repository at this point in the history
This change makes the naming more consistent, I hope.
  • Loading branch information
jdoerfert committed Jul 31, 2023
1 parent 0a68cd2 commit 1f3a28d
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 86 deletions.
3 changes: 3 additions & 0 deletions openmp/libomptarget/DeviceRTL/include/Debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ void __assert_fail(const char *expr, const char *msg, const char *file,
else \
__assert_assume(expr); \
}
/// Mark a code path that must never be executed: hand \p msg to PRINT and
/// then stop execution via __builtin_trap().
///
/// The body is wrapped in `do { ... } while (0)` so the expansion forms a
/// single statement. Without the wrapper, `if (Cond) UNREACHABLE("...");`
/// would only guard the PRINT and trap unconditionally, and an `if`/`else`
/// around the macro would not compile. Call sites terminate the macro with a
/// semicolon like any other statement.
#define UNREACHABLE(msg)                                                       \
  do {                                                                         \
    PRINT(msg);                                                                \
    __builtin_trap();                                                          \
  } while (0)

///}

Expand Down
42 changes: 26 additions & 16 deletions openmp/libomptarget/DeviceRTL/include/Mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ namespace ompx {

namespace mapping {

/// Dimension indices. DIM_X is the default value of the `Dim` parameter of
/// the per-dimension mapping queries declared in this namespace. Kept as an
/// unscoped enum so the values convert implicitly to the `int32_t` parameter.
enum {
  DIM_X = 0, ///< First dimension (index 0).
  DIM_Y,     ///< Second dimension (index 1).
  DIM_Z,     ///< Third dimension (index 2).
};

#pragma omp begin declare target device_type(nohost)

inline constexpr uint32_t MaxThreadsPerTeam = 1024;
Expand Down Expand Up @@ -63,34 +69,38 @@ LaneMaskTy lanemaskGT();
/// Return the thread Id in the warp, in [0, getWarpSize()).
uint32_t getThreadIdInWarp();

/// Return the thread Id in the block, in [0, getBlockSize()).
uint32_t getThreadIdInBlock();

/// Return the warp id in the block.
uint32_t getWarpId();

/// Return the warp size, thus number of threads in the warp.
uint32_t getWarpSize();

/// Return the warp id in the block, in [0, getNumberOfWarpsInBlock()).
uint32_t getWarpIdInBlock();

/// Return the number of warps in the block.
uint32_t getNumberOfWarpsInBlock();

/// Return the block Id in the kernel, in [0, getKernelSize()).
uint32_t getBlockId();
/// Return the thread Id in the block, in [0, getNumberOfThreadsInBlock(Dim)).
uint32_t getThreadIdInBlock(int32_t Dim = DIM_X);

/// Return the block size, thus number of threads in the block.
///
/// Note: The version taking \p IsSPMD mode explicitly can be used during the
/// initialization of the target region, that is before `mapping::isSPMDMode()`
/// can be called by any thread other than the main one.
uint32_t getBlockSize();
uint32_t getBlockSize(bool IsSPMD);
uint32_t getNumberOfThreadsInBlock(int32_t Dim = DIM_X);

/// Return the block Id in the kernel, in [0, getNumberOfBlocksInKernel(Dim)).
uint32_t getBlockIdInKernel(int32_t Dim = DIM_X);

/// Return the number of blocks in the kernel.
uint32_t getNumberOfBlocks();
uint32_t getNumberOfBlocksInKernel(int32_t Dim = DIM_X);

/// Return the kernel size, thus number of threads in the kernel.
uint32_t getKernelSize();
uint32_t getNumberOfThreadsInKernel();

/// Return the maximal number of threads in the block usable for a team (=
/// parallel region).
///
/// Note: The version taking \p IsSPMD mode explicitly can be used during the
/// initialization of the target region, that is before `mapping::isSPMDMode()`
/// can be called by any thread other than the main one.
uint32_t getMaxTeamThreads();
uint32_t getMaxTeamThreads(bool IsSPMD);

/// Return the number of processing elements on the device.
uint32_t getNumberOfProcessorElements();
Expand Down
6 changes: 4 additions & 2 deletions openmp/libomptarget/DeviceRTL/src/Debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ void __assert_fail(const char *expr, const char *msg, const char *file,
DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,
const char *Function) {
if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) {
mapping::getThreadIdInBlock() == 0 &&
mapping::getBlockIdInKernel() == 0) {

uint16_t &Level =
state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel;
Expand All @@ -56,7 +57,8 @@ DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,

DebugEntryRAII::~DebugEntryRAII() {
if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) {
mapping::getThreadIdInBlock() == 0 &&
mapping::getBlockIdInKernel() == 0) {
uint16_t &Level =
state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel;
Level--;
Expand Down
7 changes: 4 additions & 3 deletions openmp/libomptarget/DeviceRTL/src/Kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,11 @@ int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) {
// reaches its corresponding synchronize::threads call: that would permit all
// active worker threads to proceed before the main thread has actually set
// state::ParallelRegionFn, and then they would immediately quit without
// doing any work. mapping::getBlockSize() does not include any of the main
// thread's warp, so none of its threads can ever be active worker threads.
// doing any work. mapping::getMaxTeamThreads() does not include any of the
// main thread's warp, so none of its threads can ever be active worker
// threads.
if (UseGenericStateMachine &&
mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) {
mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD)) {
genericStateMachine(KernelEnvironment.Ident);
} else {
// Retrieve the work function just to ensure we always call
Expand Down

0 comments on commit 1f3a28d

Please sign in to comment.