diff --git a/openmp/libomptarget/DeviceRTL/include/Mapping.h b/openmp/libomptarget/DeviceRTL/include/Mapping.h
index 4f65d28da513f..36cfae7c5efa4 100644
--- a/openmp/libomptarget/DeviceRTL/include/Mapping.h
+++ b/openmp/libomptarget/DeviceRTL/include/Mapping.h
@@ -79,7 +79,12 @@ uint32_t getNumberOfWarpsInBlock();
 uint32_t getBlockId();
 
 /// Return the block size, thus number of threads in the block.
+///
+/// Note: The version taking \p IsSPMD mode explicitly can be used during the
+/// initialization of the target region, that is before `mapping::isSPMDMode()`
+/// can be called by any thread other than the main one.
 uint32_t getBlockSize();
+uint32_t getBlockSize(bool IsSPMD);
 
 /// Return the number of blocks in the kernel.
 uint32_t getNumberOfBlocks();
diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
index 65b554b729731..8b7a8a2495c45 100644
--- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
@@ -100,7 +100,7 @@ int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
   // doing any work. mapping::getBlockSize() does not include any of the main
   // thread's warp, so none of its threads can ever be active worker threads.
   if (UseGenericStateMachine &&
-      mapping::getThreadIdInBlock() < mapping::getBlockSize())
+      mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD))
     genericStateMachine(Ident);
 
   return mapping::getThreadIdInBlock();
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index 75a500f39d20a..7f9f837ae98e4 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -212,11 +212,14 @@ uint32_t mapping::getThreadIdInBlock() {
 
 uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
 
-uint32_t mapping::getBlockSize() {
+uint32_t mapping::getBlockSize(bool IsSPMD) {
   uint32_t BlockSize = mapping::getNumberOfProcessorElements() -
-                       (!mapping::isSPMDMode() * impl::getWarpSize());
+                       (!IsSPMD * impl::getWarpSize());
   return BlockSize;
 }
+uint32_t mapping::getBlockSize() {
+  return mapping::getBlockSize(mapping::isSPMDMode());
+}
 
 uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
 
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 800176eb5eda5..a04f5cccb1738 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -236,7 +236,7 @@ struct TeamStateTy {
 TeamStateTy SHARED(TeamState);
 
 void TeamStateTy::init(bool IsSPMD) {
-  ICVState.NThreadsVar = mapping::getBlockSize();
+  ICVState.NThreadsVar = mapping::getBlockSize(IsSPMD);
   ICVState.LevelVar = 0;
   ICVState.ActiveLevelVar = 0;
   ICVState.MaxActiveLevelsVar = 1;