diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp index 8f26af086e714..2f50530e79a1d 100644 --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -372,4 +372,12 @@ __attribute__((noinline)) uint32_t __kmpc_get_warp_size() { } } +#define _TGT_KERNEL_LANGUAGE(NAME, MAPPER_NAME) \ + extern "C" int ompx_##NAME(int Dim) { return mapping::MAPPER_NAME(Dim); } + +_TGT_KERNEL_LANGUAGE(thread_id, getThreadIdInBlock) +_TGT_KERNEL_LANGUAGE(thread_dim, getNumberOfThreadsInBlock) +_TGT_KERNEL_LANGUAGE(block_id, getBlockIdInKernel) +_TGT_KERNEL_LANGUAGE(block_dim, getNumberOfBlocksInKernel) + #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/exports b/openmp/libomptarget/DeviceRTL/src/exports index 85fd459fee1b1..2d13195aa7dc8 100644 --- a/openmp/libomptarget/DeviceRTL/src/exports +++ b/openmp/libomptarget/DeviceRTL/src/exports @@ -1,4 +1,5 @@ omp_* +ompx_* *llvm_* __kmpc_* diff --git a/openmp/libomptarget/test/api/ompx_3d.c b/openmp/libomptarget/test/api/ompx_3d.c new file mode 100644 index 0000000000000..a67ad01835809 --- /dev/null +++ b/openmp/libomptarget/test/api/ompx_3d.c @@ -0,0 +1,41 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +#include +#include +#include + +void foo(int device) { + int tid = 0, bid = 0, bdim = 0; +#pragma omp target teams distribute parallel for map(from \ + : tid, bid, bdim) \ + device(device) thread_limit(2) num_teams(5) + for (int i = 0; i < 1000; ++i) { + if (i == 42) { + tid = ompx_thread_dim_x(); + bid = ompx_block_id_x(); + bdim = ompx_block_dim_x(); + } + } + // CHECK: tid: 2, bid: 1, bdim: 5 + // CHECK: tid: 2, bid: 0, bdim: 1 + printf("tid: %i, bid: %i, bdim: %i\n", tid, bid, bdim); +} + +int isGPU() { return 0; } +#pragma omp declare variant(isGPU) match(device = {arch(gpu)}) +int isGPUvariant() { return 1; } + +int defaultIsGPU() { + int r = 0; +#pragma omp target map(from : r) + r = isGPU(); + return r; +} + +int main() { + if (defaultIsGPU()) + foo(omp_get_default_device()); + else + printf("tid: 2, bid: 1, bdim: 5\n"); + foo(omp_get_initial_device()); +} diff --git a/openmp/libomptarget/test/api/ompx_3d.cpp b/openmp/libomptarget/test/api/ompx_3d.cpp new file mode 100644 index 0000000000000..8b2f62239e6a3 --- /dev/null +++ b/openmp/libomptarget/test/api/ompx_3d.cpp @@ -0,0 +1,41 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +#include +#include +#include + +void foo(int device) { + int tid = 0, bid = 0, bdim = 0; +#pragma omp target teams distribute parallel for map(from \ + : tid, bid, bdim) \ + device(device) thread_limit(2) num_teams(5) + for (int i = 0; i < 1000; ++i) { + if (i == 42) { + tid = ompx::thread_dim_x(); + bid = ompx::block_id_x(); + bdim = ompx::block_dim_x(); + } + } + // CHECK: tid: 2, bid: 1, bdim: 5 + // CHECK: tid: 2, bid: 0, bdim: 1 + printf("tid: %i, bid: %i, bdim: %i\n", tid, bid, bdim); +} + +int isGPU() { return 0; } +#pragma omp declare variant(isGPU) match(device = {arch(gpu)}) +int isGPUvariant() { return 1; } + +int defaultIsGPU() { + int r = 0; +#pragma omp target map(from : r) + r = isGPU(); + return r; +} + +int main() { + if (defaultIsGPU()) + foo(omp_get_default_device()); + else + printf("tid: 2, bid: 1, bdim: 5\n"); + foo(omp_get_initial_device()); +} diff --git a/openmp/runtime/cmake/LibompExports.cmake b/openmp/runtime/cmake/LibompExports.cmake index 97ecc5d691ff5..dbeb18f358f1a 100644 --- a/openmp/runtime/cmake/LibompExports.cmake +++ b/openmp/runtime/cmake/LibompExports.cmake @@ -50,6 +50,7 @@ set(LIBOMP_EXPORTS_LIB_DIR "${LIBOMP_EXPORTS_DIR}/${libomp_platform}${libomp_suf add_custom_command(TARGET omp POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBOMP_EXPORTS_CMN_DIR} COMMAND ${CMAKE_COMMAND} -E copy omp.h ${LIBOMP_EXPORTS_CMN_DIR} + COMMAND ${CMAKE_COMMAND} -E copy ompx.h ${LIBOMP_EXPORTS_CMN_DIR} ) if(${LIBOMP_OMPT_SUPPORT}) add_custom_command(TARGET omp POST_BUILD diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index bb58222645147..8b2445ac58bf0 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -12,6 +12,7 @@ include(ExtendPath) # Configure omp.h, kmp_config.h and omp-tools.h if necessary configure_file(${LIBOMP_INC_DIR}/omp.h.var omp.h @ONLY) +configure_file(${LIBOMP_INC_DIR}/ompx.h.var ompx.h @ONLY) configure_file(kmp_config.h.cmake kmp_config.h @ONLY) if(${LIBOMP_OMPT_SUPPORT}) configure_file(${LIBOMP_INC_DIR}/omp-tools.h.var omp-tools.h @ONLY) @@ -393,6 +394,7 @@ endif() install( FILES ${CMAKE_CURRENT_BINARY_DIR}/omp.h + ${CMAKE_CURRENT_BINARY_DIR}/ompx.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} ) if(${LIBOMP_OMPT_SUPPORT}) diff --git a/openmp/runtime/src/include/ompx.h.var b/openmp/runtime/src/include/ompx.h.var new file mode 100644 index 0000000000000..ea17e6e77cfac --- /dev/null +++ b/openmp/runtime/src/include/ompx.h.var @@ -0,0 +1,110 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __OMPX_H +#define __OMPX_H + +#ifdef __cplusplus +extern "C" { +#endif + +int omp_get_ancestor_thread_num(int); +int omp_get_team_size(int); + +#ifdef __cplusplus +} +#endif + +/// Target kernel language extensions +/// +/// These extensions exist for the host to allow fallback implementations, +/// however, they cannot be arbitrarily composed with OpenMP. If the rules of +/// the kernel language are followed, the host fallbacks should behave as +/// expected since the kernel is represented as 3 sequential outer loops, one +/// for each grid dimension, and three (nested) parallel loops, one for each +/// block dimension. This fallback is not supposed to be optimal and should be +/// configurable by the user. +/// +///{ + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + ompx_dim_x = 0, + ompx_dim_y = 1, + ompx_dim_z = 2, +}; + +/// ompx_{thread,block}_{id,dim} +///{ +#pragma omp begin declare variant match(device = {kind(cpu)}) +#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(NAME, VALUE) \ + static inline int ompx_##NAME(int Dim) { return VALUE; } + +_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(thread_id, + omp_get_ancestor_thread_num(Dim + 1)) +_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(thread_dim, omp_get_team_size(Dim + 1)) +_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_id, 0) +_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_dim, 1) +#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C +///} + +#pragma omp end declare variant + +/// ompx_{thread,block}_{id,dim}_{x,y,z} +///{ +#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME) \ + int ompx_##NAME(int Dim); \ + static inline int ompx_##NAME##_x() { return ompx_##NAME(ompx_dim_x); } \ + static inline int ompx_##NAME##_y() { return ompx_##NAME(ompx_dim_y); } \ + static inline int ompx_##NAME##_z() { return ompx_##NAME(ompx_dim_z); } + +_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id) +_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_dim) +_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id) +_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim) +#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C +///} + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus + +namespace ompx { + +enum { + dim_x = ompx_dim_x, + dim_y = ompx_dim_y, + dim_z = ompx_dim_z, +}; + +/// ompx::{thread,block}_{id,dim}_{,x,y,z} +///{ +#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME) \ + static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); } \ + static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); } \ + static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); } \ + static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); } + +_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id) +_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_dim) +_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id) +_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim) +#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX +///} + +} // namespace ompx +#endif + +///} + +#endif /* __OMPX_H */