diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 1a38492aacbc0..540139dbcd042 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -234,12 +234,12 @@ endfunction() # Generate a Bitcode library for all the compute capabilities the user requested add_custom_target(omptarget.devicertl.nvptx) foreach(sm ${nvptx_sm_list}) - compileDeviceRTLLibrary(sm_${sm} nvptx -target nvptx64-nvidia-cuda -Xclang -target-feature -Xclang +ptx61 "-D__CUDA_ARCH__=${sm}0") + compileDeviceRTLLibrary(sm_${sm} nvptx -target nvptx64-nvidia-cuda -DLIBOMPTARGET_BC_TARGET -Xclang -target-feature -Xclang +ptx61 "-D__CUDA_ARCH__=${sm}0") endforeach() add_custom_target(omptarget.devicertl.amdgpu) foreach(mcpu ${amdgpu_mcpus}) - compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -nogpulib) + compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -DLIBOMPTARGET_BC_TARGET -D__AMDGCN__ -nogpulib) endforeach() set(LIBOMPTARGET_LLVM_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}") diff --git a/openmp/libomptarget/DeviceRTL/include/Types.h b/openmp/libomptarget/DeviceRTL/include/Types.h index 34f3e924a3a26..fcb75e31c455e 100644 --- a/openmp/libomptarget/DeviceRTL/include/Types.h +++ b/openmp/libomptarget/DeviceRTL/include/Types.h @@ -209,6 +209,13 @@ enum OMPTgtExecModeFlags : int8_t { #define CONSTANT(NAME) \ NAME [[clang::loader_uninitialized, clang::address_space(4)]] +// Attribute to keep certain definitions alive in the bitcode library. 
+#ifdef LIBOMPTARGET_BC_TARGET +#define KEEP_ALIVE __attribute__((used, retain)) +#else +#define KEEP_ALIVE +#endif + ///} #endif diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp index 12ef58fbb60d3..172bbbff68f8e 100644 --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -276,7 +276,7 @@ uint32_t mapping::getNumberOfProcessorElements() { // TODO: This is a workaround for initialization coming from kernels outside of // the TU. We will need to solve this more correctly in the future. -int __attribute__((used, retain, weak)) SHARED(IsSPMDMode); +int __attribute__((weak)) KEEP_ALIVE SHARED(IsSPMDMode); void mapping::init(bool IsSPMD) { if (mapping::isInitialThreadInLevel0(IsSPMD)) diff --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp index e6bcba811f801..453d13198ef3c 100644 --- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp @@ -21,7 +21,7 @@ using namespace _OMP; namespace _OMP { /// Helper to keep code alive without introducing a performance penalty. -__attribute__((used, retain, weak, optnone, cold)) void keepAlive() { +__attribute__((weak, optnone, cold)) KEEP_ALIVE void keepAlive() { __kmpc_get_hardware_thread_id_in_block(); __kmpc_get_hardware_num_threads_in_block(); __kmpc_get_warp_size();