diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h index 123c15161ca47f..a2f1d37fdad2f3 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h @@ -126,7 +126,7 @@ #if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING #include -#include "option.h" +#include "target_impl.h" template NOINLINE static void log(const char *fmt, Arguments... parameters) { diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h index 336206aa9413e8..5006aa4a6ccefb 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -23,7 +23,6 @@ #include "target_impl.h" #include "debug.h" // debug #include "interface.h" // interfaces with omp, compiler, and user -#include "option.h" // choices we have #include "state-queue.h" #include "support.h" diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h b/openmp/libomptarget/deviceRTLs/nvptx/src/option.h deleted file mode 100644 index 37d1134f44bca5..00000000000000 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h +++ /dev/null @@ -1,62 +0,0 @@ -//===------------ option.h - NVPTX OpenMP GPU options ------------ CUDA -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// GPU default options -// -//===----------------------------------------------------------------------===// -#ifndef _OPTION_H_ -#define _OPTION_H_ - -#include "interface.h" - -//////////////////////////////////////////////////////////////////////////////// -// Kernel options -//////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////// -// The following def must match the absolute limit hardwired in the host RTL -// max number of threads per team -#define MAX_THREADS_PER_TEAM 1024 - -#define WARPSIZE 32 - -// The named barrier for active parallel threads of a team in an L1 parallel -// region to synchronize with each other. -#define L1_BARRIER (1) - -// Maximum number of preallocated arguments to an outlined parallel/simd function. -// Anything more requires dynamic memory allocation. -#define MAX_SHARED_ARGS 20 - -// Maximum number of omp state objects per SM allocated statically in global -// memory. -#if __CUDA_ARCH__ >= 700 -#define OMP_STATE_COUNT 32 -#define MAX_SM 84 -#elif __CUDA_ARCH__ >= 600 -#define OMP_STATE_COUNT 32 -#define MAX_SM 56 -#else -#define OMP_STATE_COUNT 16 -#define MAX_SM 16 -#endif - -#define OMP_ACTIVE_PARALLEL_LEVEL 128 - -//////////////////////////////////////////////////////////////////////////////// -// algo options -//////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////// -// misc options (by def everythig here is device) -//////////////////////////////////////////////////////////////////////////////// - -#define INLINE __forceinline__ __device__ -#define NOINLINE __noinline__ __device__ - -#endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h b/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h index 9d7576bcd76e29..8320929cfaf3a9 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h @@ -21,7 +21,7 @@ #include -#include "option.h" // choices we have +#include "target_impl.h" template class omptarget_nvptx_Queue { private: diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h index 4e7dc4e72ceb3a..1a5d69a3ad57fd 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -15,7 +15,42 @@ #include #include -#include "option.h" +#define INLINE __forceinline__ __device__ +#define NOINLINE __noinline__ __device__ + +//////////////////////////////////////////////////////////////////////////////// +// Kernel options +//////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////// +// The following def must match the absolute limit hardwired in the host RTL +// max number of threads per team +#define MAX_THREADS_PER_TEAM 1024 + +#define WARPSIZE 32 + +// The named barrier for active parallel threads of a team in an L1 parallel +// region to synchronize with each other. +#define L1_BARRIER (1) + +// Maximum number of preallocated arguments to an outlined parallel/simd function. +// Anything more requires dynamic memory allocation. +#define MAX_SHARED_ARGS 20 + +// Maximum number of omp state objects per SM allocated statically in global +// memory. +#if __CUDA_ARCH__ >= 700 +#define OMP_STATE_COUNT 32 +#define MAX_SM 84 +#elif __CUDA_ARCH__ >= 600 +#define OMP_STATE_COUNT 32 +#define MAX_SM 56 +#else +#define OMP_STATE_COUNT 16 +#define MAX_SM 16 +#endif + +#define OMP_ACTIVE_PARALLEL_LEVEL 128 // Data sharing related quantities, need to match what is used in the compiler. enum DATA_SHARING_SIZES {