[OpenMP][Clang] Support for target math functions
Summary:
This patch proposes a temporary solution for resolving math functions on the NVPTX toolchain, to be used until the OpenMP declare variant mechanism is supported by Clang.

We intercept the inclusion of the math.h and cmath headers and, in the OpenMP-to-NVPTX offloading case, re-use CUDA's math function resolution mechanism.

Authors:
@gtbercea
@jdoerfert

Reviewers: hfinkel, caomhin, ABataev, tra

Reviewed By: hfinkel, ABataev, tra

Subscribers: JDevlieghere, mgorny, guansong, cfe-commits, jdoerfert

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61399

llvm-svn: 360265
doru1004 committed May 8, 2019
1 parent a3ff572 commit e62c693
Showing 18 changed files with 693 additions and 433 deletions.
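
The interception works by placing wrapper headers on the include path ahead of the system headers; the wrappers pull in CUDA's device-side math declarations and then chain to the real header. What follows is only an illustrative sketch of that mechanism, not the exact contents of the openmp_wrappers/math.h added by this commit; in particular, the use of a __CUDA__ guard macro here is an assumption.

// Illustrative sketch of a math.h overlay (not the committed file). It is
// found before the system math.h because the driver adds
// <resource-dir>/include/openmp_wrappers as an internal system include path.
#if defined(__NVPTX__) && defined(_OPENMP)
#define __CUDA__ // assumption: guard used while reusing the CUDA headers
#include <__clang_cuda_device_functions.h> // math wrappers backed by libdevice
#undef __CUDA__
#endif

// #include_next continues the search after the directory containing this
// wrapper, so the real system math.h is still included afterwards.
#include_next <math.h>
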
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChain.cpp
@@ -425,7 +425,7 @@ bool ToolChain::needsProfileRT(const ArgList &Args) {
Args.hasArg(options::OPT_fprofile_instr_generate) ||
Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
Args.hasArg(options::OPT_fcreate_profile) ||
Args.hasArg(options::OPT_forder_file_instrumentation))
Args.hasArg(options::OPT_forder_file_instrumentation))
return true;

return false;
18 changes: 18 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
@@ -1151,6 +1151,24 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
  if (JA.isOffloading(Action::OFK_Cuda))
    getToolChain().AddCudaIncludeArgs(Args, CmdArgs);

  // If we are offloading to a target via OpenMP we need to include the
  // openmp_wrappers folder which contains alternative system headers.
  if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
      getToolChain().getTriple().isNVPTX()) {
    if (!Args.hasArg(options::OPT_nobuiltininc)) {
      // Add openmp_wrappers/* to our system include path. This lets us wrap
      // standard library headers.
      SmallString<128> P(D.ResourceDir);
      llvm::sys::path::append(P, "include");
      llvm::sys::path::append(P, "openmp_wrappers");
      CmdArgs.push_back("-internal-isystem");
      CmdArgs.push_back(Args.MakeArgString(P));
    }

    CmdArgs.push_back("-include");
    CmdArgs.push_back("__clang_openmp_math.h");
  }

// Add -i* options, and automatically translate to
// -include-pch/-include-pth for transparent PCH support. It's
// wonky, but we include looking for .gch so we can support seamless
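
In practice, for an invocation such as the one below, the NVPTX device-side compilation job now additionally receives -include __clang_openmp_math.h and, unless -nobuiltininc is given, -internal-isystem <resource-dir>/include/openmp_wrappers. The command line is illustrative; the source file name and <resource-dir> are placeholders.

clang++ -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -O2 saxpy.cpp -o saxpy
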
13 changes: 12 additions & 1 deletion clang/lib/Headers/CMakeLists.txt
@@ -128,6 +128,12 @@ set(ppc_wrapper_files
ppc_wrappers/mmintrin.h
)

set(openmp_wrapper_files
  openmp_wrappers/math.h
  openmp_wrappers/cmath
  openmp_wrappers/__clang_openmp_math.h
)

set(output_dir ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include)
set(out_files)
set(generated_files)
@@ -156,7 +162,7 @@ endfunction(clang_generate_header)


# Copy header files from the source directory to the build directory
foreach( f ${files} ${cuda_wrapper_files} ${ppc_wrapper_files} )
foreach( f ${files} ${cuda_wrapper_files} ${ppc_wrapper_files} ${openmp_wrapper_files})
copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} ${f})
endforeach( f )

@@ -188,6 +194,11 @@ install(
DESTINATION ${header_install_dir}/ppc_wrappers
COMPONENT clang-resource-headers)

install(
  FILES ${openmp_wrapper_files}
  DESTINATION ${header_install_dir}/openmp_wrappers
  COMPONENT clang-resource-headers)

if (NOT LLVM_ENABLE_IDE)
add_llvm_install_targets(install-clang-resource-headers
DEPENDS clang-resource-headers
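
Assuming header_install_dir resolves to the usual Clang resource header directory, the installed tree gains a new subdirectory roughly as sketched below; the version component is a placeholder.

lib/clang/<version>/include/openmp_wrappers/
    __clang_openmp_math.h
    cmath
    math.h
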
10 changes: 10 additions & 0 deletions clang/lib/Headers/__clang_cuda_cmath.h
@@ -30,7 +30,11 @@
// implementation. Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.

#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif

__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
@@ -47,6 +51,8 @@ __DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
// TODO: remove when variant is supported
#ifndef _OPENMP
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
@@ -55,6 +61,7 @@ __DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
#endif
__DEVICE__ float frexp(float __arg, int *__exp) {
return ::frexpf(__arg, __exp);
}
@@ -434,7 +441,10 @@ using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
// TODO: remove once variant is supported
#ifndef _OPENMP
using ::scalblnf;
#endif
using ::scalbnf;
using ::sinf;
using ::sinhf;
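
The effect of the new _OPENMP branch of __DEVICE__ above is easiest to see by expanding one of the wrapped overloads by hand. The expansion below is illustrative, using exp(float) from this header: under OpenMP the CUDA-specific __device__ qualifier is dropped, so the same definition is usable in an OpenMP NVPTX device compilation.

// CUDA compilation (_OPENMP not defined):
static __device__ __inline__ __attribute__((always_inline)) float exp(float __x) { return ::expf(__x); }

// OpenMP NVPTX device compilation (_OPENMP defined):
static __attribute__((always_inline)) float exp(float __x) { return ::expf(__x); }
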
20 changes: 19 additions & 1 deletion clang/lib/Headers/__clang_cuda_device_functions.h
@@ -10,15 +10,21 @@
#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__

#ifndef _OPENMP
#if CUDA_VERSION < 9000
#error This file is intended to be used with CUDA-9+ only.
#endif
#endif

// __DEVICE__ is a helper macro with common set of attributes for the wrappers
// we implement in this file. We need static in order to avoid emitting unused
// functions and __forceinline__ helps inlining these wrappers at -O1.
#pragma push_macro("__DEVICE__")
#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __forceinline__
#endif

// libdevice provides fast low precision and slow full-precision implementations
// for some functions. Which one gets selected depends on
@@ -38,8 +44,13 @@ __DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }
__DEVICE__ unsigned long long __brevll(unsigned long long __a) {
return __nv_brevll(__a);
}
#if defined(__cplusplus)
__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
#else
__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
#endif
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
                                    unsigned int __c) {
  return __nv_byte_perm(__a, __b, __c);
@@ -1559,7 +1570,7 @@ __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
#if defined(__LP64__) || defined(_WIN64)
__DEVICE__ long labs(long __a) { return llabs(__a); };
__DEVICE__ long labs(long __a) { return __nv_llabs(__a); };
#else
__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
#endif
@@ -1604,12 +1615,16 @@ __DEVICE__ long lround(double __a) { return round(__a); }
__DEVICE__ long lroundf(float __a) { return roundf(__a); }
#endif
__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }
// These functions shouldn't be declared when including this header
// for math function resolution purposes.
#ifndef _OPENMP
__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
  return __builtin_memcpy(__a, __b, __c);
}
__DEVICE__ void *memset(void *__a, int __b, size_t __c) {
  return __builtin_memset(__a, __b, __c);
}
#endif
__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }
__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }
__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }
@@ -1693,6 +1708,8 @@ __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }
__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
// TODO: remove once variant is supported
#ifndef _OPENMP
__DEVICE__ double scalbln(double __a, long __b) {
  if (__b > INT_MAX)
    return __a > 0 ? HUGE_VAL : -HUGE_VAL;
@@ -1707,6 +1724,7 @@ __DEVICE__ float scalblnf(float __a, long __b) {
    return __a > 0 ? 0.f : -0.f;
  return scalbnf(__a, (int)__b);
}
#endif
__DEVICE__ double sin(double __a) { return __nv_sin(__a); }
__DEVICE__ void sincos(double __a, double *__s, double *__c) {
return __nv_sincos(__a, __s, __c);
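
Taken together, the driver and header changes make standard math calls inside a target region resolve to the CUDA/libdevice device implementations when offloading to NVPTX. A hypothetical user-side example, not part of this patch, compiled with -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda:

// Hypothetical example: with this patch, std::sin below resolves through the
// wrapper headers to the device-side implementation on the NVPTX target.
#include <cmath>

double device_sin(double x) {
  double r;
#pragma omp target map(from : r)
  r = std::sin(x);
  return r;
}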
