[OpenMP][Clang] Support for target math functions
Summary:
This patch proposes a temporary solution for resolving math functions on the NVPTX toolchain, to be used until the OpenMP declare variant mechanism is supported by Clang.

We intercept the inclusion of the math.h and cmath headers and, in the OpenMP-to-NVPTX offloading case, re-use CUDA's math function resolution mechanism.

Authors:
@gtbercea
@jdoerfert

Reviewers: hfinkel, caomhin, ABataev, tra

Reviewed By: hfinkel, ABataev, tra

Subscribers: JDevlieghere, mgorny, guansong, cfe-commits, jdoerfert

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61399

llvm-svn: 360265
doru1004 committed May 8, 2019
1 parent a3ff572 commit e62c693
Showing 18 changed files with 693 additions and 433 deletions.
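
The interception works by placing wrapper headers on the include path ahead of the system headers; the wrappers pull in CUDA's device-side math declarations and then chain to the real header. What follows is only an illustrative sketch of that mechanism, not the exact contents of the openmp_wrappers/math.h added by this commit; in particular, the use of a __CUDA__ guard macro here is an assumption.

// Illustrative sketch of a math.h overlay (not the committed file). It is
// found before the system math.h because the driver adds
// <resource-dir>/include/openmp_wrappers as an internal system include path.
#if defined(__NVPTX__) && defined(_OPENMP)
#define __CUDA__ // assumption: guard used while reusing the CUDA headers
#include <__clang_cuda_device_functions.h> // math wrappers backed by libdevice
#undef __CUDA__
#endif

// #include_next continues the search after the directory containing this
// wrapper, so the real system math.h is still included afterwards.
#include_next <math.h>
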
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChain.cpp
@@ -425,7 +425,7 @@ bool ToolChain::needsProfileRT(const ArgList &Args) {
Args.hasArg(options::OPT_fprofile_instr_generate) ||
Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
Args.hasArg(options::OPT_fcreate_profile) ||
Args.hasArg(options::OPT_forder_file_instrumentation))
Args.hasArg(options::OPT_forder_file_instrumentation))
return true;

return false;
18 changes: 18 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
@@ -1151,6 +1151,24 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
  if (JA.isOffloading(Action::OFK_Cuda))
    getToolChain().AddCudaIncludeArgs(Args, CmdArgs);

  // If we are offloading to a target via OpenMP we need to include the
  // openmp_wrappers folder which contains alternative system headers.
  if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
      getToolChain().getTriple().isNVPTX()) {
    if (!Args.hasArg(options::OPT_nobuiltininc)) {
      // Add openmp_wrappers/* to our system include path. This lets us wrap
      // standard library headers.
      SmallString<128> P(D.ResourceDir);
      llvm::sys::path::append(P, "include");
      llvm::sys::path::append(P, "openmp_wrappers");
      CmdArgs.push_back("-internal-isystem");
      CmdArgs.push_back(Args.MakeArgString(P));
    }

    CmdArgs.push_back("-include");
    CmdArgs.push_back("__clang_openmp_math.h");
  }

// Add -i* options, and automatically translate to
// -include-pch/-include-pth for transparent PCH support. It's
// wonky, but we include looking for .gch so we can support seamless
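
In practice, for an invocation such as the one below, the NVPTX device-side compilation job now additionally receives -include __clang_openmp_math.h and, unless -nobuiltininc is given, -internal-isystem <resource-dir>/include/openmp_wrappers. The command line is illustrative; the source file name and <resource-dir> are placeholders.

clang++ -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -O2 saxpy.cpp -o saxpy
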
13 changes: 12 additions & 1 deletion clang/lib/Headers/CMakeLists.txt
@@ -128,6 +128,12 @@ set(ppc_wrapper_files
ppc_wrappers/mmintrin.h
)

set(openmp_wrapper_files
  openmp_wrappers/math.h
  openmp_wrappers/cmath
  openmp_wrappers/__clang_openmp_math.h
)

set(output_dir ${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}/include)
set(out_files)
set(generated_files)
@@ -156,7 +162,7 @@ endfunction(clang_generate_header)


# Copy header files from the source directory to the build directory
foreach( f ${files} ${cuda_wrapper_files} ${ppc_wrapper_files} )
foreach( f ${files} ${cuda_wrapper_files} ${ppc_wrapper_files} ${openmp_wrapper_files})
copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} ${f})
endforeach( f )

@@ -188,6 +194,11 @@ install(
DESTINATION ${header_install_dir}/ppc_wrappers
COMPONENT clang-resource-headers)

install(
  FILES ${openmp_wrapper_files}
  DESTINATION ${header_install_dir}/openmp_wrappers
  COMPONENT clang-resource-headers)

if (NOT LLVM_ENABLE_IDE)
add_llvm_install_targets(install-clang-resource-headers
DEPENDS clang-resource-headers
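
Assuming header_install_dir resolves to the usual Clang resource header directory, the installed tree gains a new subdirectory roughly as sketched below; the version component is a placeholder.

lib/clang/<version>/include/openmp_wrappers/
    __clang_openmp_math.h
    cmath
    math.h
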
10 changes: 10 additions & 0 deletions clang/lib/Headers/__clang_cuda_cmath.h
@@ -30,7 +30,11 @@
// implementation. Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.

#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif

__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
@@ -47,6 +51,8 @@ __DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
// TODO: remove when variant is supported
#ifndef _OPENMP
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
@@ -55,6 +61,7 @@ __DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
#endif
__DEVICE__ float frexp(float __arg, int *__exp) {
return ::frexpf(__arg, __exp);
}
@@ -434,7 +441,10 @@ using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
// TODO: remove once variant is supported
#ifndef _OPENMP
using ::scalblnf;
#endif
using ::scalbnf;
using ::sinf;
using ::sinhf;
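
The effect of the new _OPENMP branch of __DEVICE__ above is easiest to see by expanding one of the wrapped overloads by hand. The expansion below is illustrative, using exp(float) from this header: under OpenMP the CUDA-specific __device__ qualifier is dropped, so the same definition is usable in an OpenMP NVPTX device compilation.

// CUDA compilation (_OPENMP not defined):
static __device__ __inline__ __attribute__((always_inline)) float exp(float __x) { return ::expf(__x); }

// OpenMP NVPTX device compilation (_OPENMP defined):
static __attribute__((always_inline)) float exp(float __x) { return ::expf(__x); }
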
20 changes: 19 additions & 1 deletion clang/lib/Headers/__clang_cuda_device_functions.h
@@ -10,15 +10,21 @@
#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__

#ifndef _OPENMP
#if CUDA_VERSION < 9000
#error This file is intended to be used with CUDA-9+ only.
#endif
#endif

// __DEVICE__ is a helper macro with common set of attributes for the wrappers
// we implement in this file. We need static in order to avoid emitting unused
// functions and __forceinline__ helps inlining these wrappers at -O1.
#pragma push_macro("__DEVICE__")
#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __forceinline__
#endif

// libdevice provides fast low precision and slow full-precision implementations
// for some functions. Which one gets selected depends on
@@ -38,8 +44,13 @@ __DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }
__DEVICE__ unsigned long long __brevll(unsigned long long __a) {
return __nv_brevll(__a);
}
#if defined(__cplusplus)
__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
#else
__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
#endif
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
                                    unsigned int __c) {
  return __nv_byte_perm(__a, __b, __c);
@@ -1559,7 +1570,7 @@ __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
#if defined(__LP64__) || defined(_WIN64)
__DEVICE__ long labs(long __a) { return llabs(__a); };
__DEVICE__ long labs(long __a) { return __nv_llabs(__a); };
#else
__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
#endif
@@ -1604,12 +1615,16 @@ __DEVICE__ long lround(double __a) { return round(__a); }
__DEVICE__ long lroundf(float __a) { return roundf(__a); }
#endif
__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }
// These functions shouldn't be declared when including this header
// for math function resolution purposes.
#ifndef _OPENMP
__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
  return __builtin_memcpy(__a, __b, __c);
}
__DEVICE__ void *memset(void *__a, int __b, size_t __c) {
  return __builtin_memset(__a, __b, __c);
}
#endif
__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }
__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }
__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }
@@ -1693,6 +1708,8 @@ __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }
__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
// TODO: remove once variant is supported
#ifndef _OPENMP
__DEVICE__ double scalbln(double __a, long __b) {
  if (__b > INT_MAX)
    return __a > 0 ? HUGE_VAL : -HUGE_VAL;
@@ -1707,6 +1724,7 @@ __DEVICE__ float scalblnf(float __a, long __b) {
    return __a > 0 ? 0.f : -0.f;
  return scalbnf(__a, (int)__b);
}
#endif
__DEVICE__ double sin(double __a) { return __nv_sin(__a); }
__DEVICE__ void sincos(double __a, double *__s, double *__c) {
return __nv_sincos(__a, __s, __c);
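
Taken together, the driver and header changes make standard math calls inside a target region resolve to the CUDA/libdevice device implementations when offloading to NVPTX. A hypothetical user-side example, not part of this patch, compiled with -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda:

// Hypothetical example: with this patch, std::sin below resolves through the
// wrapper headers to the device-side implementation on the NVPTX target.
#include <cmath>

double device_sin(double x) {
  double r;
#pragma omp target map(from : r)
  r = std::sin(x);
  return r;
}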
