Skip to content

Commit

Permalink
[OpenMP][Clang] Support for target math functions
Browse files Browse the repository at this point in the history
Summary:
In this patch we propose a temporary solution to resolving math functions for the NVPTX toolchain, temporary until OpenMP variant is supported by Clang.

We intercept the inclusion of math.h and cmath headers and if we are in the OpenMP-NVPTX case, we re-use CUDA's math function resolution mechanism.

Authors:
@gtbercea
@jdoerfert

Reviewers: hfinkel, caomhin, ABataev, tra

Reviewed By: hfinkel, ABataev, tra

Subscribers: mgorny, guansong, cfe-commits, jdoerfert

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61399

llvm-svn: 360063
  • Loading branch information
doru1004 authored and MrSidims committed May 17, 2019
1 parent 2dd7a42 commit dfe3536
Show file tree
Hide file tree
Showing 16 changed files with 660 additions and 431 deletions.
3 changes: 3 additions & 0 deletions clang/lib/Headers/CMakeLists.txt
Expand Up @@ -33,6 +33,9 @@ set(files
avxintrin.h
bmi2intrin.h
bmiintrin.h
openmp_wrappers/math.h
openmp_wrappers/cmath
openmp_wrappers/__clang_openmp_math.h
__clang_cuda_builtin_vars.h
__clang_cuda_cmath.h
__clang_cuda_complex_builtins.h
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Headers/__clang_cuda_cmath.h
Expand Up @@ -30,7 +30,11 @@
// implementation. Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.

#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif

__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
Expand All @@ -47,6 +51,8 @@ __DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
// TODO: remove when variant is supported
#ifndef _OPENMP
__DEVICE__ int fpclassify(float __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
Expand All @@ -55,6 +61,7 @@ __DEVICE__ int fpclassify(double __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
}
#endif
__DEVICE__ float frexp(float __arg, int *__exp) {
return ::frexpf(__arg, __exp);
}
Expand Down Expand Up @@ -434,7 +441,10 @@ using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
// TODO: remove once variant is supported
#ifndef _OPENMP
using ::scalblnf;
#endif
using ::scalbnf;
using ::sinf;
using ::sinhf;
Expand Down
16 changes: 15 additions & 1 deletion clang/lib/Headers/__clang_cuda_device_functions.h
Expand Up @@ -10,15 +10,21 @@
#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__

#ifndef _OPENMP
#if CUDA_VERSION < 9000
#error This file is intended to be used with CUDA-9+ only.
#endif
#endif

// __DEVICE__ is a helper macro with common set of attributes for the wrappers
// we implement in this file. We need static in order to avoid emitting unused
// functions and __forceinline__ helps inlining these wrappers at -O1.
#pragma push_macro("__DEVICE__")
#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __forceinline__
#endif

// libdevice provides fast low precision and slow full-recision implementations
// for some functions. Which one gets selected depends on
Expand All @@ -38,8 +44,13 @@ __DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }
__DEVICE__ unsigned long long __brevll(unsigned long long __a) {
return __nv_brevll(__a);
}
#if defined(__cplusplus)
__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
#else
__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
#endif
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
unsigned int __c) {
return __nv_byte_perm(__a, __b, __c);
Expand Down Expand Up @@ -1559,7 +1570,7 @@ __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }
__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }
__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }
#if defined(__LP64__) || defined(_WIN64)
__DEVICE__ long labs(long __a) { return llabs(__a); };
__DEVICE__ long labs(long __a) { return __nv_llabs(__a); };
#else
__DEVICE__ long labs(long __a) { return __nv_abs(__a); };
#endif
Expand Down Expand Up @@ -1693,6 +1704,8 @@ __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }
__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }
__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }
__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }
// TODO: remove once variant is supported
#ifndef _OPENMP
__DEVICE__ double scalbln(double __a, long __b) {
if (__b > INT_MAX)
return __a > 0 ? HUGE_VAL : -HUGE_VAL;
Expand All @@ -1707,6 +1720,7 @@ __DEVICE__ float scalblnf(float __a, long __b) {
return __a > 0 ? 0.f : -0.f;
return scalbnf(__a, (int)__b);
}
#endif
__DEVICE__ double sin(double __a) { return __nv_sin(__a); }
__DEVICE__ void sincos(double __a, double *__s, double *__c) {
return __nv_sincos(__a, __s, __c);
Expand Down

0 comments on commit dfe3536

Please sign in to comment.