-
Notifications
You must be signed in to change notification settings - Fork 15.4k
clang: Use generic builtins in cuda complex builtins header #171106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
clang: Use generic builtins in cuda complex builtins header #171106
Conversation
There's no reason to use the ocml or nv prefixed functions and maintain this list of alias macros. I left these macros in for NVPTX in the scalbn and logb case, since those have a special case hack in the AMDGPU codegen and probably do not work on ptx.
|
@llvm/pr-subscribers-backend-x86 Author: Matt Arsenault (arsenm) ChangesThere's no reason to use the ocml or nv prefixed functions and Patch is 22.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171106.diff 5 Files Affected:
diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h
index 7bc7bc2ce63e1..e3038dcb8fd36 100644
--- a/clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -23,63 +23,17 @@
#define __DEVICE__ __device__ inline
#endif
-// To make the algorithms available for C and C++ in CUDA and OpenMP we select
-// different but equivalent function versions. TODO: For OpenMP we currently
-// select the native builtins as the overload support for templates is lacking.
-#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
-#define _ISNANd std::isnan
-#define _ISNANf std::isnan
-#define _ISINFd std::isinf
-#define _ISINFf std::isinf
-#define _ISFINITEd std::isfinite
-#define _ISFINITEf std::isfinite
-#define _COPYSIGNd std::copysign
-#define _COPYSIGNf std::copysign
-#define _SCALBNd std::scalbn
-#define _SCALBNf std::scalbn
-#define _ABSd std::abs
-#define _ABSf std::abs
-#define _LOGBd std::logb
-#define _LOGBf std::logb
-// Rather than pulling in std::max from algorithm everytime, use available ::max.
-#define _fmaxd max
-#define _fmaxf max
-#else
-#ifdef __AMDGCN__
-#define _ISNANd __ocml_isnan_f64
-#define _ISNANf __ocml_isnan_f32
-#define _ISINFd __ocml_isinf_f64
-#define _ISINFf __ocml_isinf_f32
-#define _ISFINITEd __ocml_isfinite_f64
-#define _ISFINITEf __ocml_isfinite_f32
-#define _COPYSIGNd __ocml_copysign_f64
-#define _COPYSIGNf __ocml_copysign_f32
-#define _SCALBNd __ocml_scalbn_f64
-#define _SCALBNf __ocml_scalbn_f32
-#define _ABSd __ocml_fabs_f64
-#define _ABSf __ocml_fabs_f32
-#define _LOGBd __ocml_logb_f64
-#define _LOGBf __ocml_logb_f32
-#define _fmaxd __ocml_fmax_f64
-#define _fmaxf __ocml_fmax_f32
-#else
-#define _ISNANd __nv_isnand
-#define _ISNANf __nv_isnanf
-#define _ISINFd __nv_isinfd
-#define _ISINFf __nv_isinff
-#define _ISFINITEd __nv_isfinited
-#define _ISFINITEf __nv_finitef
-#define _COPYSIGNd __nv_copysign
-#define _COPYSIGNf __nv_copysignf
+#ifdef __NVPTX__
+// FIXME: NVPTX should use generic builtins.
#define _SCALBNd __nv_scalbn
#define _SCALBNf __nv_scalbnf
-#define _ABSd __nv_fabs
-#define _ABSf __nv_fabsf
#define _LOGBd __nv_logb
#define _LOGBf __nv_logbf
-#define _fmaxd __nv_fmax
-#define _fmaxf __nv_fmaxf
-#endif
+#else
+#define _SCALBNd __builtin_scalbn
+#define _SCALBNf __builtin_scalbnf
+#define _LOGBd __builtin_logb
+#define _LOGBf __builtin_logbf
#endif
#if defined(__cplusplus)
@@ -95,36 +49,36 @@ __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
double _Complex z;
__real__(z) = __ac - __bd;
__imag__(z) = __ad + __bc;
- if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
+ if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
int __recalc = 0;
- if (_ISINFd(__a) || _ISINFd(__b)) {
- __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a);
- __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b);
- if (_ISNANd(__c))
- __c = _COPYSIGNd(0, __c);
- if (_ISNANd(__d))
- __d = _COPYSIGNd(0, __d);
+ if (__builtin_isinf(__a) || __builtin_isinf(__b)) {
+ __a = __builtin_copysign(__builtin_isinf(__a) ? 1 : 0, __a);
+ __b = __builtin_copysign(__builtin_isinf(__b) ? 1 : 0, __b);
+ if (__builtin_isnan(__c))
+ __c = __builtin_copysign(0, __c);
+ if (__builtin_isnan(__d))
+ __d = __builtin_copysign(0, __d);
__recalc = 1;
}
- if (_ISINFd(__c) || _ISINFd(__d)) {
- __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c);
- __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d);
- if (_ISNANd(__a))
- __a = _COPYSIGNd(0, __a);
- if (_ISNANd(__b))
- __b = _COPYSIGNd(0, __b);
+ if (__builtin_isinf(__c) || __builtin_isinf(__d)) {
+ __c = __builtin_copysign(__builtin_isinf(__c) ? 1 : 0, __c);
+ __d = __builtin_copysign(__builtin_isinf(__d) ? 1 : 0, __d);
+ if (__builtin_isnan(__a))
+ __a = __builtin_copysign(0, __a);
+ if (__builtin_isnan(__b))
+ __b = __builtin_copysign(0, __b);
__recalc = 1;
}
- if (!__recalc &&
- (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) {
- if (_ISNANd(__a))
- __a = _COPYSIGNd(0, __a);
- if (_ISNANd(__b))
- __b = _COPYSIGNd(0, __b);
- if (_ISNANd(__c))
- __c = _COPYSIGNd(0, __c);
- if (_ISNANd(__d))
- __d = _COPYSIGNd(0, __d);
+ if (!__recalc && (__builtin_isinf(__ac) || __builtin_isinf(__bd) ||
+ __builtin_isinf(__ad) || __builtin_isinf(__bc))) {
+ if (__builtin_isnan(__a))
+ __a = __builtin_copysign(0, __a);
+ if (__builtin_isnan(__b))
+ __b = __builtin_copysign(0, __b);
+ if (__builtin_isnan(__c))
+ __c = __builtin_copysign(0, __c);
+ if (__builtin_isnan(__d))
+ __d = __builtin_copysign(0, __d);
__recalc = 1;
}
if (__recalc) {
@@ -145,36 +99,36 @@ __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
float _Complex z;
__real__(z) = __ac - __bd;
__imag__(z) = __ad + __bc;
- if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
+ if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
int __recalc = 0;
- if (_ISINFf(__a) || _ISINFf(__b)) {
- __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
- __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
- if (_ISNANf(__c))
- __c = _COPYSIGNf(0, __c);
- if (_ISNANf(__d))
- __d = _COPYSIGNf(0, __d);
+ if (__builtin_isinf(__a) || __builtin_isinf(__b)) {
+ __a = __builtin_copysignf(__builtin_isinf(__a) ? 1 : 0, __a);
+ __b = __builtin_copysignf(__builtin_isinf(__b) ? 1 : 0, __b);
+ if (__builtin_isnan(__c))
+ __c = __builtin_copysignf(0, __c);
+ if (__builtin_isnan(__d))
+ __d = __builtin_copysignf(0, __d);
__recalc = 1;
}
- if (_ISINFf(__c) || _ISINFf(__d)) {
- __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
- __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
- if (_ISNANf(__a))
- __a = _COPYSIGNf(0, __a);
- if (_ISNANf(__b))
- __b = _COPYSIGNf(0, __b);
+ if (__builtin_isinf(__c) || __builtin_isinf(__d)) {
+ __c = __builtin_copysignf(__builtin_isinf(__c) ? 1 : 0, __c);
+ __d = __builtin_copysignf(__builtin_isinf(__d) ? 1 : 0, __d);
+ if (__builtin_isnan(__a))
+ __a = __builtin_copysignf(0, __a);
+ if (__builtin_isnan(__b))
+ __b = __builtin_copysignf(0, __b);
__recalc = 1;
}
- if (!__recalc &&
- (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) {
- if (_ISNANf(__a))
- __a = _COPYSIGNf(0, __a);
- if (_ISNANf(__b))
- __b = _COPYSIGNf(0, __b);
- if (_ISNANf(__c))
- __c = _COPYSIGNf(0, __c);
- if (_ISNANf(__d))
- __d = _COPYSIGNf(0, __d);
+ if (!__recalc && (__builtin_isinf(__ac) || __builtin_isinf(__bd) ||
+ __builtin_isinf(__ad) || __builtin_isinf(__bc))) {
+ if (__builtin_isnan(__a))
+ __a = __builtin_copysignf(0, __a);
+ if (__builtin_isnan(__b))
+ __b = __builtin_copysignf(0, __b);
+ if (__builtin_isnan(__c))
+ __c = __builtin_copysignf(0, __c);
+ if (__builtin_isnan(__d))
+ __d = __builtin_copysignf(0, __d);
__recalc = 1;
}
if (__recalc) {
@@ -191,8 +145,9 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
// Can't use std::max, because that's defined in <algorithm>, and we don't
// want to pull that in for every compile. The CUDA headers define
// ::max(float, float) and ::max(double, double), which is sufficient for us.
- double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d)));
- if (_ISFINITEd(__logbw)) {
+ double __logbw =
+ _LOGBd(__builtin_fmax(__builtin_fabs(__c), __builtin_fabs(__d)));
+ if (__builtin_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = _SCALBNd(__c, -__ilogbw);
__d = _SCALBNd(__d, -__ilogbw);
@@ -201,20 +156,20 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
double _Complex z;
__real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
__imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
- if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) {
- __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a;
- __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b;
- } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) &&
- _ISFINITEd(__d)) {
- __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a);
- __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b);
+ if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
+ if ((__denom == 0.0) && (!__builtin_isnan(__a) || !__builtin_isnan(__b))) {
+ __real__(z) = __builtin_copysign(__builtin_huge_val(), __c) * __a;
+ __imag__(z) = __builtin_copysign(__builtin_huge_val(), __c) * __b;
+ } else if ((__builtin_isinf(__a) || __builtin_isinf(__b)) &&
+ __builtin_isfinite(__c) && __builtin_isfinite(__d)) {
+ __a = __builtin_copysign(__builtin_isinf(__a) ? 1.0 : 0.0, __a);
+ __b = __builtin_copysign(__builtin_isinf(__b) ? 1.0 : 0.0, __b);
__real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
__imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
- } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) &&
- _ISFINITEd(__b)) {
- __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c);
- __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d);
+ } else if (__builtin_isinf(__logbw) && __logbw > 0.0 &&
+ __builtin_isfinite(__a) && __builtin_isfinite(__b)) {
+ __c = __builtin_copysign(__builtin_isinf(__c) ? 1.0 : 0.0, __c);
+ __d = __builtin_copysign(__builtin_isinf(__d) ? 1.0 : 0.0, __d);
__real__(z) = 0.0 * (__a * __c + __b * __d);
__imag__(z) = 0.0 * (__b * __c - __a * __d);
}
@@ -224,8 +179,9 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
int __ilogbw = 0;
- float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d)));
- if (_ISFINITEf(__logbw)) {
+ float __logbw =
+ _LOGBf(__builtin_fmaxf(__builtin_fabsf(__c), __builtin_fabsf(__d)));
+ if (__builtin_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = _SCALBNf(__c, -__ilogbw);
__d = _SCALBNf(__d, -__ilogbw);
@@ -234,20 +190,20 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
float _Complex z;
__real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
__imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
- if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) {
- __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a;
- __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b;
- } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) &&
- _ISFINITEf(__d)) {
- __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
- __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
+ if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
+ if ((__denom == 0) && (!__builtin_isnan(__a) || !__builtin_isnan(__b))) {
+ __real__(z) = __builtin_copysignf(__builtin_huge_valf(), __c) * __a;
+ __imag__(z) = __builtin_copysignf(__builtin_huge_valf(), __c) * __b;
+ } else if ((__builtin_isinf(__a) || __builtin_isinf(__b)) &&
+ __builtin_isfinite(__c) && __builtin_isfinite(__d)) {
+ __a = __builtin_copysignf(__builtin_isinf(__a) ? 1 : 0, __a);
+ __b = __builtin_copysignf(__builtin_isinf(__b) ? 1 : 0, __b);
__real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
__imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
- } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) &&
- _ISFINITEf(__b)) {
- __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
- __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
+ } else if (__builtin_isinf(__logbw) && __logbw > 0 &&
+ __builtin_isfinite(__a) && __builtin_isfinite(__b)) {
+ __c = __builtin_copysignf(__builtin_isinf(__c) ? 1 : 0, __c);
+ __d = __builtin_copysignf(__builtin_isinf(__d) ? 1 : 0, __d);
__real__(z) = 0 * (__a * __c + __b * __d);
__imag__(z) = 0 * (__b * __c - __a * __d);
}
@@ -259,22 +215,10 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
} // extern "C"
#endif
-#undef _ISNANd
-#undef _ISNANf
-#undef _ISINFd
-#undef _ISINFf
-#undef _COPYSIGNd
-#undef _COPYSIGNf
-#undef _ISFINITEd
-#undef _ISFINITEf
#undef _SCALBNd
#undef _SCALBNf
-#undef _ABSd
-#undef _ABSf
#undef _LOGBd
#undef _LOGBf
-#undef _fmaxd
-#undef _fmaxf
#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
#pragma omp end declare target
diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
index 108f159ee5308..b347cf4716df2 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.c
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -14,17 +14,17 @@ void test_complex_f64(double _Complex a) {
}
// CHECK: define weak {{.*}} @__divdc3
-// CHECK-DAG: call double @__ocml_fabs_f64(
-// CHECK-DAG: call i32 @__ocml_isnan_f64(
-// CHECK-DAG: call i32 @__ocml_isfinite_f64(
-// CHECK-DAG: call double @__ocml_copysign_f64(
-// CHECK-DAG: call double @__ocml_scalbn_f64(
-// CHECK-DAG: call double @__ocml_logb_f64(
+// CHECK-DAG: call double @llvm.fabs.f64(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 504)
+// CHECK-DAG: call double @llvm.copysign.f64(
+// CHECK-DAG: call double @llvm.ldexp.f64.i32(
+// CHECK-DAG: call { double, i32 } @llvm.frexp.f64.i32
// CHECK: define weak {{.*}} @__muldc3
-// CHECK-DAG: call i32 @__ocml_isnan_f64(
-// CHECK-DAG: call i32 @__ocml_isinf_f64(
-// CHECK-DAG: call double @__ocml_copysign_f64(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
+// CHECK-DAG: call double @llvm.copysign.f64(
void test_complex_f32(float _Complex a) {
// CHECK-LABEL: define {{.*}}test_complex_f32
@@ -37,14 +37,14 @@ void test_complex_f32(float _Complex a) {
}
// CHECK: define weak {{.*}} @__divsc3
-// CHECK-DAG: call float @__ocml_fabs_f32(
-// CHECK-DAG: call i32 @__ocml_isnan_f32(
-// CHECK-DAG: call i32 @__ocml_isfinite_f32(
-// CHECK-DAG: call float @__ocml_copysign_f32(
-// CHECK-DAG: call float @__ocml_scalbn_f32(
-// CHECK-DAG: call float @__ocml_logb_f32(
+// CHECK-DAG: call float @llvm.fabs.f32(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
+// CHECK-DAG: call float @llvm.ldexp.f32.i32(
+// CHECK-DAG: call { float, i32 } @llvm.frexp.f32.i32
// CHECK: define weak {{.*}} @__mulsc3
-// CHECK-DAG: call i32 @__ocml_isnan_f32(
-// CHECK-DAG: call i32 @__ocml_isinf_f32(
-// CHECK-DAG: call float @__ocml_copysign_f32(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp b/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
index 13bfdd11a309b..bba1794001059 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
@@ -6,31 +6,31 @@
#include <complex>
// CHECK: define weak {{.*}} @__muldc3
-// CHECK-DAG: call i32 @__ocml_isnan_f64(
-// CHECK-DAG: call i32 @__ocml_isinf_f64(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
// CHECK: define weak {{.*}} @__mulsc3
-// CHECK-DAG: call i32 @__ocml_isnan_f32(
-// CHECK-DAG: call i32 @__ocml_isinf_f32(
-// CHECK-DAG: call float @__ocml_copysign_f32(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
// CHECK: define weak {{.*}} @__divdc3
-// CHECK-DAG: call i32 @__ocml_isnan_f64(
-// CHECK-DAG: call i32 @__ocml_isinf_f64(
-// CHECK-DAG: call i32 @__ocml_isfinite_f64(
-// CHECK-DAG: call double @__ocml_copysign_f64(
-// CHECK-DAG: call double @__ocml_scalbn_f64(
-// CHECK-DAG: call double @__ocml_fabs_f64(
-// CHECK-DAG: call double @__ocml_logb_f64(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 504)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
+// CHECK-DAG: call double @llvm.copysign.f64(
+// CHECK-DAG: call double @llvm.ldexp.f64.i32(
+// CHECK-DAG: call double @llvm.fabs.f64(
+// CHECK-DAG: call { double, i32 } @llvm.frexp.f64.i32
// CHECK: define weak {{.*}} @__divsc3
-// CHECK-DAG: call i32 @__ocml_isnan_f32(
-// CHECK-DAG: call i32 @__ocml_isinf_f32(
-// CHECK-DAG: call i32 @__ocml_isfinite_f32(
-// CHECK-DAG: call float @__ocml_copysign_f32(
-// CHECK-DAG: call float @__ocml_scalbn_f32(
-// CHECK-DAG: call float @__ocml_fabs_f32(
-// CHECK-DAG: call float @__ocml_logb_f32(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 504)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
+// CHECK-DAG: call float @llvm.ldexp.f32.i32(
+// CHECK-DAG: call float @llvm.fabs.f32(
+// CHECK-DAG: call { float, i32 } @llvm.frexp.f32.i32
// We actually check that there are no declarations of non-OpenMP functions.
// That is, as long as we don't call an unkown function with a name that
diff --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c
index 354e9a10adf29..a5f2109c9054e 100644
--- a/clang/test/Headers/nvptx_device_math_complex.c
+++ b/clang/test/Headers/nvptx_device_math_complex.c
@@ -12,32 +12,32 @@
#endif
// CHECK: define weak {{.*}} @__divsc3
-// CHECK-DAG: call i32 @__nv_isnanf(
-// CHECK-DAG: call i32 @__nv_isinff(
-// CHECK-DAG: call i32 @__nv_finitef(
-// CHECK-DAG: call float @__nv_copysignf(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 504)
+// CHECK-DAG: call float @llvm.copysign.f32(
// CHECK-DAG: call float @__nv_scalbnf(
-// CHECK-DAG: call float @__nv_fabsf(
+// CHECK-DAG: call float @llvm.fabs.f32(
// CHECK-DAG: call float @__nv_logbf(
// CHECK: define weak {{.*}} @__mulsc3
-// CHECK-DAG: call i32 @__nv_isnanf(
-// CHECK-DAG: call i32 @__nv_isinff(
-// CHECK-DAG: call float @__nv_copysignf(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
// CHECK: define weak {{.*}} @__divdc3
-// CHECK-DAG: call i32 @__nv_isnand(
-// CHECK-DAG: call i32 @__nv_isinfd(
-// CHECK-DAG: call i32 @__nv_isfinited(
-// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 504)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
+// CHECK-DAG: call double @llvm.copysign.f64(
// CHECK-DAG: call double @__nv_scalbn(
-// CHECK-DAG: call double @__nv_fabs(
+// CHECK-DAG: call double @llvm.fabs.f64(
// CHECK-DAG: call double @__nv_logb(
// CHECK: define weak {{.*}} @__muldc3
-// CHECK-DAG: call i32 @__nv_isnand(
-// CHECK-DAG: call i32 @__nv_isinfd(
-// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
+// CHECK-DAG: call double @llvm.copysign.f64(
void test_scmplx(float _Complex a...
[truncated]
|
|
@llvm/pr-subscribers-clang Author: Matt Arsenault (arsenm) ChangesThere's no reason to use the ocml or nv prefixed functions and Patch is 22.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171106.diff 5 Files Affected:
diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h
index 7bc7bc2ce63e1..e3038dcb8fd36 100644
--- a/clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -23,63 +23,17 @@
#define __DEVICE__ __device__ inline
#endif
-// To make the algorithms available for C and C++ in CUDA and OpenMP we select
-// different but equivalent function versions. TODO: For OpenMP we currently
-// select the native builtins as the overload support for templates is lacking.
-#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
-#define _ISNANd std::isnan
-#define _ISNANf std::isnan
-#define _ISINFd std::isinf
-#define _ISINFf std::isinf
-#define _ISFINITEd std::isfinite
-#define _ISFINITEf std::isfinite
-#define _COPYSIGNd std::copysign
-#define _COPYSIGNf std::copysign
-#define _SCALBNd std::scalbn
-#define _SCALBNf std::scalbn
-#define _ABSd std::abs
-#define _ABSf std::abs
-#define _LOGBd std::logb
-#define _LOGBf std::logb
-// Rather than pulling in std::max from algorithm everytime, use available ::max.
-#define _fmaxd max
-#define _fmaxf max
-#else
-#ifdef __AMDGCN__
-#define _ISNANd __ocml_isnan_f64
-#define _ISNANf __ocml_isnan_f32
-#define _ISINFd __ocml_isinf_f64
-#define _ISINFf __ocml_isinf_f32
-#define _ISFINITEd __ocml_isfinite_f64
-#define _ISFINITEf __ocml_isfinite_f32
-#define _COPYSIGNd __ocml_copysign_f64
-#define _COPYSIGNf __ocml_copysign_f32
-#define _SCALBNd __ocml_scalbn_f64
-#define _SCALBNf __ocml_scalbn_f32
-#define _ABSd __ocml_fabs_f64
-#define _ABSf __ocml_fabs_f32
-#define _LOGBd __ocml_logb_f64
-#define _LOGBf __ocml_logb_f32
-#define _fmaxd __ocml_fmax_f64
-#define _fmaxf __ocml_fmax_f32
-#else
-#define _ISNANd __nv_isnand
-#define _ISNANf __nv_isnanf
-#define _ISINFd __nv_isinfd
-#define _ISINFf __nv_isinff
-#define _ISFINITEd __nv_isfinited
-#define _ISFINITEf __nv_finitef
-#define _COPYSIGNd __nv_copysign
-#define _COPYSIGNf __nv_copysignf
+#ifdef __NVPTX__
+// FIXME: NVPTX should use generic builtins.
#define _SCALBNd __nv_scalbn
#define _SCALBNf __nv_scalbnf
-#define _ABSd __nv_fabs
-#define _ABSf __nv_fabsf
#define _LOGBd __nv_logb
#define _LOGBf __nv_logbf
-#define _fmaxd __nv_fmax
-#define _fmaxf __nv_fmaxf
-#endif
+#else
+#define _SCALBNd __builtin_scalbn
+#define _SCALBNf __builtin_scalbnf
+#define _LOGBd __builtin_logb
+#define _LOGBf __builtin_logbf
#endif
#if defined(__cplusplus)
@@ -95,36 +49,36 @@ __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
double _Complex z;
__real__(z) = __ac - __bd;
__imag__(z) = __ad + __bc;
- if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
+ if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
int __recalc = 0;
- if (_ISINFd(__a) || _ISINFd(__b)) {
- __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a);
- __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b);
- if (_ISNANd(__c))
- __c = _COPYSIGNd(0, __c);
- if (_ISNANd(__d))
- __d = _COPYSIGNd(0, __d);
+ if (__builtin_isinf(__a) || __builtin_isinf(__b)) {
+ __a = __builtin_copysign(__builtin_isinf(__a) ? 1 : 0, __a);
+ __b = __builtin_copysign(__builtin_isinf(__b) ? 1 : 0, __b);
+ if (__builtin_isnan(__c))
+ __c = __builtin_copysign(0, __c);
+ if (__builtin_isnan(__d))
+ __d = __builtin_copysign(0, __d);
__recalc = 1;
}
- if (_ISINFd(__c) || _ISINFd(__d)) {
- __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c);
- __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d);
- if (_ISNANd(__a))
- __a = _COPYSIGNd(0, __a);
- if (_ISNANd(__b))
- __b = _COPYSIGNd(0, __b);
+ if (__builtin_isinf(__c) || __builtin_isinf(__d)) {
+ __c = __builtin_copysign(__builtin_isinf(__c) ? 1 : 0, __c);
+ __d = __builtin_copysign(__builtin_isinf(__d) ? 1 : 0, __d);
+ if (__builtin_isnan(__a))
+ __a = __builtin_copysign(0, __a);
+ if (__builtin_isnan(__b))
+ __b = __builtin_copysign(0, __b);
__recalc = 1;
}
- if (!__recalc &&
- (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) {
- if (_ISNANd(__a))
- __a = _COPYSIGNd(0, __a);
- if (_ISNANd(__b))
- __b = _COPYSIGNd(0, __b);
- if (_ISNANd(__c))
- __c = _COPYSIGNd(0, __c);
- if (_ISNANd(__d))
- __d = _COPYSIGNd(0, __d);
+ if (!__recalc && (__builtin_isinf(__ac) || __builtin_isinf(__bd) ||
+ __builtin_isinf(__ad) || __builtin_isinf(__bc))) {
+ if (__builtin_isnan(__a))
+ __a = __builtin_copysign(0, __a);
+ if (__builtin_isnan(__b))
+ __b = __builtin_copysign(0, __b);
+ if (__builtin_isnan(__c))
+ __c = __builtin_copysign(0, __c);
+ if (__builtin_isnan(__d))
+ __d = __builtin_copysign(0, __d);
__recalc = 1;
}
if (__recalc) {
@@ -145,36 +99,36 @@ __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
float _Complex z;
__real__(z) = __ac - __bd;
__imag__(z) = __ad + __bc;
- if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
+ if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
int __recalc = 0;
- if (_ISINFf(__a) || _ISINFf(__b)) {
- __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
- __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
- if (_ISNANf(__c))
- __c = _COPYSIGNf(0, __c);
- if (_ISNANf(__d))
- __d = _COPYSIGNf(0, __d);
+ if (__builtin_isinf(__a) || __builtin_isinf(__b)) {
+ __a = __builtin_copysignf(__builtin_isinf(__a) ? 1 : 0, __a);
+ __b = __builtin_copysignf(__builtin_isinf(__b) ? 1 : 0, __b);
+ if (__builtin_isnan(__c))
+ __c = __builtin_copysignf(0, __c);
+ if (__builtin_isnan(__d))
+ __d = __builtin_copysignf(0, __d);
__recalc = 1;
}
- if (_ISINFf(__c) || _ISINFf(__d)) {
- __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
- __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
- if (_ISNANf(__a))
- __a = _COPYSIGNf(0, __a);
- if (_ISNANf(__b))
- __b = _COPYSIGNf(0, __b);
+ if (__builtin_isinf(__c) || __builtin_isinf(__d)) {
+ __c = __builtin_copysignf(__builtin_isinf(__c) ? 1 : 0, __c);
+ __d = __builtin_copysignf(__builtin_isinf(__d) ? 1 : 0, __d);
+ if (__builtin_isnan(__a))
+ __a = __builtin_copysignf(0, __a);
+ if (__builtin_isnan(__b))
+ __b = __builtin_copysignf(0, __b);
__recalc = 1;
}
- if (!__recalc &&
- (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) {
- if (_ISNANf(__a))
- __a = _COPYSIGNf(0, __a);
- if (_ISNANf(__b))
- __b = _COPYSIGNf(0, __b);
- if (_ISNANf(__c))
- __c = _COPYSIGNf(0, __c);
- if (_ISNANf(__d))
- __d = _COPYSIGNf(0, __d);
+ if (!__recalc && (__builtin_isinf(__ac) || __builtin_isinf(__bd) ||
+ __builtin_isinf(__ad) || __builtin_isinf(__bc))) {
+ if (__builtin_isnan(__a))
+ __a = __builtin_copysignf(0, __a);
+ if (__builtin_isnan(__b))
+ __b = __builtin_copysignf(0, __b);
+ if (__builtin_isnan(__c))
+ __c = __builtin_copysignf(0, __c);
+ if (__builtin_isnan(__d))
+ __d = __builtin_copysignf(0, __d);
__recalc = 1;
}
if (__recalc) {
@@ -191,8 +145,9 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
// Can't use std::max, because that's defined in <algorithm>, and we don't
// want to pull that in for every compile. The CUDA headers define
// ::max(float, float) and ::max(double, double), which is sufficient for us.
- double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d)));
- if (_ISFINITEd(__logbw)) {
+ double __logbw =
+ _LOGBd(__builtin_fmax(__builtin_fabs(__c), __builtin_fabs(__d)));
+ if (__builtin_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = _SCALBNd(__c, -__ilogbw);
__d = _SCALBNd(__d, -__ilogbw);
@@ -201,20 +156,20 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
double _Complex z;
__real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
__imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
- if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) {
- __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a;
- __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b;
- } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) &&
- _ISFINITEd(__d)) {
- __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a);
- __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b);
+ if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
+ if ((__denom == 0.0) && (!__builtin_isnan(__a) || !__builtin_isnan(__b))) {
+ __real__(z) = __builtin_copysign(__builtin_huge_val(), __c) * __a;
+ __imag__(z) = __builtin_copysign(__builtin_huge_val(), __c) * __b;
+ } else if ((__builtin_isinf(__a) || __builtin_isinf(__b)) &&
+ __builtin_isfinite(__c) && __builtin_isfinite(__d)) {
+ __a = __builtin_copysign(__builtin_isinf(__a) ? 1.0 : 0.0, __a);
+ __b = __builtin_copysign(__builtin_isinf(__b) ? 1.0 : 0.0, __b);
__real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
__imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
- } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) &&
- _ISFINITEd(__b)) {
- __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c);
- __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d);
+ } else if (__builtin_isinf(__logbw) && __logbw > 0.0 &&
+ __builtin_isfinite(__a) && __builtin_isfinite(__b)) {
+ __c = __builtin_copysign(__builtin_isinf(__c) ? 1.0 : 0.0, __c);
+ __d = __builtin_copysign(__builtin_isinf(__d) ? 1.0 : 0.0, __d);
__real__(z) = 0.0 * (__a * __c + __b * __d);
__imag__(z) = 0.0 * (__b * __c - __a * __d);
}
@@ -224,8 +179,9 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
int __ilogbw = 0;
- float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d)));
- if (_ISFINITEf(__logbw)) {
+ float __logbw =
+ _LOGBf(__builtin_fmaxf(__builtin_fabsf(__c), __builtin_fabsf(__d)));
+ if (__builtin_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = _SCALBNf(__c, -__ilogbw);
__d = _SCALBNf(__d, -__ilogbw);
@@ -234,20 +190,20 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
float _Complex z;
__real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
__imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
- if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) {
- __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a;
- __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b;
- } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) &&
- _ISFINITEf(__d)) {
- __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
- __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
+ if (__builtin_isnan(__real__(z)) && __builtin_isnan(__imag__(z))) {
+ if ((__denom == 0) && (!__builtin_isnan(__a) || !__builtin_isnan(__b))) {
+ __real__(z) = __builtin_copysignf(__builtin_huge_valf(), __c) * __a;
+ __imag__(z) = __builtin_copysignf(__builtin_huge_valf(), __c) * __b;
+ } else if ((__builtin_isinf(__a) || __builtin_isinf(__b)) &&
+ __builtin_isfinite(__c) && __builtin_isfinite(__d)) {
+ __a = __builtin_copysignf(__builtin_isinf(__a) ? 1 : 0, __a);
+ __b = __builtin_copysignf(__builtin_isinf(__b) ? 1 : 0, __b);
__real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
__imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
- } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) &&
- _ISFINITEf(__b)) {
- __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
- __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
+ } else if (__builtin_isinf(__logbw) && __logbw > 0 &&
+ __builtin_isfinite(__a) && __builtin_isfinite(__b)) {
+ __c = __builtin_copysignf(__builtin_isinf(__c) ? 1 : 0, __c);
+ __d = __builtin_copysignf(__builtin_isinf(__d) ? 1 : 0, __d);
__real__(z) = 0 * (__a * __c + __b * __d);
__imag__(z) = 0 * (__b * __c - __a * __d);
}
@@ -259,22 +215,10 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
} // extern "C"
#endif
-#undef _ISNANd
-#undef _ISNANf
-#undef _ISINFd
-#undef _ISINFf
-#undef _COPYSIGNd
-#undef _COPYSIGNf
-#undef _ISFINITEd
-#undef _ISFINITEf
#undef _SCALBNd
#undef _SCALBNf
-#undef _ABSd
-#undef _ABSf
#undef _LOGBd
#undef _LOGBf
-#undef _fmaxd
-#undef _fmaxf
#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
#pragma omp end declare target
diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
index 108f159ee5308..b347cf4716df2 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.c
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -14,17 +14,17 @@ void test_complex_f64(double _Complex a) {
}
// CHECK: define weak {{.*}} @__divdc3
-// CHECK-DAG: call double @__ocml_fabs_f64(
-// CHECK-DAG: call i32 @__ocml_isnan_f64(
-// CHECK-DAG: call i32 @__ocml_isfinite_f64(
-// CHECK-DAG: call double @__ocml_copysign_f64(
-// CHECK-DAG: call double @__ocml_scalbn_f64(
-// CHECK-DAG: call double @__ocml_logb_f64(
+// CHECK-DAG: call double @llvm.fabs.f64(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 504)
+// CHECK-DAG: call double @llvm.copysign.f64(
+// CHECK-DAG: call double @llvm.ldexp.f64.i32(
+// CHECK-DAG: call { double, i32 } @llvm.frexp.f64.i32
// CHECK: define weak {{.*}} @__muldc3
-// CHECK-DAG: call i32 @__ocml_isnan_f64(
-// CHECK-DAG: call i32 @__ocml_isinf_f64(
-// CHECK-DAG: call double @__ocml_copysign_f64(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
+// CHECK-DAG: call double @llvm.copysign.f64(
void test_complex_f32(float _Complex a) {
// CHECK-LABEL: define {{.*}}test_complex_f32
@@ -37,14 +37,14 @@ void test_complex_f32(float _Complex a) {
}
// CHECK: define weak {{.*}} @__divsc3
-// CHECK-DAG: call float @__ocml_fabs_f32(
-// CHECK-DAG: call i32 @__ocml_isnan_f32(
-// CHECK-DAG: call i32 @__ocml_isfinite_f32(
-// CHECK-DAG: call float @__ocml_copysign_f32(
-// CHECK-DAG: call float @__ocml_scalbn_f32(
-// CHECK-DAG: call float @__ocml_logb_f32(
+// CHECK-DAG: call float @llvm.fabs.f32(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
+// CHECK-DAG: call float @llvm.ldexp.f32.i32(
+// CHECK-DAG: call { float, i32 } @llvm.frexp.f32.i32
// CHECK: define weak {{.*}} @__mulsc3
-// CHECK-DAG: call i32 @__ocml_isnan_f32(
-// CHECK-DAG: call i32 @__ocml_isinf_f32(
-// CHECK-DAG: call float @__ocml_copysign_f32(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp b/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
index 13bfdd11a309b..bba1794001059 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp
@@ -6,31 +6,31 @@
#include <complex>
// CHECK: define weak {{.*}} @__muldc3
-// CHECK-DAG: call i32 @__ocml_isnan_f64(
-// CHECK-DAG: call i32 @__ocml_isinf_f64(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
// CHECK: define weak {{.*}} @__mulsc3
-// CHECK-DAG: call i32 @__ocml_isnan_f32(
-// CHECK-DAG: call i32 @__ocml_isinf_f32(
-// CHECK-DAG: call float @__ocml_copysign_f32(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
// CHECK: define weak {{.*}} @__divdc3
-// CHECK-DAG: call i32 @__ocml_isnan_f64(
-// CHECK-DAG: call i32 @__ocml_isinf_f64(
-// CHECK-DAG: call i32 @__ocml_isfinite_f64(
-// CHECK-DAG: call double @__ocml_copysign_f64(
-// CHECK-DAG: call double @__ocml_scalbn_f64(
-// CHECK-DAG: call double @__ocml_fabs_f64(
-// CHECK-DAG: call double @__ocml_logb_f64(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 504)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
+// CHECK-DAG: call double @llvm.copysign.f64(
+// CHECK-DAG: call double @llvm.ldexp.f64.i32(
+// CHECK-DAG: call double @llvm.fabs.f64(
+// CHECK-DAG: call { double, i32 } @llvm.frexp.f64.i32
// CHECK: define weak {{.*}} @__divsc3
-// CHECK-DAG: call i32 @__ocml_isnan_f32(
-// CHECK-DAG: call i32 @__ocml_isinf_f32(
-// CHECK-DAG: call i32 @__ocml_isfinite_f32(
-// CHECK-DAG: call float @__ocml_copysign_f32(
-// CHECK-DAG: call float @__ocml_scalbn_f32(
-// CHECK-DAG: call float @__ocml_fabs_f32(
-// CHECK-DAG: call float @__ocml_logb_f32(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 504)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
+// CHECK-DAG: call float @llvm.ldexp.f32.i32(
+// CHECK-DAG: call float @llvm.fabs.f32(
+// CHECK-DAG: call { float, i32 } @llvm.frexp.f32.i32
// We actually check that there are no declarations of non-OpenMP functions.
// That is, as long as we don't call an unkown function with a name that
diff --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c
index 354e9a10adf29..a5f2109c9054e 100644
--- a/clang/test/Headers/nvptx_device_math_complex.c
+++ b/clang/test/Headers/nvptx_device_math_complex.c
@@ -12,32 +12,32 @@
#endif
// CHECK: define weak {{.*}} @__divsc3
-// CHECK-DAG: call i32 @__nv_isnanf(
-// CHECK-DAG: call i32 @__nv_isinff(
-// CHECK-DAG: call i32 @__nv_finitef(
-// CHECK-DAG: call float @__nv_copysignf(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 504)
+// CHECK-DAG: call float @llvm.copysign.f32(
// CHECK-DAG: call float @__nv_scalbnf(
-// CHECK-DAG: call float @__nv_fabsf(
+// CHECK-DAG: call float @llvm.fabs.f32(
// CHECK-DAG: call float @__nv_logbf(
// CHECK: define weak {{.*}} @__mulsc3
-// CHECK-DAG: call i32 @__nv_isnanf(
-// CHECK-DAG: call i32 @__nv_isinff(
-// CHECK-DAG: call float @__nv_copysignf(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f32(float %{{.+}}, i32 516)
+// CHECK-DAG: call float @llvm.copysign.f32(
// CHECK: define weak {{.*}} @__divdc3
-// CHECK-DAG: call i32 @__nv_isnand(
-// CHECK-DAG: call i32 @__nv_isinfd(
-// CHECK-DAG: call i32 @__nv_isfinited(
-// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 504)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
+// CHECK-DAG: call double @llvm.copysign.f64(
// CHECK-DAG: call double @__nv_scalbn(
-// CHECK-DAG: call double @__nv_fabs(
+// CHECK-DAG: call double @llvm.fabs.f64(
// CHECK-DAG: call double @__nv_logb(
// CHECK: define weak {{.*}} @__muldc3
-// CHECK-DAG: call i32 @__nv_isnand(
-// CHECK-DAG: call i32 @__nv_isinfd(
-// CHECK-DAG: call double @__nv_copysign(
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 3)
+// CHECK-DAG: call i1 @llvm.is.fpclass.f64(double %{{.+}}, i32 516)
+// CHECK-DAG: call double @llvm.copysign.f64(
void test_scmplx(float _Complex a...
[truncated]
|
|
Hi, |
JonChesterfield
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very nice Matt! Strongly in favour, thank you for drive by fixing this.
It should be nearly the same. The biggest difference might be for logb/ilogb, since a larger expansion happens directly in clang now |

There's no reason to use the ocml or nv prefixed functions and
maintain this list of alias macros. I left these macros in for
NVPTX in the scalbn and logb case, since those have a special
case hack in the AMDGPU codegen and probably do not work on ptx.