New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CUDA: Implement frexp, tgamma, ldexp, modf, remquo #836
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a test which uses these to run_cuda_tests.sh?
@pjaaskel, the formatter doesn't like the new lines I've added:
diff --git a/lib/kernel/templates.h b/lib/kernel/templates.h
index d9fd018c..e0d1df9a 100644
--- a/lib/kernel/templates.h
+++ b/lib/kernel/templates.h
@@ -1878,21 +1878,22 @@
IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, VTYPE, __global)
#define DEFINE_BUILTIN_V_VPV(NAME) \
__IF_FP16 ( \
- half _CL_OVERLOADABLE _CL_READNONE NAME (half a, __private half *b) \
- { \
- /* use float builtin */ \
- __private float c; \
- __private float r = __builtin_##NAME##f (a, &c); \
- *b = c; \
- return r; \
- } \
- IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, half, __local) \
- IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, half, __global) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, half2, half, half, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, half3, half2, half, lo, s2) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, half4, half2, half2, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, half8, half4, half4, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, half16, half8, half8, lo, hi)) \
+ half _CL_OVERLOADABLE _CL_READNONE NAME (half a, __private half *b) { \
+ /* use float builtin */ \
+ __private float c; \
+ __private float r = __builtin_##NAME##f (a, &c); \
+ *b = c; \
+ return r; \
+ } IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, half, __local) \
+ IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, half, __global) \
+ IMPLEMENT_BUILTIN_V_VPV (NAME, half2, half, half, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPV (NAME, half3, half2, half, lo, s2) \
+ IMPLEMENT_BUILTIN_V_VPV (NAME, half4, half2, half2, lo, \
+ hi) \
+ IMPLEMENT_BUILTIN_V_VPV (NAME, half8, half4, half4, \
+ lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPV (NAME, half16, half8, \
+ half8, lo, hi)) \
float _CL_OVERLOADABLE _CL_READNONE NAME (float a, __private float *b) \
{ \
return __builtin_##NAME##f (a, b); \
@@ -1905,17 +1906,20 @@
IMPLEMENT_BUILTIN_V_VPV (NAME, float8, float4, float4, lo, hi) \
IMPLEMENT_BUILTIN_V_VPV (NAME, float16, float8, float8, lo, hi) \
__IF_FP64 ( \
- double _CL_OVERLOADABLE _CL_READNONE NAME (double a, __private double *b) \
- { \
- return __builtin_##NAME (a, b); \
- } \
- IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, double, __local) \
- IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, double, __global) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, double2, double, double, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, double3, double2, double, lo, s2) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, double4, double2, double2, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, double8, double4, double4, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPV (NAME, double16, double8, double8, lo, hi))
+ double _CL_OVERLOADABLE _CL_READNONE NAME (double a, \
+ __private double *b) { \
+ return __builtin_##NAME (a, b); \
+ } IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, double, __local) \
+ IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, double, __global) \
+ IMPLEMENT_BUILTIN_V_VPV (NAME, double2, double, double, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPV (NAME, double3, double2, double, \
+ lo, s2) \
+ IMPLEMENT_BUILTIN_V_VPV (NAME, double4, double2, \
+ double2, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPV (NAME, double8, double4, \
+ double4, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPV ( \
+ NAME, double16, double8, double8, lo, hi))
#define IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE(NAME, VTYPE, JTYPE, ADDRSPACE) \
VTYPE __attribute__ ((overloadable)) \
@@ -1941,21 +1945,23 @@
IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, VTYPE, JTYPE, __global)
#define DEFINE_BUILTIN_V_VVPJ(NAME) \
__IF_FP16 ( \
- half _CL_OVERLOADABLE _CL_READNONE NAME (half a, half b, int *c) \
- { \
- /* use float builtin */ \
- __private int d; \
- __private float r = __builtin_##NAME##f (a, b, &d); \
- *c = d; \
- return r; \
- } \
- IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, half, int, __local) \
- IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, half, int, __global) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, half2, int2, int, int, lo, hi) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, half3, int3, int2, int, lo, s2) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, half4, int4, int2, int2, lo, hi) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, half8, int8, int4, int4, lo, hi) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, half16, int16, int8, int8, lo, hi)) \
+ half _CL_OVERLOADABLE _CL_READNONE NAME (half a, half b, int *c) { \
+ /* use float builtin */ \
+ __private int d; \
+ __private float r = __builtin_##NAME##f (a, b, &d); \
+ *c = d; \
+ return r; \
+ } IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, half, int, __local) \
+ IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, half, int, __global) \
+ IMPLEMENT_BUILTIN_V_VVPJ (NAME, half2, int2, int, int, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VVPJ (NAME, half3, int3, int2, int, lo, \
+ s2) \
+ IMPLEMENT_BUILTIN_V_VVPJ (NAME, half4, int4, int2, \
+ int2, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VVPJ (NAME, half8, int8, int4, \
+ int4, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VVPJ (NAME, half16, int16, \
+ int8, int8, lo, hi)) \
float _CL_OVERLOADABLE _CL_READNONE NAME (float a, float b, int *c) \
{ \
return __builtin_##NAME##f (a, b, c); \
@@ -1968,17 +1974,21 @@
IMPLEMENT_BUILTIN_V_VVPJ (NAME, float8, int8, int4, int4, lo, hi) \
IMPLEMENT_BUILTIN_V_VVPJ (NAME, float16, int16, int8, int8, lo, hi) \
__IF_FP64 ( \
- double _CL_OVERLOADABLE _CL_READNONE NAME (double a, double b, int *c) \
- { \
- return __builtin_##NAME (a, b, c); \
- } \
- IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, double, int, __local) \
- IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, double, int, __global) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, double2, int2, int, int, lo, hi) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, double3, int3, int2, int, lo, s2) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, double4, int4, int2, int2, lo, hi) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, double8, int8, int4, int4, lo, hi) \
- IMPLEMENT_BUILTIN_V_VVPJ (NAME, double16, int16, int8, int8, lo, hi))
+ double _CL_OVERLOADABLE _CL_READNONE NAME (double a, double b, \
+ int *c) { \
+ return __builtin_##NAME (a, b, c); \
+ } IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, double, int, __local) \
+ IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, double, int, __global) \
+ IMPLEMENT_BUILTIN_V_VVPJ (NAME, double2, int2, int, int, lo, \
+ hi) \
+ IMPLEMENT_BUILTIN_V_VVPJ (NAME, double3, int3, int2, int, \
+ lo, s2) \
+ IMPLEMENT_BUILTIN_V_VVPJ (NAME, double4, int4, int2, \
+ int2, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VVPJ (NAME, double8, int8, \
+ int4, int4, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VVPJ ( \
+ NAME, double16, int16, int8, int8, lo, hi))
#define IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE(NAME, VTYPE, JTYPE, ADDRSPACE) \
VTYPE __attribute__ ((overloadable)) NAME (VTYPE a, ADDRSPACE JTYPE *c) \
@@ -2003,21 +2013,23 @@
IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, VTYPE, JTYPE, __global)
#define DEFINE_BUILTIN_V_VPJ(NAME) \
__IF_FP16 ( \
- half _CL_OVERLOADABLE _CL_READNONE NAME (half a, int *c) \
- { \
- /* use float builtin */ \
- __private int d; \
- __private float r = __builtin_##NAME##f (a, &d); \
- *c = d; \
- return r; \
- } \
- IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, half, int, __local) \
- IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, half, int, __global) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, half2, int2, int, int, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, half3, int3, int2, int, lo, s2) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, half4, int4, int2, int2, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, half8, int8, int4, int4, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, half16, int16, int8, int8, lo, hi)) \
+ half _CL_OVERLOADABLE _CL_READNONE NAME (half a, int *c) { \
+ /* use float builtin */ \
+ __private int d; \
+ __private float r = __builtin_##NAME##f (a, &d); \
+ *c = d; \
+ return r; \
+ } IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, half, int, __local) \
+ IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, half, int, __global) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, half2, int2, int, int, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, half3, int3, int2, int, lo, \
+ s2) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, half4, int4, int2, int2, \
+ lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, half8, int8, int4, \
+ int4, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, half16, int16, \
+ int8, int8, lo, hi)) \
float _CL_OVERLOADABLE _CL_READNONE NAME (float a, int *c) \
{ \
return __builtin_##NAME##f (a, c); \
@@ -2030,17 +2042,19 @@
IMPLEMENT_BUILTIN_V_VPJ (NAME, float8, int8, int4, int4, lo, hi) \
IMPLEMENT_BUILTIN_V_VPJ (NAME, float16, int16, int8, int8, lo, hi) \
__IF_FP64 ( \
- double _CL_OVERLOADABLE _CL_READNONE NAME (double a, int *c) \
- { \
- return __builtin_##NAME (a, c); \
- } \
- IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, double, int, __local) \
- IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, double, int, __global) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, double2, int2, int, int, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, double3, int3, int2, int, lo, s2) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, double4, int4, int2, int2, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, double8, int8, int4, int4, lo, hi) \
- IMPLEMENT_BUILTIN_V_VPJ (NAME, double16, int16, int8, int8, lo, hi))
+ double _CL_OVERLOADABLE _CL_READNONE NAME (double a, int *c) { \
+ return __builtin_##NAME (a, c); \
+ } IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, double, int, __local) \
+ IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, double, int, __global) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, double2, int2, int, int, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, double3, int3, int2, int, \
+ lo, s2) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, double4, int4, int2, \
+ int2, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, double8, int8, int4, \
+ int4, lo, hi) \
+ IMPLEMENT_BUILTIN_V_VPJ (NAME, double16, int16, \
+ int8, int8, lo, hi))
#define __SINGLE_WI \
if (get_local_id(0) == 0 && \ |
Yeah, that's what I guessed: the macro magic is too complex for it. So, please just revert it manually for now.
Fixes #835