Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CUDA: Implement frexp, tgamma, ldexp, modf, remquo #836

Merged
merged 3 commits into from Jun 9, 2020

Conversation

isuruf
Copy link
Member

@isuruf isuruf commented May 27, 2020

Fixes #835

@isuruf isuruf mentioned this pull request May 27, 2020
@isuruf isuruf changed the title CUDA: Implement frexp, tgamma, ldexp CUDA: Implement frexp, tgamma, ldexp, modf, remquo May 28, 2020
Copy link
Member

@pjaaskel pjaaskel left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a test that uses these functions to run_cuda_tests.sh?

lib/kernel/templates.h Outdated Show resolved Hide resolved
@isuruf
Copy link
Member Author

isuruf commented May 28, 2020

@pjaaskel, the formatter doesn't like the new lines I've added.
It's suggesting the following formatting:

diff --git a/lib/kernel/templates.h b/lib/kernel/templates.h
index d9fd018c..e0d1df9a 100644
--- a/lib/kernel/templates.h
+++ b/lib/kernel/templates.h
@@ -1878,21 +1878,22 @@
   IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, VTYPE, __global)
 #define DEFINE_BUILTIN_V_VPV(NAME)                                            \
   __IF_FP16 (                                                                 \
-  half _CL_OVERLOADABLE _CL_READNONE NAME (half a, __private half *b)         \
-  {                                                                           \
-    /* use float builtin */                                                   \
-    __private float c;                                                        \
-    __private float r = __builtin_##NAME##f (a, &c);                          \
-    *b = c;                                                                   \
-    return r;                                                                 \
-  }                                                                           \
-  IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, half, __local)                     \
-  IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, half, __global)                    \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, half2, half, half, lo, hi)                   \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, half3, half2, half, lo, s2)                  \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, half4, half2, half2, lo, hi)                 \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, half8, half4, half4, lo, hi)                 \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, half16, half8, half8, lo, hi))               \
+      half _CL_OVERLOADABLE _CL_READNONE NAME (half a, __private half *b) {   \
+        /* use float builtin */                                               \
+        __private float c;                                                    \
+        __private float r = __builtin_##NAME##f (a, &c);                      \
+        *b = c;                                                               \
+        return r;                                                             \
+      } IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, half, __local)               \
+          IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, half, __global)            \
+              IMPLEMENT_BUILTIN_V_VPV (NAME, half2, half, half, lo, hi)       \
+                  IMPLEMENT_BUILTIN_V_VPV (NAME, half3, half2, half, lo, s2)  \
+                      IMPLEMENT_BUILTIN_V_VPV (NAME, half4, half2, half2, lo, \
+                                               hi)                            \
+                          IMPLEMENT_BUILTIN_V_VPV (NAME, half8, half4, half4, \
+                                                   lo, hi)                    \
+                              IMPLEMENT_BUILTIN_V_VPV (NAME, half16, half8,   \
+                                                       half8, lo, hi))        \
   float _CL_OVERLOADABLE _CL_READNONE NAME (float a, __private float *b)      \
   {                                                                           \
     return __builtin_##NAME##f (a, b);                                        \
@@ -1905,17 +1906,20 @@
   IMPLEMENT_BUILTIN_V_VPV (NAME, float8, float4, float4, lo, hi)              \
   IMPLEMENT_BUILTIN_V_VPV (NAME, float16, float8, float8, lo, hi)             \
   __IF_FP64 (                                                                 \
-  double _CL_OVERLOADABLE _CL_READNONE NAME (double a, __private double *b)   \
-  {                                                                           \
-    return __builtin_##NAME (a, b);                                           \
-  }                                                                           \
-  IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, double, __local)                   \
-  IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, double, __global)                  \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, double2, double, double, lo, hi)             \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, double3, double2, double, lo, s2)            \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, double4, double2, double2, lo, hi)           \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, double8, double4, double4, lo, hi)           \
-  IMPLEMENT_BUILTIN_V_VPV (NAME, double16, double8, double8, lo, hi))
+      double _CL_OVERLOADABLE _CL_READNONE NAME (double a,                    \
+                                                 __private double *b) {       \
+        return __builtin_##NAME (a, b);                                       \
+      } IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, double, __local)             \
+          IMPLEMENT_BUILTIN_V_VPV_ADDRSPACE (NAME, double, __global)          \
+              IMPLEMENT_BUILTIN_V_VPV (NAME, double2, double, double, lo, hi) \
+                  IMPLEMENT_BUILTIN_V_VPV (NAME, double3, double2, double,    \
+                                           lo, s2)                            \
+                      IMPLEMENT_BUILTIN_V_VPV (NAME, double4, double2,        \
+                                               double2, lo, hi)               \
+                          IMPLEMENT_BUILTIN_V_VPV (NAME, double8, double4,    \
+                                                   double4, lo, hi)           \
+                              IMPLEMENT_BUILTIN_V_VPV (                       \
+                                  NAME, double16, double8, double8, lo, hi))
 
 #define IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE(NAME, VTYPE, JTYPE, ADDRSPACE)     \
   VTYPE __attribute__ ((overloadable))                                        \
@@ -1941,21 +1945,23 @@
   IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, VTYPE, JTYPE, __global)
 #define DEFINE_BUILTIN_V_VVPJ(NAME)                                           \
   __IF_FP16 (                                                                 \
-  half _CL_OVERLOADABLE _CL_READNONE NAME (half a, half b, int *c)            \
-  {                                                                           \
-    /* use float builtin */                                                   \
-    __private int d;                                                          \
-    __private float r = __builtin_##NAME##f (a, b, &d);                       \
-    *c = d;                                                                   \
-    return r;                                                                 \
-  }                                                                           \
-  IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, half, int, __local)               \
-  IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, half, int, __global)              \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, half2, int2, int, int, lo, hi)              \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, half3, int3, int2, int, lo, s2)             \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, half4, int4, int2, int2, lo, hi)            \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, half8, int8, int4, int4, lo, hi)            \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, half16, int16, int8, int8, lo, hi))         \
+      half _CL_OVERLOADABLE _CL_READNONE NAME (half a, half b, int *c) {      \
+        /* use float builtin */                                               \
+        __private int d;                                                      \
+        __private float r = __builtin_##NAME##f (a, b, &d);                   \
+        *c = d;                                                               \
+        return r;                                                             \
+      } IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, half, int, __local)         \
+          IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, half, int, __global)      \
+              IMPLEMENT_BUILTIN_V_VVPJ (NAME, half2, int2, int, int, lo, hi)  \
+                  IMPLEMENT_BUILTIN_V_VVPJ (NAME, half3, int3, int2, int, lo, \
+                                            s2)                               \
+                      IMPLEMENT_BUILTIN_V_VVPJ (NAME, half4, int4, int2,      \
+                                                int2, lo, hi)                 \
+                          IMPLEMENT_BUILTIN_V_VVPJ (NAME, half8, int8, int4,  \
+                                                    int4, lo, hi)             \
+                              IMPLEMENT_BUILTIN_V_VVPJ (NAME, half16, int16,  \
+                                                        int8, int8, lo, hi))  \
   float _CL_OVERLOADABLE _CL_READNONE NAME (float a, float b, int *c)         \
   {                                                                           \
     return __builtin_##NAME##f (a, b, c);                                     \
@@ -1968,17 +1974,21 @@
   IMPLEMENT_BUILTIN_V_VVPJ (NAME, float8, int8, int4, int4, lo, hi)           \
   IMPLEMENT_BUILTIN_V_VVPJ (NAME, float16, int16, int8, int8, lo, hi)         \
   __IF_FP64 (                                                                 \
-  double _CL_OVERLOADABLE _CL_READNONE NAME (double a, double b, int *c)      \
-  {                                                                           \
-    return __builtin_##NAME (a, b, c);                                        \
-  }                                                                           \
-  IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, double, int, __local)             \
-  IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, double, int, __global)            \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, double2, int2, int, int, lo, hi)            \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, double3, int3, int2, int, lo, s2)           \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, double4, int4, int2, int2, lo, hi)          \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, double8, int8, int4, int4, lo, hi)          \
-  IMPLEMENT_BUILTIN_V_VVPJ (NAME, double16, int16, int8, int8, lo, hi))
+      double _CL_OVERLOADABLE _CL_READNONE NAME (double a, double b,          \
+                                                 int *c) {                    \
+        return __builtin_##NAME (a, b, c);                                    \
+      } IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, double, int, __local)       \
+          IMPLEMENT_BUILTIN_V_VVPJ_ADDRSPACE (NAME, double, int, __global)    \
+              IMPLEMENT_BUILTIN_V_VVPJ (NAME, double2, int2, int, int, lo,    \
+                                        hi)                                   \
+                  IMPLEMENT_BUILTIN_V_VVPJ (NAME, double3, int3, int2, int,   \
+                                            lo, s2)                           \
+                      IMPLEMENT_BUILTIN_V_VVPJ (NAME, double4, int4, int2,    \
+                                                int2, lo, hi)                 \
+                          IMPLEMENT_BUILTIN_V_VVPJ (NAME, double8, int8,      \
+                                                    int4, int4, lo, hi)       \
+                              IMPLEMENT_BUILTIN_V_VVPJ (                      \
+                                  NAME, double16, int16, int8, int8, lo, hi))
 
 #define IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE(NAME, VTYPE, JTYPE, ADDRSPACE)      \
   VTYPE __attribute__ ((overloadable)) NAME (VTYPE a, ADDRSPACE JTYPE *c)     \
@@ -2003,21 +2013,23 @@
   IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, VTYPE, JTYPE, __global)
 #define DEFINE_BUILTIN_V_VPJ(NAME)                                            \
   __IF_FP16 (                                                                 \
-  half _CL_OVERLOADABLE _CL_READNONE NAME (half a, int *c)                    \
-  {                                                                           \
-    /* use float builtin */                                                   \
-    __private int d;                                                          \
-    __private float r = __builtin_##NAME##f (a, &d);                          \
-    *c = d;                                                                   \
-    return r;                                                                 \
-  }                                                                           \
-  IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, half, int, __local)                \
-  IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, half, int, __global)               \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, half2, int2, int, int, lo, hi)               \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, half3, int3, int2, int, lo, s2)              \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, half4, int4, int2, int2, lo, hi)             \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, half8, int8, int4, int4, lo, hi)             \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, half16, int16, int8, int8, lo, hi))          \
+      half _CL_OVERLOADABLE _CL_READNONE NAME (half a, int *c) {              \
+        /* use float builtin */                                               \
+        __private int d;                                                      \
+        __private float r = __builtin_##NAME##f (a, &d);                      \
+        *c = d;                                                               \
+        return r;                                                             \
+      } IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, half, int, __local)          \
+          IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, half, int, __global)       \
+              IMPLEMENT_BUILTIN_V_VPJ (NAME, half2, int2, int, int, lo, hi)   \
+                  IMPLEMENT_BUILTIN_V_VPJ (NAME, half3, int3, int2, int, lo,  \
+                                           s2)                                \
+                      IMPLEMENT_BUILTIN_V_VPJ (NAME, half4, int4, int2, int2, \
+                                               lo, hi)                        \
+                          IMPLEMENT_BUILTIN_V_VPJ (NAME, half8, int8, int4,   \
+                                                   int4, lo, hi)              \
+                              IMPLEMENT_BUILTIN_V_VPJ (NAME, half16, int16,   \
+                                                       int8, int8, lo, hi))   \
   float _CL_OVERLOADABLE _CL_READNONE NAME (float a, int *c)                  \
   {                                                                           \
     return __builtin_##NAME##f (a, c);                                        \
@@ -2030,17 +2042,19 @@
   IMPLEMENT_BUILTIN_V_VPJ (NAME, float8, int8, int4, int4, lo, hi)            \
   IMPLEMENT_BUILTIN_V_VPJ (NAME, float16, int16, int8, int8, lo, hi)          \
   __IF_FP64 (                                                                 \
-  double _CL_OVERLOADABLE _CL_READNONE NAME (double a, int *c)                \
-  {                                                                           \
-    return __builtin_##NAME (a, c);                                           \
-  }                                                                           \
-  IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, double, int, __local)              \
-  IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, double, int, __global)             \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, double2, int2, int, int, lo, hi)             \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, double3, int3, int2, int, lo, s2)            \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, double4, int4, int2, int2, lo, hi)           \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, double8, int8, int4, int4, lo, hi)           \
-  IMPLEMENT_BUILTIN_V_VPJ (NAME, double16, int16, int8, int8, lo, hi))
+      double _CL_OVERLOADABLE _CL_READNONE NAME (double a, int *c) {          \
+        return __builtin_##NAME (a, c);                                       \
+      } IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, double, int, __local)        \
+          IMPLEMENT_BUILTIN_V_VPJ_ADDRSPACE (NAME, double, int, __global)     \
+              IMPLEMENT_BUILTIN_V_VPJ (NAME, double2, int2, int, int, lo, hi) \
+                  IMPLEMENT_BUILTIN_V_VPJ (NAME, double3, int3, int2, int,    \
+                                           lo, s2)                            \
+                      IMPLEMENT_BUILTIN_V_VPJ (NAME, double4, int4, int2,     \
+                                               int2, lo, hi)                  \
+                          IMPLEMENT_BUILTIN_V_VPJ (NAME, double8, int8, int4, \
+                                                   int4, lo, hi)              \
+                              IMPLEMENT_BUILTIN_V_VPJ (NAME, double16, int16, \
+                                                       int8, int8, lo, hi))
 
 #define __SINGLE_WI                             \
     if (get_local_id(0) == 0 &&                 \

@pjaaskel
Copy link
Member

@pjaaskel, the formatter doesn't like the new lines I've added.

Yeah, that's what I guessed: the macro magic is too complex for it. So, please just revert it manually for now.

@pjaaskel pjaaskel merged commit b641922 into pocl:master Jun 9, 2020
@isuruf isuruf deleted the frexp branch June 9, 2020 17:20
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

CUDA: implement frexp
2 participants