Permalink
Browse files

Add initial support for half precision builtins

v2: fix fmax implementation
    use consistent checks for __CLC_FP_SIZE
    add missing TODOs
    fix whitespace in definitions.h
v3: undef ZERO in modf.inc

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
reviewer: Jeroen Ketema <j.ketema@xs4all.nl>
Reviewed-by: Aaron Watry <awatry@gmail.com>
Tested-by: Aaron Watry <awatry@gmail.com>

git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@332677 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information...
jvesely committed May 17, 2018
1 parent bac1578 commit a2118d58fca567694edfabea78293e0dc9255500
Showing with 690 additions and 29 deletions.
  1. +15 −0 amdgcn/lib/math/fmax.cl
  2. +15 −0 amdgcn/lib/math/fmin.cl
  3. +5 −0 amdgpu/lib/math/nextafter.cl
  4. +7 −0 amdgpu/lib/math/sqrt.cl
  5. +9 −0 generic/include/clc/as_type.h
  6. +25 −0 generic/include/clc/async/gentype.inc
  7. +6 −1 generic/include/clc/float/definitions.h
  8. +31 −0 generic/include/clc/geometric/floatn.inc
  9. +10 −0 generic/include/clc/math/binary_intrin.inc
  10. +61 −1 generic/include/clc/math/gentype.inc
  11. +3 −1 generic/include/clc/math/nan.inc
  12. +11 −0 generic/include/clc/math/ternary_intrin.inc
  13. +10 −0 generic/include/clc/math/unary_intrin.inc
  14. +41 −0 generic/include/clc/relational/floatn.inc
  15. +4 −0 generic/include/clc/relational/isequal.h
  16. +5 −0 generic/include/clc/relational/isinf.h
  17. +5 −0 generic/include/clc/relational/isnan.h
  18. +7 −2 generic/include/math/clc_ldexp.h
  19. +22 −0 generic/lib/geometric/dot.cl
  20. +40 −2 generic/lib/geometric/length.cl
  21. +10 −3 generic/lib/math/acos.inc
  22. +10 −4 generic/lib/math/asin.inc
  23. +7 −0 generic/lib/math/clc_nextafter.cl
  24. +7 −6 generic/lib/math/clc_sqrt_impl.inc
  25. +3 −0 generic/lib/math/clc_sw_binary.inc
  26. +3 −0 generic/lib/math/clc_sw_unary.inc
  27. +15 −0 generic/lib/math/copysign.cl
  28. +16 −0 generic/lib/math/fmax.cl
  29. +10 −0 generic/lib/math/fmax.inc
  30. +15 −0 generic/lib/math/fmin.cl
  31. +10 −0 generic/lib/math/fmin.inc
  32. +10 −4 generic/lib/math/fract.inc
  33. +6 −0 generic/lib/math/ldexp.cl
  34. +5 −0 generic/lib/math/ldexp.inc
  35. +3 −1 generic/lib/math/lgamma_r.inc
  36. +11 −1 generic/lib/math/modf.inc
  37. +8 −1 generic/lib/math/nan.inc
  38. +3 −0 generic/lib/math/pown.inc
  39. +3 −0 generic/lib/math/remquo.inc
  40. +3 −0 generic/lib/math/rootn.inc
  41. +3 −0 generic/lib/math/sincos.inc
  42. +8 −0 generic/lib/math/sqrt.cl
  43. +15 −1 generic/lib/relational/isequal.cl
  44. +13 −0 generic/lib/relational/isfinite.cl
  45. +15 −0 generic/lib/relational/isgreater.cl
  46. +14 −0 generic/lib/relational/isgreaterequal.cl
  47. +12 −0 generic/lib/relational/isinf.cl
  48. +14 −0 generic/lib/relational/isless.cl
  49. +14 −0 generic/lib/relational/islessequal.cl
  50. +14 −0 generic/lib/relational/islessgreater.cl
  51. +14 −0 generic/lib/relational/isnan.cl
  52. +13 −0 generic/lib/relational/isnormal.cl
  53. +10 −0 generic/lib/relational/isnotequal.cl
  54. +10 −0 generic/lib/relational/isordered.cl
  55. +14 −0 generic/lib/relational/isunordered.cl
  56. +14 −0 generic/lib/relational/signbit.cl
  57. +3 −1 generic/lib/shared/vstore_half.inc
@@ -25,6 +25,21 @@ _CLC_DEF _CLC_OVERLOAD double fmax(double x, double y)
}
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmax, double, double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
/*
 * Scalar half-precision fmax.
 *
 * When x is NaN the result is y (whatever y holds); when only y is NaN the
 * result is x. Otherwise the larger operand is returned, and y is chosen
 * when the two compare equal.
 */
_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
{
    /* A NaN x carries no ordering information, so hand back y. */
    if (isnan(x))
        return y;
    /* Keep x when y is NaN or strictly below x; in every other case y wins. */
    return (isnan(y) || y < x) ? x : y;
}
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
#endif
#define __CLC_BODY <../../../generic/lib/math/fmax.inc>
@@ -25,6 +25,21 @@ _CLC_DEF _CLC_OVERLOAD double fmin(double x, double y)
}
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmin, double, double)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
/*
 * Scalar half-precision fmin.
 *
 * When x is NaN the result is y (whatever y holds); when only y is NaN the
 * result is x. Otherwise the smaller operand is returned, and x is chosen
 * when the two compare equal.
 */
_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
{
    /* A NaN x carries no ordering information, so hand back y. */
    if (isnan(x))
        return y;
    /* x is the answer when y is NaN or when y is not strictly below x. */
    if (isnan(y) || !(y < x))
        return x;
    return y;
}
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
#endif
#define __CLC_BODY <../../../generic/lib/math/fmin.inc>
@@ -8,3 +8,8 @@ _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double)
#endif
/*
 * Half-precision nextafter, compiled only when cl_khr_fp16 is defined.
 * Mirrors the float and double instantiations above, passing __clc_nextafter
 * to _CLC_DEFINE_BINARY_BUILTIN (macro defined elsewhere; presumably it
 * forwards nextafter to that implementation -- confirm against its definition).
 */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_DEFINE_BINARY_BUILTIN(half, nextafter, __clc_nextafter, half, half)
#endif
@@ -26,6 +26,13 @@
_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
/*
 * Half-precision sqrt, compiled only when cl_khr_fp16 is defined.
 * Uses the same _CLC_DEFINE_UNARY_BUILTIN / __clc_sqrt pairing as the float
 * instantiation above (macro defined elsewhere).
 */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
#endif
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -66,3 +66,12 @@
#define as_double8(x) __builtin_astype(x, double8)
#define as_double16(x) __builtin_astype(x, double16)
#endif
/*
 * Reinterpretation helpers for the half types, available only when
 * cl_khr_fp16 is defined. Each macro expands to __builtin_astype with the
 * matching half scalar or vector type as the destination.
 */
#ifdef cl_khr_fp16
#define as_half(x) __builtin_astype(x, half)
#define as_half2(x) __builtin_astype(x, half2)
#define as_half3(x) __builtin_astype(x, half3)
#define as_half4(x) __builtin_astype(x, half4)
#define as_half8(x) __builtin_astype(x, half8)
#define as_half16(x) __builtin_astype(x, half16)
#endif
@@ -204,4 +204,29 @@
#endif
/*
 * Half-precision instantiations, compiled only when cl_khr_fp16 is defined.
 * __CLC_BODY is included once per type with __CLC_GENTYPE set in turn to
 * half, half2, half4, half8 and half16; the macro is undefined again after
 * every include so the next pass starts clean.
 */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16: enable
#define __CLC_GENTYPE half
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE half2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE half4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE half8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE half16
#include __CLC_BODY
#undef __CLC_GENTYPE
#endif
#undef __CLC_BODY
@@ -64,7 +64,7 @@
#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
#ifdef __CLC_INTERNAL
#define M_LOG210 0x1.a934f0979a371p+1
#define M_LOG210 0x1.a934f0979a371p+1
#endif
#endif
@@ -80,6 +80,11 @@
/* Exponent limits and radix of the half format. */
#define HALF_MIN_10_EXP -4
#define HALF_MIN_EXP -13
#define HALF_RADIX 2
/* 0x1.ffcp15 == (2 - 2^-10) * 2^15 == 65504. */
#define HALF_MAX 0x1.ffcp15h
/* 0x1.0p-14 == 2^-14. */
#define HALF_MIN 0x1.0p-14h
/* 0x1.0p-10 == 2^-10. */
#define HALF_EPSILON 0x1.0p-10h
#endif
#endif
@@ -53,4 +53,35 @@
#endif
#endif
/*
 * Half-precision instantiations of __CLC_BODY. They are skipped when
 * __FLOAT_ONLY is defined or when cl_khr_fp16 is not defined.
 */
#ifndef __FLOAT_ONLY
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
/* __CLC_FLOAT and __CLC_FPSIZE stay defined for every include below. */
#define __CLC_FLOAT half
#define __CLC_FPSIZE 16
/* Scalar pass: __CLC_SCALAR is defined for this include only. */
#define __CLC_FLOATN half
#define __CLC_SCALAR
#include __CLC_BODY
#undef __CLC_FLOATN
#undef __CLC_SCALAR
/* Vector passes: __CLC_FLOATN is half2, half3 and half4 in turn. */
#define __CLC_FLOATN half2
#include __CLC_BODY
#undef __CLC_FLOATN
#define __CLC_FLOATN half3
#include __CLC_BODY
#undef __CLC_FLOATN
#define __CLC_FLOATN half4
#include __CLC_BODY
#undef __CLC_FLOATN
#undef __CLC_FLOAT
#undef __CLC_FPSIZE
#endif
#endif
#undef __CLC_BODY
@@ -15,5 +15,15 @@ _CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8) __asm(__CLC_INTRINSIC ".v
_CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
#endif
/*
 * Half-precision overloads of the two-operand function named by
 * __CLC_FUNCTION, declared only when cl_khr_fp16 is defined. Each overload
 * gets an __asm label built from __CLC_INTRINSIC followed by a per-type
 * suffix: ".f16" for the scalar and ".vNf16" for the N-element vectors.
 */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_OVERLOAD half __CLC_FUNCTION(half, half) __asm(__CLC_INTRINSIC ".f16");
_CLC_OVERLOAD half2 __CLC_FUNCTION(half2, half2) __asm(__CLC_INTRINSIC ".v2f16");
_CLC_OVERLOAD half3 __CLC_FUNCTION(half3, half3) __asm(__CLC_INTRINSIC ".v3f16");
_CLC_OVERLOAD half4 __CLC_FUNCTION(half4, half4) __asm(__CLC_INTRINSIC ".v4f16");
_CLC_OVERLOAD half8 __CLC_FUNCTION(half8, half8) __asm(__CLC_INTRINSIC ".v8f16");
_CLC_OVERLOAD half16 __CLC_FUNCTION(half16, half16) __asm(__CLC_INTRINSIC ".v16f16");
#endif
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
@@ -110,6 +110,66 @@
#undef __CLC_FPSIZE
#undef __CLC_SCALAR_GENTYPE
#endif
#endif
#undef __CLC_BODY
/*
 * Half-precision instantiations of __CLC_BODY. They are skipped when
 * __FLOAT_ONLY is defined or when cl_khr_fp16 is not defined.
 */
#ifndef __FLOAT_ONLY
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
/* __CLC_SCALAR_GENTYPE and __CLC_FPSIZE stay defined for every include below. */
#define __CLC_SCALAR_GENTYPE half
#define __CLC_FPSIZE 16
/* Scalar pass: __CLC_SCALAR is defined and __CLC_VECSIZE is not. */
#define __CLC_SCALAR
#define __CLC_GENTYPE half
#define __CLC_INTN int
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_INTN
#undef __CLC_SCALAR
/*
 * Vector passes for widths 2, 3, 4, 8 and 16: __CLC_GENTYPE is halfN,
 * __CLC_INTN is the int vector of the same width and __CLC_VECSIZE is N.
 */
#define __CLC_GENTYPE half2
#define __CLC_INTN int2
#define __CLC_VECSIZE 2
#include __CLC_BODY
#undef __CLC_VECSIZE
#undef __CLC_GENTYPE
#undef __CLC_INTN
#define __CLC_GENTYPE half3
#define __CLC_INTN int3
#define __CLC_VECSIZE 3
#include __CLC_BODY
#undef __CLC_VECSIZE
#undef __CLC_GENTYPE
#undef __CLC_INTN
#define __CLC_GENTYPE half4
#define __CLC_INTN int4
#define __CLC_VECSIZE 4
#include __CLC_BODY
#undef __CLC_VECSIZE
#undef __CLC_GENTYPE
#undef __CLC_INTN
#define __CLC_GENTYPE half8
#define __CLC_INTN int8
#define __CLC_VECSIZE 8
#include __CLC_BODY
#undef __CLC_VECSIZE
#undef __CLC_GENTYPE
#undef __CLC_INTN
#define __CLC_GENTYPE half16
#define __CLC_INTN int16
#define __CLC_VECSIZE 16
#include __CLC_BODY
#undef __CLC_VECSIZE
#undef __CLC_GENTYPE
#undef __CLC_INTN
#undef __CLC_FPSIZE
#undef __CLC_SCALAR_GENTYPE
#endif
#endif
#undef __CLC_BODY
@@ -4,8 +4,10 @@
#if __CLC_FPSIZE == 64
#define __CLC_NATN __CLC_XCONCAT(ulong, __CLC_VECSIZE)
#else
#elif __CLC_FPSIZE == 32
#define __CLC_NATN __CLC_XCONCAT(uint, __CLC_VECSIZE)
#elif __CLC_FPSIZE == 16
#define __CLC_NATN __CLC_XCONCAT(ushort, __CLC_VECSIZE)
#endif
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE nan(__CLC_NATN code);
@@ -15,5 +15,16 @@ _CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8, double8) __asm(__CLC_INTR
_CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
#endif
/*
 * Half-precision overloads of the three-operand function named by
 * __CLC_FUNCTION, declared only when cl_khr_fp16 is defined. Each overload
 * gets an __asm label built from __CLC_INTRINSIC followed by a per-type
 * suffix: ".f16" for the scalar and ".vNf16" for the N-element vectors.
 */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16: enable
_CLC_OVERLOAD half __CLC_FUNCTION(half, half, half) __asm(__CLC_INTRINSIC ".f16");
_CLC_OVERLOAD half2 __CLC_FUNCTION(half2, half2, half2) __asm(__CLC_INTRINSIC ".v2f16");
_CLC_OVERLOAD half3 __CLC_FUNCTION(half3, half3, half3) __asm(__CLC_INTRINSIC ".v3f16");
_CLC_OVERLOAD half4 __CLC_FUNCTION(half4, half4, half4) __asm(__CLC_INTRINSIC ".v4f16");
_CLC_OVERLOAD half8 __CLC_FUNCTION(half8, half8, half8) __asm(__CLC_INTRINSIC ".v8f16");
_CLC_OVERLOAD half16 __CLC_FUNCTION(half16, half16, half16) __asm(__CLC_INTRINSIC ".v16f16");
#endif
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
@@ -15,5 +15,15 @@ _CLC_OVERLOAD double8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64");
#endif
/*
 * Half-precision overloads of the one-operand function named by
 * __CLC_FUNCTION, declared only when cl_khr_fp16 is defined. Each overload
 * gets an __asm label built from __CLC_INTRINSIC followed by a per-type
 * suffix: ".f16" for the scalar and ".vNf16" for the N-element vectors.
 */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16: enable
_CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
_CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
_CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
_CLC_OVERLOAD half4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16");
_CLC_OVERLOAD half8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16");
_CLC_OVERLOAD half16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16");
#endif
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
@@ -39,6 +39,7 @@
#undef __CLC_INT
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define __CLC_FLOATN double
#define __CLC_INTN int
@@ -76,6 +77,46 @@
#undef __CLC_INTN
#undef __CLC_FLOATN
#endif
/*
 * Half-precision instantiations of __CLC_BODY, compiled only when
 * cl_khr_fp16 is defined. Each pass defines the operand type __CLC_FLOATN
 * and the paired integer type __CLC_INTN, includes the body, then undefines
 * both again.
 */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
/* Scalar pass: half is paired with int. */
#define __CLC_FLOATN half
#define __CLC_INTN int
#include __CLC_BODY
#undef __CLC_INTN
#undef __CLC_FLOATN
/* Vector passes: halfN is paired with shortN for N = 2, 3, 4, 8 and 16. */
#define __CLC_FLOATN half2
#define __CLC_INTN short2
#include __CLC_BODY
#undef __CLC_INTN
#undef __CLC_FLOATN
#define __CLC_FLOATN half3
#define __CLC_INTN short3
#include __CLC_BODY
#undef __CLC_INTN
#undef __CLC_FLOATN
#define __CLC_FLOATN half4
#define __CLC_INTN short4
#include __CLC_BODY
#undef __CLC_INTN
#undef __CLC_FLOATN
#define __CLC_FLOATN half8
#define __CLC_INTN short8
#include __CLC_BODY
#undef __CLC_INTN
#undef __CLC_FLOATN
#define __CLC_FLOATN half16
#define __CLC_INTN short16
#include __CLC_BODY
#undef __CLC_INTN
#undef __CLC_FLOATN
#endif
#undef __CLC_BODY
@@ -15,6 +15,10 @@ _CLC_VECTOR_ISEQUAL_DECL(float, int)
_CLC_ISEQUAL_DECL(double, int)
_CLC_VECTOR_ISEQUAL_DECL(double, long)
#endif
/*
 * Half-precision isequal declarations, emitted only when cl_khr_fp16 is
 * defined: the scalar form is paired with int and the vector forms with
 * short, following the float/int and double/long pairs above.
 */
#ifdef cl_khr_fp16
_CLC_ISEQUAL_DECL(half, int)
_CLC_VECTOR_ISEQUAL_DECL(half, short)
#endif
/*
 * Drop the declaration helper macros once all overloads are declared so they
 * do not leak into files that include this header.
 */
#undef _CLC_ISEQUAL_DECL
/*
 * The name must match the macro used above; the earlier spelling
 * "_CLC_VECTOR_ISEQUAL_DEC" (missing the trailing L) undefined nothing and
 * left _CLC_VECTOR_ISEQUAL_DECL defined.
 */
#undef _CLC_VECTOR_ISEQUAL_DECL
@@ -17,5 +17,10 @@ _CLC_ISINF_DECL(int, double)
_CLC_VECTOR_ISINF_DECL(long, double)
#endif
/*
 * Half-precision isinf declarations, emitted only when cl_khr_fp16 is
 * defined: the scalar form is paired with int and the vector forms with
 * short.
 */
#ifdef cl_khr_fp16
_CLC_ISINF_DECL(int, half)
_CLC_VECTOR_ISINF_DECL(short, half)
#endif
#undef _CLC_ISINF_DECL
#undef _CLC_VECTOR_ISINF_DECL
@@ -17,5 +17,10 @@ _CLC_ISNAN_DECL(int, double)
_CLC_VECTOR_ISNAN_DECL(long, double)
#endif
/*
 * Half-precision isnan declarations, emitted only when cl_khr_fp16 is
 * defined: the scalar form is paired with int and the vector forms with
 * short.
 */
#ifdef cl_khr_fp16
_CLC_ISNAN_DECL(int, half)
_CLC_VECTOR_ISNAN_DECL(short, half)
#endif
#undef _CLC_ISNAN_DECL
#undef _CLC_VECTOR_ISNAN_DECL
@@ -1,6 +1,11 @@
_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float, int);
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
#endif
/*
 * __clc_ldexp overload taking a half operand, declared only when cl_khr_fp16
 * is defined.
 * NOTE(review): this overload is declared to return float, whereas the float
 * and double overloads return their own operand type -- confirm that the
 * float return type is intended.
 */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int);
#endif
Oops, something went wrong.

0 comments on commit a2118d5

Please sign in to comment.