diff --git a/libclc/clspv/lib/math/fma.cl b/libclc/clspv/lib/math/fma.cl index 4f2806933eda99..3ffca28bd3bef6 100644 --- a/libclc/clspv/lib/math/fma.cl +++ b/libclc/clspv/lib/math/fma.cl @@ -269,3 +269,14 @@ _CLC_DEF _CLC_OVERLOAD float fma(float a, float b, float c) { ((uint)st_fma.mantissa.lo & 0x7fffff)); } _CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, fma, float, float, float) + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEF _CLC_OVERLOAD half fma(half a, half b, half c) { + return (half)mad((float)a, (float)b, (float)c); +} +_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, fma, half, half, half) + +#endif diff --git a/libclc/generic/include/clc/convert.h b/libclc/generic/include/clc/convert.h index f0ba796864d4dd..db7bb0402491e8 100644 --- a/libclc/generic/include/clc/convert.h +++ b/libclc/generic/include/clc/convert.h @@ -20,10 +20,19 @@ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX) -#ifdef cl_khr_fp64 +#if defined(cl_khr_fp64) && defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) \ + _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX) +#elif defined(cl_khr_fp64) #define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX) +#elif defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ + _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX) #else #define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) @@ -40,11 +49,20 @@ _CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(float, SUFFIX) -#ifdef cl_khr_fp64 +#if defined(cl_khr_fp64) && defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ + 
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM(double, SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM(half, SUFFIX) +#elif defined(cl_khr_fp64) #define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ _CLC_VECTOR_CONVERT_TO1(SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(double, SUFFIX) +#elif defined(cl_khr_fp16) +#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ + _CLC_VECTOR_CONVERT_TO1(SUFFIX) \ + _CLC_VECTOR_CONVERT_FROM(half, SUFFIX) #else #define _CLC_VECTOR_CONVERT_TO(SUFFIX) \ _CLC_VECTOR_CONVERT_TO1(SUFFIX) #endif diff --git a/libclc/generic/include/math/clc_ldexp.h b/libclc/generic/include/math/clc_ldexp.h index dbfc0447446fea..454b7ed3dcee5a 100644 --- a/libclc/generic/include/math/clc_ldexp.h +++ b/libclc/generic/include/math/clc_ldexp.h @@ -7,5 +7,5 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int); #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int); +_CLC_DEF _CLC_OVERLOAD half __clc_ldexp(half, int); #endif diff --git a/libclc/generic/lib/clcmacro.h b/libclc/generic/lib/clcmacro.h index f148dc37bae5c9..f655368967c9ee 100644 --- a/libclc/generic/lib/clcmacro.h +++ b/libclc/generic/lib/clcmacro.h @@ -1,3 +1,5 @@ +#include + #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \ return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \ @@ -86,64 +88,76 @@ return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \ } -#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \ - DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \ - return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \ - return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \ - FUNCTION(x, y, z.z)); \ - } \ -\ - DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \ - return 
(RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \ - return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ - } \ -\ - DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \ - return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ - } \ -\ +#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ + ARG2_TYPE, ARG3_TYPE) \ + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \ + return (RET_TYPE##2)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##3 z) { \ + return (RET_TYPE##3)(FUNCTION(x, y, z.x), FUNCTION(x, y, z.y), \ + FUNCTION(x, y, z.z)); \ + } \ + \ + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##4 z) { \ + return (RET_TYPE##4)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##8 z) { \ + return (RET_TYPE##8)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ + } \ + \ + DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \ + return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \ + } -#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ADDR_SPACE, ARG2_TYPE) \ - DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ADDR_SPACE ARG2_TYPE##2 *y) { \ - return (RET_TYPE##2)( \ - FUNCTION(x.x, (ARG2_TYPE*)y), \ - FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)) \ - ); \ - } \ -\ - DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ADDR_SPACE ARG2_TYPE##3 *y) { \ - return (RET_TYPE##3)( \ - FUNCTION(x.x, (ARG2_TYPE*)y), \ - FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)), \ - FUNCTION(x.z, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+2)) \ - ); \ - } \ -\ - DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ADDR_SPACE ARG2_TYPE##4 *y) { 
\ - return (RET_TYPE##4)( \ - FUNCTION(x.lo, (ARG2_TYPE##2*)y), \ - FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##2*)((ADDR_SPACE ARG2_TYPE*)y+2)) \ - ); \ - } \ -\ - DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ADDR_SPACE ARG2_TYPE##8 *y) { \ - return (RET_TYPE##8)( \ - FUNCTION(x.lo, (ARG2_TYPE##4*)y), \ - FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##4*)((ADDR_SPACE ARG2_TYPE*)y+4)) \ - ); \ - } \ -\ - DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ADDR_SPACE ARG2_TYPE##16 *y) { \ - return (RET_TYPE##16)( \ - FUNCTION(x.lo, (ARG2_TYPE##8*)y), \ - FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##8*)((ADDR_SPACE ARG2_TYPE*)y+8)) \ - ); \ +#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \ + ADDR_SPACE, ARG2_TYPE) \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 2))( \ + FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ + FUNCTION(x.y, \ + (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1))); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 3))( \ + FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ + FUNCTION(x.y, \ + (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)), \ + FUNCTION(x.z, \ + (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 4))( \ + FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y), \ + FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ + ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 8))( \ + FUNCTION(x.lo, (ADDR_SPACE 
__CLC_XCONCAT(ARG2_TYPE, 4) *)y), \ + FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ + ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4))); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \ + FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \ + return (__CLC_XCONCAT(RET_TYPE, 16))( \ + FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y), \ + FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ + ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8))); \ } #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \ @@ -161,3 +175,26 @@ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \ return BUILTIN(x); \ } \ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE) + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) \ + _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x) { \ + return (half)FUNCTION((float)x); \ + } \ + _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half) + +#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) \ + _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x, half y) { \ + return (half)FUNCTION((float)x, (float)y); \ + } \ + _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half) + +#else + +#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) +#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) + +#endif diff --git a/libclc/generic/lib/gen_convert.py b/libclc/generic/lib/gen_convert.py index 21fc8ebc80d156..bd36faa4e9197d 100644 --- a/libclc/generic/lib/gen_convert.py +++ b/libclc/generic/lib/gen_convert.py @@ -46,21 +46,21 @@ "uint", "long", "ulong", + "half", "float", "double", ] int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"] unsigned_types = ["uchar", "ushort", "uint", "ulong"] -float_types = ["float", "double"] +float_types = ["half", "float", "double"] int64_types = ["long", "ulong"] float64_types = ["double"] +float16_types = ["half"] 
vector_sizes = ["", "2", "3", "4", "8", "16"] half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")] saturation = ["", "_sat"] rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"] -float_prefix = {"float": "FLT_", "double": "DBL_"} -float_suffix = {"float": "f", "double": ""} bool_type = { "char": "char", @@ -71,6 +71,7 @@ "uint": "int", "long": "long", "ulong": "long", + "half": "short", "float": "int", "double": "long", } @@ -95,6 +96,7 @@ "uint": 4, "long": 8, "ulong": 8, + "half": 2, "float": 4, "double": 8, } @@ -108,6 +110,7 @@ "uint": "UINT_MAX", "long": "LONG_MAX", "ulong": "ULONG_MAX", + "half": "0x1.ffcp+15", } limit_min = { @@ -119,24 +122,33 @@ "uint": "0", "long": "LONG_MIN", "ulong": "0", + "half": "-0x1.ffcp+15", } def conditional_guard(src, dst): int64_count = 0 float64_count = 0 + float16_count = 0 if src in int64_types: int64_count = int64_count + 1 elif src in float64_types: float64_count = float64_count + 1 + elif src in float16_types: + float16_count = float16_count + 1 if dst in int64_types: int64_count = int64_count + 1 elif dst in float64_types: float64_count = float64_count + 1 + elif dst in float16_types: + float16_count = float16_count + 1 if float64_count > 0: # In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has to be print("#ifdef cl_khr_fp64") return True + elif float16_count > 0: + print("#if defined cl_khr_fp16") + return True elif int64_count > 0: print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)") return True @@ -175,6 +187,10 @@ def conditional_guard(src, dst): #include <clc/clc.h> +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable @@ -498,22 +514,42 @@ def generate_float_conversion(src, dst, size, mode, sat): ) ) print( - " return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format( + " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);".format( DST=dst, N=size, 
BOOL=bool_type[dst], SRC=src ) ) else: print( - " return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format( + " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format( DST=dst, N=size, BOOL=bool_type[dst] ) ) + if dst == "half" and src in int_types and sizeof_type[src] >= 2: + dst_max = limit_max[dst] + # short is 16 bits signed, so the maximum value rounded to zero is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767) + if src == "short": + dst_max = "0x1.ffcp+14" + print( + " return clamp(sel, ({DST}{N}){DST_MIN}, ({DST}{N}){DST_MAX});".format( + DST=dst, N=size, DST_MIN=limit_min[dst], DST_MAX=dst_max + ) + ) + else: + print(" return sel;") if mode == "_rtp": print( - " return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format( + " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));".format( DST=dst, N=size, BOOL=bool_type[dst] ) ) + if dst == "half" and src in int_types and sizeof_type[src] >= 2: + print( + " return max(sel, ({DST}{N}){DST_MIN});".format( + DST=dst, N=size, DST_MIN=limit_min[dst] + ) + ) + else: + print(" return sel;") if mode == "_rtn": if clspv: print( @@ -528,16 +564,28 @@ def generate_float_conversion(src, dst, size, mode, sat): ) ) print( - " return select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format( + " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), c);".format( DST=dst, N=size, BOOL=bool_type[dst], SRC=src ) ) else: print( - " return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format( + " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format( DST=dst, N=size, BOOL=bool_type[dst] ) ) + if dst == "half" and src in int_types and sizeof_type[src] >= 2: + dst_max = limit_max[dst] + # short is 16 bits signed, so the maximum value rounded to negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 
0x7fff == 32767) + if src == "short": + dst_max = "0x1.ffcp+14" + print( + " return min(sel, ({DST}{N}){DST_MAX});".format( + DST=dst, N=size, DST_MAX=dst_max + ) + ) + else: + print(" return sel;") # Footer print("}") diff --git a/libclc/generic/lib/math/acos.cl b/libclc/generic/lib/math/acos.cl index 87db01416c86ee..af59f443e87173 100644 --- a/libclc/generic/lib/math/acos.cl +++ b/libclc/generic/lib/math/acos.cl @@ -171,3 +171,5 @@ _CLC_OVERLOAD _CLC_DEF double acos(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acos, double); #endif // cl_khr_fp64 + +_CLC_DEFINE_UNARY_BUILTIN_FP16(acos) diff --git a/libclc/generic/lib/math/acosh.cl b/libclc/generic/lib/math/acosh.cl index 59da5116277445..6e8dd78c3c00c1 100644 --- a/libclc/generic/lib/math/acosh.cl +++ b/libclc/generic/lib/math/acosh.cl @@ -125,3 +125,5 @@ _CLC_OVERLOAD _CLC_DEF double acosh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acosh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(acosh) diff --git a/libclc/generic/lib/math/acospi.cl b/libclc/generic/lib/math/acospi.cl index c91fc41789647b..7ebf802b5fbc59 100644 --- a/libclc/generic/lib/math/acospi.cl +++ b/libclc/generic/lib/math/acospi.cl @@ -170,3 +170,5 @@ _CLC_OVERLOAD _CLC_DEF double acospi(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acospi, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(acospi) diff --git a/libclc/generic/lib/math/asinh.cl b/libclc/generic/lib/math/asinh.cl index cfddb31c68c35c..9f8ddad873af07 100644 --- a/libclc/generic/lib/math/asinh.cl +++ b/libclc/generic/lib/math/asinh.cl @@ -291,3 +291,5 @@ _CLC_OVERLOAD _CLC_DEF double asinh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(asinh) diff --git a/libclc/generic/lib/math/atan.cl b/libclc/generic/lib/math/atan.cl index fa3633cef7480f..2fa6e8d116cc35 100644 --- a/libclc/generic/lib/math/atan.cl +++ b/libclc/generic/lib/math/atan.cl @@ -20,11 +20,11 
@@ * THE SOFTWARE. */ +#include <clc/clc.h> + #include "math.h" #include "../clcmacro.h" -#include <clc/clc.h> - _CLC_OVERLOAD _CLC_DEF float atan(float x) { const float piby2 = 1.5707963267948966f; // 0x3ff921fb54442d18 @@ -181,3 +181,6 @@ _CLC_OVERLOAD _CLC_DEF double atan(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan, double); #endif // cl_khr_fp64 + + +_CLC_DEFINE_UNARY_BUILTIN_FP16(atan) diff --git a/libclc/generic/lib/math/atan2.cl b/libclc/generic/lib/math/atan2.cl index a2f104fa185b6e..d260338b83ec7e 100644 --- a/libclc/generic/lib/math/atan2.cl +++ b/libclc/generic/lib/math/atan2.cl @@ -235,3 +235,5 @@ _CLC_OVERLOAD _CLC_DEF double atan2(double y, double x) _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double); #endif + +_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2) diff --git a/libclc/generic/lib/math/atan2pi.cl b/libclc/generic/lib/math/atan2pi.cl index a15b14fd319d83..ad2eda3dec87ca 100644 --- a/libclc/generic/lib/math/atan2pi.cl +++ b/libclc/generic/lib/math/atan2pi.cl @@ -219,3 +219,5 @@ _CLC_OVERLOAD _CLC_DEF double atan2pi(double y, double x) { _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double) #endif + +_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2pi) diff --git a/libclc/generic/lib/math/atanh.cl b/libclc/generic/lib/math/atanh.cl index 4af2f458536a52..524af025b0b586 100644 --- a/libclc/generic/lib/math/atanh.cl +++ b/libclc/generic/lib/math/atanh.cl @@ -111,3 +111,5 @@ _CLC_OVERLOAD _CLC_DEF double atanh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(atanh) diff --git a/libclc/generic/lib/math/atanpi.cl b/libclc/generic/lib/math/atanpi.cl index 2e2f032d8e6c26..625af12ba85185 100644 --- a/libclc/generic/lib/math/atanpi.cl +++ b/libclc/generic/lib/math/atanpi.cl @@ -180,3 +180,5 @@ _CLC_OVERLOAD _CLC_DEF double atanpi(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanpi, double) #endif + 
+_CLC_DEFINE_UNARY_BUILTIN_FP16(atanpi) diff --git a/libclc/generic/lib/math/cbrt.cl b/libclc/generic/lib/math/cbrt.cl index 5ff9367c898916..2f5ff9516ed768 100644 --- a/libclc/generic/lib/math/cbrt.cl +++ b/libclc/generic/lib/math/cbrt.cl @@ -149,3 +149,5 @@ _CLC_OVERLOAD _CLC_DEF double cbrt(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cbrt, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cbrt) diff --git a/libclc/generic/lib/math/clc_ldexp.cl b/libclc/generic/lib/math/clc_ldexp.cl index 61e34a521609cf..ae6117b7b29224 100644 --- a/libclc/generic/lib/math/clc_ldexp.cl +++ b/libclc/generic/lib/math/clc_ldexp.cl @@ -126,3 +126,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) { } #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) { + return (half)__clc_ldexp((float)x, n); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_ldexp, half, int); + +#endif diff --git a/libclc/generic/lib/math/clc_pown.cl b/libclc/generic/lib/math/clc_pown.cl index 0b7ac327512db3..1a1e2acec6eda5 100644 --- a/libclc/generic/lib/math/clc_pown.cl +++ b/libclc/generic/lib/math/clc_pown.cl @@ -368,3 +368,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pown, double, int) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_pown(half x, int y) { + return (half)__clc_pown((float)x, y); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_pown, half, int); + +#endif diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl index 3b9159ac967efb..edf4422610f4f5 100644 --- a/libclc/generic/lib/math/clc_remquo.cl +++ b/libclc/generic/lib/math/clc_remquo.cl @@ -254,3 +254,18 @@ __VEC_REMQUO(double, 4, 2) __VEC_REMQUO(double, 8, 4) __VEC_REMQUO(double, 16, 8) #endif + +#ifdef cl_khr_fp16 + 
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_remquo(half x, half y, __private int *pquo) { + return (half)__clc_remquo((float)x, (float)y, pquo); +} +__VEC_REMQUO(half, 2,) +__VEC_REMQUO(half, 3, 2) +__VEC_REMQUO(half, 4, 2) +__VEC_REMQUO(half, 8, 4) +__VEC_REMQUO(half, 16, 8) + +#endif diff --git a/libclc/generic/lib/math/clc_rootn.cl b/libclc/generic/lib/math/clc_rootn.cl index 0a2c98d3787cff..040b614f5feb4c 100644 --- a/libclc/generic/lib/math/clc_rootn.cl +++ b/libclc/generic/lib/math/clc_rootn.cl @@ -368,3 +368,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int) #endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_rootn(half x, int y) { + return (half)__clc_rootn((float)x, y); +} + +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_rootn, half, int); + +#endif diff --git a/libclc/generic/lib/math/clc_sw_binary.inc b/libclc/generic/lib/math/clc_sw_binary.inc index 7741475c23717c..5cf15a21f78053 100644 --- a/libclc/generic/lib/math/clc_sw_binary.inc +++ b/libclc/generic/lib/math/clc_sw_binary.inc @@ -2,11 +2,25 @@ #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) -// TODO: Enable half precision when the sw routine is implemented #if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) { return __CLC_SW_FUNC(__CLC_FUNC)(x, y); } +#elif __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return convert_half( + __CLC_SW_FUNC(__CLC_FUNC)(convert_float(x), convert_float(y))); +} +#else +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, + __CLC_GENTYPE y) { + return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)( + __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x), + __CLC_XCONCAT(convert_float, 
__CLC_VECSIZE)(y))); +} +#endif #endif #undef __CLC_SW_FUNC diff --git a/libclc/generic/lib/math/clc_sw_unary.inc b/libclc/generic/lib/math/clc_sw_unary.inc index cd148b07a02c38..9b908aee87a18c 100644 --- a/libclc/generic/lib/math/clc_sw_unary.inc +++ b/libclc/generic/lib/math/clc_sw_unary.inc @@ -2,11 +2,21 @@ #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x) -// TODO: Enable half precision when the sw routine is implemented #if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { return __CLC_SW_FUNC(__CLC_FUNC)(x); } +#elif __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { + return convert_half(__CLC_SW_FUNC(__CLC_FUNC)(convert_float(x))); +} +#else +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) { + return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)( + __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x))); +} +#endif #endif #undef __CLC_SW_FUNC diff --git a/libclc/generic/lib/math/cos.cl b/libclc/generic/lib/math/cos.cl index 157447f9cd7ce6..0af7aa67ba0090 100644 --- a/libclc/generic/lib/math/cos.cl +++ b/libclc/generic/lib/math/cos.cl @@ -75,3 +75,5 @@ _CLC_OVERLOAD _CLC_DEF double cos(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cos, double); #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cos) diff --git a/libclc/generic/lib/math/cosh.cl b/libclc/generic/lib/math/cosh.cl index 1a672755d1f7c0..0987d1f9216922 100644 --- a/libclc/generic/lib/math/cosh.cl +++ b/libclc/generic/lib/math/cosh.cl @@ -190,3 +190,5 @@ _CLC_OVERLOAD _CLC_DEF double cosh(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cosh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cosh) diff --git a/libclc/generic/lib/math/cospi.cl b/libclc/generic/lib/math/cospi.cl index 108b637c9abb61..d6ab27ba021e2e 100644 --- a/libclc/generic/lib/math/cospi.cl +++ b/libclc/generic/lib/math/cospi.cl @@ -134,3 +134,5 @@ _CLC_OVERLOAD _CLC_DEF double 
cospi(double x) { } _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double); #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(cospi) diff --git a/libclc/generic/lib/math/exp.cl b/libclc/generic/lib/math/exp.cl index 37f693c39be2b0..cc7b7f39bf1dbc 100644 --- a/libclc/generic/lib/math/exp.cl +++ b/libclc/generic/lib/math/exp.cl @@ -88,3 +88,5 @@ _CLC_OVERLOAD _CLC_DEF double exp(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(exp) diff --git a/libclc/generic/lib/math/expm1.cl b/libclc/generic/lib/math/expm1.cl index 9a3a90718a68d1..9a0aa37ac333bf 100644 --- a/libclc/generic/lib/math/expm1.cl +++ b/libclc/generic/lib/math/expm1.cl @@ -140,3 +140,5 @@ _CLC_OVERLOAD _CLC_DEF double expm1(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, expm1, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(expm1) diff --git a/libclc/generic/lib/math/fdim.inc b/libclc/generic/lib/math/fdim.inc index 9aa3496b189021..98cbef60766677 100644 --- a/libclc/generic/lib/math/fdim.inc +++ b/libclc/generic/lib/math/fdim.inc @@ -69,3 +69,28 @@ __CLC_FDIM_VEC(16) #undef __CLC_FDIM_VEC #endif #endif + +#if __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +#define QNANBITPATT_FP16 ((short)0x7e00) +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x, + private __CLC_GENTYPE y) { + short n = -(isnan(x) | isnan(y)) & QNANBITPATT_FP16; + short r = -(x > y) & as_short(x - y); + return as_half((short)(n | r)); +} +#define __CLC_FDIM_VEC(width) \ + _CLC_OVERLOAD _CLC_DEF half##width fdim(half##width x, half##width y) { \ + /* See comment in float implementation for explanation. 
*/ \ + short##width n = ~((x == x) & (y == y)) & QNANBITPATT_FP16; \ + short##width r = (x > y) & as_short##width(x - y); \ + return as_half##width(n | r); \ + } +__CLC_FDIM_VEC(2) +__CLC_FDIM_VEC(3) +__CLC_FDIM_VEC(4) +__CLC_FDIM_VEC(8) +__CLC_FDIM_VEC(16) +#undef __CLC_FDIM_VEC +#endif +#endif diff --git a/libclc/generic/lib/math/frexp.inc b/libclc/generic/lib/math/frexp.inc index b61cc3592a2a9e..e6e2af49235a45 100644 --- a/libclc/generic/lib/math/frexp.inc +++ b/libclc/generic/lib/math/frexp.inc @@ -21,6 +21,8 @@ * THE SOFTWARE. */ +#include "../clcmacro.h" + #define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE) #define __CLC_AS_INTN __CLC_XCONCAT(as_, __CLC_INTN) @@ -40,6 +42,17 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE } #endif +#if __CLC_FPSIZE == 16 +#ifdef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, + __CLC_ADDRESS_SPACE __CLC_INTN *ep) { + return (__CLC_GENTYPE)frexp((float)x, ep); +} +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, __CLC_GENTYPE, frexp, + __CLC_GENTYPE, __CLC_ADDRESS_SPACE, __CLC_INTN); +#endif +#endif + #if __CLC_FPSIZE == 64 #ifdef __CLC_SCALAR #define __CLC_AS_LONGN as_long diff --git a/libclc/generic/lib/math/ilogb.cl b/libclc/generic/lib/math/ilogb.cl index 050239c9c1ffa5..d085e8608b01a5 100644 --- a/libclc/generic/lib/math/ilogb.cl +++ b/libclc/generic/lib/math/ilogb.cl @@ -71,3 +71,15 @@ _CLC_OVERLOAD _CLC_DEF int ilogb(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF int ilogb(half x) { + return ilogb((float)x); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half); + +#endif diff --git a/libclc/generic/lib/math/lgamma.cl b/libclc/generic/lib/math/lgamma.cl index 26cd20eb01b86b..024894f2f304fd 100644 --- a/libclc/generic/lib/math/lgamma.cl +++ b/libclc/generic/lib/math/lgamma.cl @@ -41,4 +41,6 @@ 
_CLC_OVERLOAD _CLC_DEF double lgamma(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma, double) -#endif \ No newline at end of file +#endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(lgamma) diff --git a/libclc/generic/lib/math/lgamma_r.cl b/libclc/generic/lib/math/lgamma_r.cl index ff447386ac064d..fe28e420d1286b 100644 --- a/libclc/generic/lib/math/lgamma_r.cl +++ b/libclc/generic/lib/math/lgamma_r.cl @@ -486,6 +486,17 @@ _CLC_OVERLOAD _CLC_DEF double lgamma_r(double x, private int *ip) { _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma_r, double, private, int) #endif +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half lgamma_r(half x, private int *iptr) { + return (half)lgamma_r((float)x, iptr); +} + +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, lgamma_r, half, private, int); + +#endif #define __CLC_ADDRSPACE global #define __CLC_BODY <lgamma_r.inc> diff --git a/libclc/generic/lib/math/lgamma_r.inc b/libclc/generic/lib/math/lgamma_r.inc index 0e19ba8fb2c7cb..8aa17fbe79bd8e 100644 --- a/libclc/generic/lib/math/lgamma_r.inc +++ b/libclc/generic/lib/math/lgamma_r.inc @@ -21,12 +21,9 @@ * THE SOFTWARE. */ -// TODO: Enable half precision when the base version is implemented. 
-#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) { __CLC_INTN private_iptr; __CLC_GENTYPE ret = lgamma_r(x, &private_iptr); *iptr = private_iptr; return ret; } -#endif diff --git a/libclc/generic/lib/math/log10.cl b/libclc/generic/lib/math/log10.cl index 35a53a1eb5f3de..e669f3148f9c02 100644 --- a/libclc/generic/lib/math/log10.cl +++ b/libclc/generic/lib/math/log10.cl @@ -28,6 +28,10 @@ #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif // cl_khr_fp64 +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif // cl_khr_fp16 + #define COMPILING_LOG10 #include "log_base.h" #undef COMPILING_LOG10 @@ -37,3 +41,7 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log10, float); #ifdef cl_khr_fp64 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log10, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log10, half); +#endif // cl_khr_fp16 diff --git a/libclc/generic/lib/math/log1p.cl b/libclc/generic/lib/math/log1p.cl index be25c64bf6a436..42fd9d3a23f342 100644 --- a/libclc/generic/lib/math/log1p.cl +++ b/libclc/generic/lib/math/log1p.cl @@ -175,3 +175,5 @@ _CLC_OVERLOAD _CLC_DEF double log1p(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double); #endif // cl_khr_fp64 + +_CLC_DEFINE_UNARY_BUILTIN_FP16(log1p) diff --git a/libclc/generic/lib/math/log2.cl b/libclc/generic/lib/math/log2.cl index 8776a80ec3be4b..64463557e3f6db 100644 --- a/libclc/generic/lib/math/log2.cl +++ b/libclc/generic/lib/math/log2.cl @@ -28,6 +28,10 @@ #pragma OPENCL EXTENSION cl_khr_fp64 : enable #endif // cl_khr_fp64 +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif // cl_khr_fp16 + #define COMPILING_LOG2 #include "log_base.h" #undef COMPILING_LOG2 @@ -37,3 +41,7 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log2, float); #ifdef cl_khr_fp64 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, 
log2, double); #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log2, half); +#endif // cl_khr_fp16 diff --git a/libclc/generic/lib/math/log_base.h b/libclc/generic/lib/math/log_base.h index 4e20329f641bb5..b8110ca1779a23 100644 --- a/libclc/generic/lib/math/log_base.h +++ b/libclc/generic/lib/math/log_base.h @@ -295,3 +295,22 @@ log(double x) } #endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 + +_CLC_OVERLOAD _CLC_DEF half +#if defined(COMPILING_LOG2) +log2(half x) { + return (half)log2((float)x); +} +#elif defined(COMPILING_LOG10) +log10(half x) { + return (half)log10((float)x); +} +#else +log(half x) { + return (half)log((float)x); +} +#endif + +#endif // cl_khr_fp16 diff --git a/libclc/generic/lib/math/logb.cl b/libclc/generic/lib/math/logb.cl index 31e51616534319..c0c2b5de40ebce 100644 --- a/libclc/generic/lib/math/logb.cl +++ b/libclc/generic/lib/math/logb.cl @@ -29,3 +29,5 @@ _CLC_OVERLOAD _CLC_DEF double logb(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(logb) diff --git a/libclc/generic/lib/math/pown.inc b/libclc/generic/lib/math/pown.inc index 2add2c7459de90..84729d90a796fa 100644 --- a/libclc/generic/lib/math/pown.inc +++ b/libclc/generic/lib/math/pown.inc @@ -1,6 +1,3 @@ -// TODO: Enable half precision when the sw routine is implemented -#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) { return __clc_pown(x, y); } -#endif diff --git a/libclc/generic/lib/math/remquo.inc b/libclc/generic/lib/math/remquo.inc index c33b5ddab31126..c1de78a5e7f9c3 100644 --- a/libclc/generic/lib/math/remquo.inc +++ b/libclc/generic/lib/math/remquo.inc @@ -1,9 +1,6 @@ -// TODO: Enable half precision when the sw routine is implemented -#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) { __CLC_INTN local_q; __CLC_GENTYPE ret = 
__clc_remquo(x, y, &local_q); *q = local_q; return ret; } -#endif diff --git a/libclc/generic/lib/math/rootn.inc b/libclc/generic/lib/math/rootn.inc index f788649685ac90..3f5b00c082cd35 100644 --- a/libclc/generic/lib/math/rootn.inc +++ b/libclc/generic/lib/math/rootn.inc @@ -1,6 +1,3 @@ -// TODO: Enable half precision when the sw routine is implemented -#if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) { return __clc_rootn(x, y); } -#endif diff --git a/libclc/generic/lib/math/sin.cl b/libclc/generic/lib/math/sin.cl index 3a4074925b83ef..689c3a1dbd0c8e 100644 --- a/libclc/generic/lib/math/sin.cl +++ b/libclc/generic/lib/math/sin.cl @@ -77,3 +77,5 @@ _CLC_OVERLOAD _CLC_DEF double sin(double x) { _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sin, double); #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(sin) diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/generic/lib/math/sincos.inc index 2318ffb73f55bd..e97f0f9641c1cf 100644 --- a/libclc/generic/lib/math/sincos.inc +++ b/libclc/generic/lib/math/sincos.inc @@ -1,5 +1,3 @@ -// TODO: Enable half precision when sin/cos is implemented -#if __CLC_FPSIZE > 16 #define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \ _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \ *cosval = cos(x); \ @@ -11,4 +9,3 @@ __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) #undef __CLC_DECLARE_SINCOS -#endif diff --git a/libclc/generic/lib/math/sinh.cl b/libclc/generic/lib/math/sinh.cl index 9159b89222c28e..a7a092f1f547db 100644 --- a/libclc/generic/lib/math/sinh.cl +++ b/libclc/generic/lib/math/sinh.cl @@ -189,3 +189,5 @@ _CLC_OVERLOAD _CLC_DEF double sinh(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(sinh) diff --git a/libclc/generic/lib/math/sinpi.cl b/libclc/generic/lib/math/sinpi.cl index dbb995fe0cd9c3..c8a1031df0216d 100644 --- a/libclc/generic/lib/math/sinpi.cl 
+++ b/libclc/generic/lib/math/sinpi.cl @@ -129,3 +129,5 @@ _CLC_OVERLOAD _CLC_DEF double sinpi(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double) #endif + +_CLC_DEFINE_UNARY_BUILTIN_FP16(sinpi) diff --git a/libclc/generic/lib/math/tanh.cl b/libclc/generic/lib/math/tanh.cl index e9c4079ff311e0..6a6810cec11384 100644 --- a/libclc/generic/lib/math/tanh.cl +++ b/libclc/generic/lib/math/tanh.cl @@ -144,3 +144,5 @@ _CLC_OVERLOAD _CLC_DEF double tanh(double x) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double); #endif // cl_khr_fp64 + +_CLC_DEFINE_UNARY_BUILTIN_FP16(tanh)