[AMDGPU] Add f16 builtin functions (VI+)

Differential Revision: https://reviews.llvm.org/D26476

llvm-svn: 286741
llvm · Nov 13, 2016 · commit 81a78bb
1 parent e003871

Showing 5 changed files with 112 additions and 5 deletions.
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -84,6 +84,16 @@ BUILTIN(__builtin_amdgcn_ds_swizzle, "iiIi", "nc")
 // VI+ only builtins.
 //===----------------------------------------------------------------------===//
 
+TARGET_BUILTIN(__builtin_amdgcn_div_fixuph, "hhhh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_rcph, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_rsqh, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_sinh, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cosh, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_ldexph, "hhi", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_frexp_manth, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "ih", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
 
 //===----------------------------------------------------------------------===//

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8190,38 +8190,45 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
   case AMDGPU::BI__builtin_amdgcn_div_fixup:
   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
+  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
   case AMDGPU::BI__builtin_amdgcn_trig_preop:
   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
   case AMDGPU::BI__builtin_amdgcn_rcp:
   case AMDGPU::BI__builtin_amdgcn_rcpf:
+  case AMDGPU::BI__builtin_amdgcn_rcph:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
   case AMDGPU::BI__builtin_amdgcn_rsq:
   case AMDGPU::BI__builtin_amdgcn_rsqf:
+  case AMDGPU::BI__builtin_amdgcn_rsqh:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
   case AMDGPU::BI__builtin_amdgcn_sinf:
+  case AMDGPU::BI__builtin_amdgcn_sinh:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
   case AMDGPU::BI__builtin_amdgcn_cosf:
+  case AMDGPU::BI__builtin_amdgcn_cosh:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
   case AMDGPU::BI__builtin_amdgcn_log_clampf:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
   case AMDGPU::BI__builtin_amdgcn_ldexp:
   case AMDGPU::BI__builtin_amdgcn_ldexpf:
+  case AMDGPU::BI__builtin_amdgcn_ldexph:
     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
-  case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
+  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
+  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
-  }
   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
-  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
+  case AMDGPU::BI__builtin_amdgcn_frexp_expf:
+  case AMDGPU::BI__builtin_amdgcn_frexp_exph:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
-  }
   case AMDGPU::BI__builtin_amdgcn_fract:
   case AMDGPU::BI__builtin_amdgcn_fractf:
+  case AMDGPU::BI__builtin_amdgcn_fracth:
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
   case AMDGPU::BI__builtin_amdgcn_lerp:
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
@@ -8235,6 +8242,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
   case AMDGPU::BI__builtin_amdgcn_class:
   case AMDGPU::BI__builtin_amdgcn_classf:
+  case AMDGPU::BI__builtin_amdgcn_classh:
     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
 
   case AMDGPU::BI__builtin_amdgcn_read_exec: {

diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -1,8 +1,79 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -S -emit-llvm -o - %s | FileCheck %s
 
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
 typedef unsigned long ulong;
 
+// CHECK-LABEL: @test_div_fixup_f16
+// CHECK: call half @llvm.amdgcn.div.fixup.f16
+void test_div_fixup_f16(global half* out, half a, half b, half c)
+{
+  *out = __builtin_amdgcn_div_fixuph(a, b, c);
+}
+
+// CHECK-LABEL: @test_rcp_f16
+// CHECK: call half @llvm.amdgcn.rcp.f16
+void test_rcp_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rcph(a);
+}
+
+// CHECK-LABEL: @test_rsq_f16
+// CHECK: call half @llvm.amdgcn.rsq.f16
+void test_rsq_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rsqh(a);
+}
+
+// CHECK-LABEL: @test_sin_f16
+// CHECK: call half @llvm.amdgcn.sin.f16
+void test_sin_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_sinh(a);
+}
+
+// CHECK-LABEL: @test_cos_f16
+// CHECK: call half @llvm.amdgcn.cos.f16
+void test_cos_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_cosh(a);
+}
+
+// CHECK-LABEL: @test_ldexp_f16
+// CHECK: call half @llvm.amdgcn.ldexp.f16
+void test_ldexp_f16(global half* out, half a, int b)
+{
+  *out = __builtin_amdgcn_ldexph(a, b);
+}
+
+// CHECK-LABEL: @test_frexp_mant_f16
+// CHECK: call half @llvm.amdgcn.frexp.mant.f16
+void test_frexp_mant_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_frexp_manth(a);
+}
+
+// CHECK-LABEL: @test_frexp_exp_f16
+// CHECK: call i32 @llvm.amdgcn.frexp.exp.f16
+void test_frexp_exp_f16(global short* out, half a)
+{
+  *out = __builtin_amdgcn_frexp_exph(a);
+}
+
+// CHECK-LABEL: @test_fract_f16
+// CHECK: call half @llvm.amdgcn.fract.f16
+void test_fract_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_fracth(a);
+}
+
+// CHECK-LABEL: @test_class_f16
+// CHECK: call i1 @llvm.amdgcn.class.f16
+void test_class_f16(global half* out, half a, int b)
+{
+  *out = __builtin_amdgcn_classh(a, b);
+}
 
 // CHECK-LABEL: @test_s_memrealtime
 // CHECK: call i64 @llvm.amdgcn.s.memrealtime()

diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl
@@ -0,0 +1,18 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+void test_f16(global half *out, half a, half b, half c)
+{
+  *out = __builtin_amdgcn_div_fixuph(a, b, c); // expected-error {{'__builtin_amdgcn_div_fixuph' needs target feature 16-bit-insts}}
+  *out = __builtin_amdgcn_rcph(a); // expected-error {{'__builtin_amdgcn_rcph' needs target feature 16-bit-insts}}
+  *out = __builtin_amdgcn_rsqh(a); // expected-error {{'__builtin_amdgcn_rsqh' needs target feature 16-bit-insts}}
+  *out = __builtin_amdgcn_sinh(a); // expected-error {{'__builtin_amdgcn_sinh' needs target feature 16-bit-insts}}
+  *out = __builtin_amdgcn_cosh(a); // expected-error {{'__builtin_amdgcn_cosh' needs target feature 16-bit-insts}}
+  *out = __builtin_amdgcn_ldexph(a, b); // expected-error {{'__builtin_amdgcn_ldexph' needs target feature 16-bit-insts}}
+  *out = __builtin_amdgcn_frexp_manth(a); // expected-error {{'__builtin_amdgcn_frexp_manth' needs target feature 16-bit-insts}}
+  *out = __builtin_amdgcn_frexp_exph(a); // expected-error {{'__builtin_amdgcn_frexp_exph' needs target feature 16-bit-insts}}
+  *out = __builtin_amdgcn_fracth(a); // expected-error {{'__builtin_amdgcn_fracth' needs target feature 16-bit-insts}}
+  *out = __builtin_amdgcn_classh(a, b); // expected-error {{'__builtin_amdgcn_classh' needs target feature 16-bit-insts}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -1,5 +1,5 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-amdhsa -target-cpu tahiti -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
 
 // FIXME: We only get one error if the functions are the other order in the
 // file.