-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang][AMDGPU] Add builtins for instrinsic llvm.amdgcn.raw.ptr.buffer.load
#99258
[Clang][AMDGPU] Add builtins for instrinsic llvm.amdgcn.raw.ptr.buffer.load
#99258
Conversation
|
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-clang-codegen Author: Shilei Tian (shiltian) ChangesFull diff: https://github.com/llvm/llvm-project/pull/99258.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 56bba448e12a4..401792fac6478 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -155,6 +155,12 @@ BUILTIN(__builtin_amdgcn_raw_buffer_store_b32, "viQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b64, "vV2iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b96, "vV3iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b128, "vV4iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b8, "cQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b16, "sQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b32, "iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4iQbiiIi", "n")
//===----------------------------------------------------------------------===//
// Ballot builtins.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 67027f8aa93f3..189a2cfd23aa5 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19177,6 +19177,45 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
return emitBuiltinWithOneOverloadedType<5>(
*this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
+ llvm::Type *RetTy = nullptr;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
+ RetTy = Int8Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
+ RetTy = Int16Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
+ RetTy = Int32Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
+ RetTy =
+ llvm::VectorType::get(Int32Ty, /*NumElements=*/2, /*Scalable=*/false);
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
+ RetTy =
+ llvm::VectorType::get(Int32Ty, /*NumElements=*/3, /*Scalable=*/false);
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
+ RetTy =
+ llvm::VectorType::get(Int32Ty, /*NumElements=*/4, /*Scalable=*/false);
+ break;
+ }
+ SmallVector<Value *, 4> Args;
+ Args.push_back(EmitScalarExpr(E->getArg(0)));
+ Args.push_back(EmitScalarExpr(E->getArg(1)));
+ Args.push_back(EmitScalarExpr(E->getArg(2)));
+ Args.push_back(EmitScalarExpr(E->getArg(3)));
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
+ return Builder.CreateCall(F, Args);
+ }
default:
return nullptr;
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
new file mode 100644
index 0000000000000..f95fa8e274bb7
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
@@ -0,0 +1,172 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s
+
+typedef char i8;
+typedef short i16;
+typedef int i32;
+typedef int i64 __attribute__((ext_vector_type(2)));
+typedef int i96 __attribute__((ext_vector_type(3)));
+typedef int i128 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+i8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+i16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+i32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+i64 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+i96 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+i128 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+i8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+i16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+i32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+i64 test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+i96 test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+i128 test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+i8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+i16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+i32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+i64 test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+i96 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+i128 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl
new file mode 100644
index 0000000000000..39b363b00fdad
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef char i8;
+typedef short i16;
+typedef int i32;
+typedef int i64 __attribute__((ext_vector_type(2)));
+typedef int i96 __attribute__((ext_vector_type(3)));
+typedef int i128 __attribute__((ext_vector_type(4)));
+
+i8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b8' must be a constant integer}}
+}
+
+i16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b16' must be a constant integer}}
+}
+
+i32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b32' must be a constant integer}}
+}
+
+i64 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b64' must be a constant integer}}
+}
+
+i96 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b96' must be a constant integer}}
+}
+
+i128 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b128' must be a constant integer}}
+}
|
|
@llvm/pr-subscribers-clang Author: Shilei Tian (shiltian) ChangesFull diff: https://github.com/llvm/llvm-project/pull/99258.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 56bba448e12a4..401792fac6478 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -155,6 +155,12 @@ BUILTIN(__builtin_amdgcn_raw_buffer_store_b32, "viQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b64, "vV2iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b96, "vV3iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b128, "vV4iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b8, "cQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b16, "sQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b32, "iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4iQbiiIi", "n")
//===----------------------------------------------------------------------===//
// Ballot builtins.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 67027f8aa93f3..189a2cfd23aa5 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19177,6 +19177,45 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
return emitBuiltinWithOneOverloadedType<5>(
*this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
+ llvm::Type *RetTy = nullptr;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
+ RetTy = Int8Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
+ RetTy = Int16Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
+ RetTy = Int32Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
+ RetTy =
+ llvm::VectorType::get(Int32Ty, /*NumElements=*/2, /*Scalable=*/false);
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
+ RetTy =
+ llvm::VectorType::get(Int32Ty, /*NumElements=*/3, /*Scalable=*/false);
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
+ RetTy =
+ llvm::VectorType::get(Int32Ty, /*NumElements=*/4, /*Scalable=*/false);
+ break;
+ }
+ SmallVector<Value *, 4> Args;
+ Args.push_back(EmitScalarExpr(E->getArg(0)));
+ Args.push_back(EmitScalarExpr(E->getArg(1)));
+ Args.push_back(EmitScalarExpr(E->getArg(2)));
+ Args.push_back(EmitScalarExpr(E->getArg(3)));
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
+ return Builder.CreateCall(F, Args);
+ }
default:
return nullptr;
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
new file mode 100644
index 0000000000000..f95fa8e274bb7
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
@@ -0,0 +1,172 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s
+
+typedef char i8;
+typedef short i16;
+typedef int i32;
+typedef int i64 __attribute__((ext_vector_type(2)));
+typedef int i96 __attribute__((ext_vector_type(3)));
+typedef int i128 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+i8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+i16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+i32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+i64 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+i96 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+i128 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+i8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+i16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+i32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+i64 test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+i96 test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+i128 test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+i8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+i16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+i32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+i64 test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+i96 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+i128 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl
new file mode 100644
index 0000000000000..39b363b00fdad
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef char i8;
+typedef short i16;
+typedef int i32;
+typedef int i64 __attribute__((ext_vector_type(2)));
+typedef int i96 __attribute__((ext_vector_type(3)));
+typedef int i128 __attribute__((ext_vector_type(4)));
+
+i8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b8' must be a constant integer}}
+}
+
+i16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b16' must be a constant integer}}
+}
+
+i32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b32' must be a constant integer}}
+}
+
+i64 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b64' must be a constant integer}}
+}
+
+i96 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b96' must be a constant integer}}
+}
+
+i128 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b128' must be a constant integer}}
+}
|
|
@llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) ChangesFull diff: https://github.com/llvm/llvm-project/pull/99258.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 56bba448e12a4..401792fac6478 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -155,6 +155,12 @@ BUILTIN(__builtin_amdgcn_raw_buffer_store_b32, "viQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b64, "vV2iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b96, "vV3iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b128, "vV4iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b8, "cQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b16, "sQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b32, "iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4iQbiiIi", "n")
//===----------------------------------------------------------------------===//
// Ballot builtins.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 67027f8aa93f3..189a2cfd23aa5 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19177,6 +19177,45 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
return emitBuiltinWithOneOverloadedType<5>(
*this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
+ llvm::Type *RetTy = nullptr;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
+ RetTy = Int8Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
+ RetTy = Int16Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
+ RetTy = Int32Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
+ RetTy =
+ llvm::VectorType::get(Int32Ty, /*NumElements=*/2, /*Scalable=*/false);
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
+ RetTy =
+ llvm::VectorType::get(Int32Ty, /*NumElements=*/3, /*Scalable=*/false);
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
+ RetTy =
+ llvm::VectorType::get(Int32Ty, /*NumElements=*/4, /*Scalable=*/false);
+ break;
+ }
+ SmallVector<Value *, 4> Args;
+ Args.push_back(EmitScalarExpr(E->getArg(0)));
+ Args.push_back(EmitScalarExpr(E->getArg(1)));
+ Args.push_back(EmitScalarExpr(E->getArg(2)));
+ Args.push_back(EmitScalarExpr(E->getArg(3)));
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
+ return Builder.CreateCall(F, Args);
+ }
default:
return nullptr;
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
new file mode 100644
index 0000000000000..f95fa8e274bb7
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
@@ -0,0 +1,172 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s
+
+typedef char i8;
+typedef short i16;
+typedef int i32;
+typedef int i64 __attribute__((ext_vector_type(2)));
+typedef int i96 __attribute__((ext_vector_type(3)));
+typedef int i128 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+i8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+i16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+i32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+i64 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+i96 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+i128 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+i8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+i16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+i32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+i64 test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+i96 test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+i128 test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+i8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+i16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+i32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+i64 test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+i96 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+i128 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl
new file mode 100644
index 0000000000000..39b363b00fdad
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef char i8;
+typedef short i16;
+typedef int i32;
+typedef int i64 __attribute__((ext_vector_type(2)));
+typedef int i96 __attribute__((ext_vector_type(3)));
+typedef int i128 __attribute__((ext_vector_type(4)));
+
+i8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b8' must be a constant integer}}
+}
+
+i16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b16' must be a constant integer}}
+}
+
+i32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b32' must be a constant integer}}
+}
+
+i64 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b64' must be a constant integer}}
+}
+
+i96 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b96' must be a constant integer}}
+}
+
+i128 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b128' must be a constant integer}}
+}
|
4fc2c23 to
142a3a4
Compare
| BUILTIN(__builtin_amdgcn_raw_buffer_load_b16, "sQbiiIi", "n") | ||
| BUILTIN(__builtin_amdgcn_raw_buffer_load_b32, "iQbiiIi", "n") | ||
| BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2iQbiiIi", "n") | ||
| BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3iQbiiIi", "n") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically this one depends on a subtarget feature for 96 bit load/store but I don't see one that applies yet
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can do a separate patch to add the sub target feature to change both of them.
142a3a4 to
e029b94
Compare
e029b94 to
235b288
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably should change the stores to unsigned to match in a separate patch
Doing it right now. |
235b288 to
578d497
Compare
578d497 to
f60231b
Compare
…er.load` (#99258) Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250868

No description provided.