diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 90cfd8cedd51b..be6df257f668b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3087,7 +3087,7 @@ class AMDGPULoadIntrinsic<LLVMType ptr_ty>:
   Intrinsic<
     [llvm_any_ty],
     [ptr_ty],
-    [IntrReadMem, IntrWillReturn, IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree],
+    [IntrReadMem, IntrArgMemOnly, IntrWillReturn, IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree],
     "",
     [SDNPMemOperand]
   >;
diff --git a/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll b/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
new file mode 100644
index 0000000000000..46f53d8f82cfd
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AMDGPU/load-intrinsics.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -mtriple=amdgcn --passes=inline --enable-noalias-to-md-conversion -S %s | FileCheck --check-prefix=OPT %s
+
+; Check that the load intrinsic is given the memory(argmem: read) attribute and that,
+; after inlining, its call carries the expected !alias.scope and !noalias metadata.
+
+define void @caller(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f) {
+; OPT-LABEL: define void @caller(
+; OPT-SAME: ptr addrspace(3) [[ADDR_F:%.*]], ptr addrspace(1) [[USE_F:%.*]]) {
+; OPT-NEXT:  [[ENTRY:.*:]]
+; OPT-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
+; OPT-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+; OPT-NEXT:    [[GEP_I:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR_F]], i32 4
+; OPT-NEXT:    [[VAL_I:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP_I]]), !alias.scope [[META0]], !noalias [[META3]]
+; OPT-NEXT:    store <2 x i32> [[VAL_I]], ptr addrspace(1) [[USE_F]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; OPT-NEXT:    ret void
+;
+entry:
+  call void @callee(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f)
+  ret void
+}
+
+define void @callee(ptr addrspace(3) noalias %addr, ptr addrspace(1) noalias %use) {
+; OPT-LABEL: define void @callee(
+; OPT-SAME: ptr addrspace(3) noalias [[ADDR:%.*]], ptr addrspace(1) noalias [[USE:%.*]]) {
+; OPT-NEXT:  [[ENTRY:.*:]]
+; OPT-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR]], i32 4
+; OPT-NEXT:    [[VAL:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP]])
+; OPT-NEXT:    store <2 x i32> [[VAL]], ptr addrspace(1) [[USE]], align 8
+; OPT-NEXT:    ret void
+;
+entry:
+  %gep = getelementptr i64, ptr addrspace(3) %addr, i32 4
+  %val = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) %gep)
+  store <2 x i32> %val, ptr addrspace(1) %use
+  ret void
+}
+;.
+; Check Function Attribute on decl
+; OPT: declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) captures(none)) #[[ATTR0:[0-9]+]]
+declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3))
+; OPT: attributes #[[ATTR0]] = { convergent nocallback nofree nounwind willreturn memory(argmem: read) }
+; OPT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+;.
+; OPT: [[META0]] = !{[[META1:![0-9]+]]}
+; OPT: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"callee: %addr"}
+; OPT: [[META2]] = distinct !{[[META2]], !"callee"}
+; OPT: [[META3]] = !{[[META4:![0-9]+]]}
+; OPT: [[META4]] = distinct !{[[META4]], [[META2]], !"callee: %use"}
+;.