diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index ee44e9353d048e..d2b756e82964e6 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -937,6 +938,36 @@ void AtomicExpandImpl::expandPartwordAtomicRMW( AI->eraseFromParent(); } +/// Copy metadata that's safe to preserve when widening atomics. +static void copyMetadataForAtomic(Instruction &Dest, + const Instruction &Source) { + SmallVector, 8> MD; + Source.getAllMetadata(MD); + LLVMContext &Ctx = Dest.getContext(); + MDBuilder MDB(Ctx); + + for (auto [ID, N] : MD) { + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_access_group: + case LLVMContext::MD_mmra: + Dest.setMetadata(ID, N); + break; + default: + if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory")) + Dest.setMetadata(ID, N); + else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory")) + Dest.setMetadata(ID, N); + + break; + } + } +} + // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) { ReplacementIRBuilder Builder(AI, *DL); @@ -965,7 +996,8 @@ AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) { AtomicRMWInst *NewAI = Builder.CreateAtomicRMW( Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment, AI->getOrdering(), AI->getSyncScopeID()); - // TODO: Preserve metadata + + copyMetadataForAtomic(*NewAI, *AI); Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); AI->replaceAllUsesWith(FinalOldResult); diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll index ce8524c70af60c..0acb8f8d0fcf60 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll @@ -176,9 +176,9 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4(ptr addrspace(1) %ptr, i1 ret i16 %res } -; Preserve unknown metadata -define i16 @test_atomicrmw_and_i16_global_agent_preserve_md(ptr addrspace(1) %ptr, i16 %value) { -; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_preserve_md( +; Drop unknown metadata and noundef +define i16 @test_atomicrmw_and_i16_global_agent_drop_md(ptr addrspace(1) %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_drop_md( ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 @@ -198,9 +198,9 @@ define i16 @test_atomicrmw_and_i16_global_agent_preserve_md(ptr addrspace(1) %pt ret i16 %res } -; Preserve unknown metadata -define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_md(ptr addrspace(1) %ptr, i16 %value) { -; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_md( +; Drop unknown metadata +define i16 @test_atomicrmw_and_i16_global_agent_align4_drop_md(ptr addrspace(1) %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_drop_md( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 ; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536 ; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4 @@ -211,6 +211,89 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_md(ptr addrspace ret i16 %res } +; Drop noundef, preserve mmra +define i16 @test_atomicrmw_and_i16_global_agent_preserve_mmra(ptr addrspace(1) %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_preserve_mmra( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0:![0-9]+]] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] +; + %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !noundef !0, !mmra !1 + ret i16 %res +} + +; Drop noundef, preserve mmra +define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(ptr addrspace(1) %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra( +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536 +; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] +; + %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noundef !0, !mmra !1 + ret i16 %res +} + +define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope(ptr addrspace(1) %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope( +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536 +; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !alias.scope [[META1:![0-9]+]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] +; + %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !alias.scope !2 + ret i16 %res +} + +define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias(ptr addrspace(1) %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias( +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536 +; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noalias [[META1]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] +; + %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noalias !2 + ret i16 %res +} + +define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct(ptr addrspace(1) %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct( +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536 +; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa.struct [[TBAA_STRUCT4:![0-9]+]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] +; + %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !tbaa.struct !5 + ret i16 %res +} + +define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa(ptr addrspace(1) %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa( +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536 +; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED]] +; + %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !tbaa !6 + ret i16 %res +} + define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory( ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) @@ -223,7 +306,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(ptr add ; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 ; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] ; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8:![0-9]+]] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; CHECK-NEXT: ret i16 [[EXTRACTED]] @@ -236,7 +319,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory( ; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 ; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536 -; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; @@ -256,7 +339,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_fine_grained_memory(p ; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 ; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] ; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; CHECK-NEXT: ret i16 [[EXTRACTED]] @@ -269,7 +352,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_m ; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_memory( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32 ; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536 -; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16 ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; @@ -1180,6 +1263,15 @@ define bfloat @test_atomicrmw_xchg_bf16_global_agent_align4(ptr addrspace(1) %pt } !0 = !{} +!1 = !{!"foo", !"bar"} +!2 = !{!3} +!3 = distinct !{!3, !4} +!4 = distinct !{!4} +!5 = !{i64 0, i64 4, !1, i64 8, i64 4} +!6 = !{!7, !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C/C++ TBAA"} + ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; BASE: {{.*}} ; GCN: {{.*}}