From 78ede9f79b14db20f0d45fb394bbe3fb646bc327 Mon Sep 17 00:00:00 2001
From: Shilei Tian
Date: Sun, 1 Sep 2024 18:52:41 -0400
Subject: [PATCH] [Attributor] Add support for atomic operations in
 `AAAddressSpace`

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp  |   9 +-
 .../Transforms/IPO/AttributorAttributes.cpp  |   9 +-
 llvm/test/CodeGen/AMDGPU/aa-as-infer.ll      | 157 ++++++++++++++++++
 3 files changed, 171 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 72049f0aa6b86..ffeec31bb930a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1084,10 +1084,15 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       if (auto *LI = dyn_cast<LoadInst>(&I)) {
         A.getOrCreateAAFor<AAAddressSpace>(
             IRPosition::value(*LI->getPointerOperand()));
-      }
-      if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
         A.getOrCreateAAFor<AAAddressSpace>(
             IRPosition::value(*SI->getPointerOperand()));
+      } else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I)) {
+        A.getOrCreateAAFor<AAAddressSpace>(
+            IRPosition::value(*RMW->getPointerOperand()));
+      } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
+        A.getOrCreateAAFor<AAAddressSpace>(
+            IRPosition::value(*CmpX->getPointerOperand()));
       }
     }
   }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1258387b9185a..6fbb16e250f87 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12584,10 +12584,15 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
       if (auto *LI = dyn_cast<LoadInst>(Inst)) {
         Changed |=
             makeChange(A, LI, U, OriginalValue, NewPtrTy, UseOriginalValue);
-      }
-      if (auto *SI = dyn_cast<StoreInst>(Inst)) {
+      } else if (auto *SI = dyn_cast<StoreInst>(Inst)) {
         Changed |=
             makeChange(A, SI, U, OriginalValue, NewPtrTy, UseOriginalValue);
+      } else if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
+        Changed |=
+            makeChange(A, RMW, U, OriginalValue, NewPtrTy, UseOriginalValue);
+      } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+        Changed |=
+            makeChange(A, CmpX, U, OriginalValue, NewPtrTy, UseOriginalValue);
       }
       return true;
     };
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index 37689edc53e25..fdc5debb18915 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -2,6 +2,7 @@
 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -S %s -o - | FileCheck %s
 
 @g1 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g2 = protected addrspace(1) externally_initialized global i32 0, align 4
 
 define internal void @volatile_load_store_as_0(ptr %p) {
 ; CHECK-LABEL: define internal void @volatile_load_store_as_0(
@@ -86,3 +87,159 @@ define void @call_volatile_load_store_as_4(ptr addrspace(4) %p1, ptr addrspace(4
   call void @volatile_load_store_as_1(ptr %p2.cast)
   ret void
 }
+
+define internal void @can_infer_cmpxchg(ptr %word) {
+; CHECK-LABEL: define internal void @can_infer_cmpxchg(
+; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[CMPXCHG_0:%.*]] = cmpxchg ptr addrspace(1) [[TMP1]], i32 0, i32 4 monotonic monotonic, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[CMPXCHG_1:%.*]] = cmpxchg ptr addrspace(1) [[TMP2]], i32 0, i32 5 acq_rel monotonic, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[CMPXCHG_2:%.*]] = cmpxchg ptr addrspace(1) [[TMP3]], i32 0, i32 6 acquire monotonic, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[CMPXCHG_3:%.*]] = cmpxchg ptr addrspace(1) [[TMP4]], i32 0, i32 7 release monotonic, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[CMPXCHG_4:%.*]] = cmpxchg ptr addrspace(1) [[TMP5]], i32 0, i32 8 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[CMPXCHG_5:%.*]] = cmpxchg weak ptr addrspace(1) [[TMP6]], i32 0, i32 9 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[WORD]], i32 0, i32 10 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[WORD]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+; CHECK-NEXT:    ret void
+;
+  %cmpxchg.0 = cmpxchg ptr %word, i32 0, i32 4 monotonic monotonic, align 4
+  %cmpxchg.1 = cmpxchg ptr %word, i32 0, i32 5 acq_rel monotonic, align 4
+  %cmpxchg.2 = cmpxchg ptr %word, i32 0, i32 6 acquire monotonic, align 4
+  %cmpxchg.3 = cmpxchg ptr %word, i32 0, i32 7 release monotonic, align 4
+  %cmpxchg.4 = cmpxchg ptr %word, i32 0, i32 8 seq_cst monotonic, align 4
+  %cmpxchg.5 = cmpxchg weak ptr %word, i32 0, i32 9 seq_cst monotonic, align 4
+  %cmpxchg.6 = cmpxchg volatile ptr %word, i32 0, i32 10 seq_cst monotonic, align 4
+  %cmpxchg.7 = cmpxchg weak volatile ptr %word, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+  ret void
+}
+
+define internal void @can_not_infer_cmpxchg(ptr %word) {
+; CHECK-LABEL: define internal void @can_not_infer_cmpxchg(
+; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[CMPXCHG_0:%.*]] = cmpxchg ptr [[WORD]], i32 0, i32 4 monotonic monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_1:%.*]] = cmpxchg ptr [[WORD]], i32 0, i32 5 acq_rel monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_2:%.*]] = cmpxchg ptr [[WORD]], i32 0, i32 6 acquire monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_3:%.*]] = cmpxchg ptr [[WORD]], i32 0, i32 7 release monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_4:%.*]] = cmpxchg ptr [[WORD]], i32 0, i32 8 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[WORD]], i32 0, i32 9 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[WORD]], i32 0, i32 10 seq_cst monotonic, align 4
+; CHECK-NEXT:    [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[WORD]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+; CHECK-NEXT:    ret void
+;
+  %cmpxchg.0 = cmpxchg ptr %word, i32 0, i32 4 monotonic monotonic, align 4
+  %cmpxchg.1 = cmpxchg ptr %word, i32 0, i32 5 acq_rel monotonic, align 4
+  %cmpxchg.2 = cmpxchg ptr %word, i32 0, i32 6 acquire monotonic, align 4
+  %cmpxchg.3 = cmpxchg ptr %word, i32 0, i32 7 release monotonic, align 4
+  %cmpxchg.4 = cmpxchg ptr %word, i32 0, i32 8 seq_cst monotonic, align 4
+  %cmpxchg.5 = cmpxchg weak ptr %word, i32 0, i32 9 seq_cst monotonic, align 4
+  %cmpxchg.6 = cmpxchg volatile ptr %word, i32 0, i32 10 seq_cst monotonic, align 4
+  %cmpxchg.7 = cmpxchg weak volatile ptr %word, i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
+  ret void
+}
+
+define internal void @can_infer_atomicrmw(ptr %word) {
+; CHECK-LABEL: define internal void @can_infer_atomicrmw(
+; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr addrspace(1) [[TMP1]], i32 12 monotonic, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr addrspace(1) [[TMP2]], i32 13 monotonic, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr addrspace(1) [[TMP3]], i32 14 monotonic, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr addrspace(1) [[TMP4]], i32 15 monotonic, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr addrspace(1) [[TMP5]], i32 16 monotonic, align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr addrspace(1) [[TMP6]], i32 17 monotonic, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr addrspace(1) [[TMP7]], i32 18 monotonic, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr addrspace(1) [[TMP8]], i32 19 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[WORD]], i32 20 monotonic, align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = addrspacecast ptr [[WORD]] to ptr addrspace(1)
+; CHECK-NEXT:    [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr addrspace(1) [[TMP10]], i32 21 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[WORD]], i32 22 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT:    ret void
+;
+  %atomicrmw.xchg = atomicrmw xchg ptr %word, i32 12 monotonic, align 4
+  %atomicrmw.add = atomicrmw add ptr %word, i32 13 monotonic, align 4
+  %atomicrmw.sub = atomicrmw sub ptr %word, i32 14 monotonic, align 4
+  %atomicrmw.and = atomicrmw and ptr %word, i32 15 monotonic, align 4
+  %atomicrmw.nand = atomicrmw nand ptr %word, i32 16 monotonic, align 4
+  %atomicrmw.or = atomicrmw or ptr %word, i32 17 monotonic, align 4
+  %atomicrmw.xor = atomicrmw xor ptr %word, i32 18 monotonic, align 4
+  %atomicrmw.max = atomicrmw max ptr %word, i32 19 monotonic, align 4
+  %atomicrmw.min = atomicrmw volatile min ptr %word, i32 20 monotonic, align 4
+  %atomicrmw.umax = atomicrmw umax ptr %word, i32 21 syncscope("singlethread") monotonic, align 4
+  %atomicrmw.umin = atomicrmw volatile umin ptr %word, i32 22 syncscope("singlethread") monotonic, align 4
+  ret void
+}
+
+define internal void @can_not_infer_atomicrmw(ptr %word) {
+; CHECK-LABEL: define internal void @can_not_infer_atomicrmw(
+; CHECK-SAME: ptr [[WORD:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[WORD]], i32 12 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[WORD]], i32 13 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[WORD]], i32 14 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[WORD]], i32 15 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[WORD]], i32 16 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[WORD]], i32 17 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[WORD]], i32 18 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[WORD]], i32 19 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[WORD]], i32 20 monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[WORD]], i32 21 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT:    [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[WORD]], i32 22 syncscope("singlethread") monotonic, align 4
+; CHECK-NEXT:    ret void
+;
+  %atomicrmw.xchg = atomicrmw xchg ptr %word, i32 12 monotonic, align 4
+  %atomicrmw.add = atomicrmw add ptr %word, i32 13 monotonic, align 4
+  %atomicrmw.sub = atomicrmw sub ptr %word, i32 14 monotonic, align 4
+  %atomicrmw.and = atomicrmw and ptr %word, i32 15 monotonic, align 4
+  %atomicrmw.nand = atomicrmw nand ptr %word, i32 16 monotonic, align 4
+  %atomicrmw.or = atomicrmw or ptr %word, i32 17 monotonic, align 4
+  %atomicrmw.xor = atomicrmw xor ptr %word, i32 18 monotonic, align 4
+  %atomicrmw.max = atomicrmw max ptr %word, i32 19 monotonic, align 4
+  %atomicrmw.min = atomicrmw volatile min ptr %word, i32 20 monotonic, align 4
+  %atomicrmw.umax = atomicrmw umax ptr %word, i32 21 syncscope("singlethread") monotonic, align 4
+  %atomicrmw.umin = atomicrmw volatile umin ptr %word, i32 22 syncscope("singlethread") monotonic, align 4
+  ret void
+}
+
+define void @foo(ptr addrspace(3) %val) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr addrspace(3) [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[VAL_CAST:%.*]] = addrspacecast ptr addrspace(3) [[VAL]] to ptr
+; CHECK-NEXT:    call void @can_infer_cmpxchg(ptr addrspacecast (ptr addrspace(1) @g1 to ptr))
+; CHECK-NEXT:    call void @can_infer_cmpxchg(ptr addrspacecast (ptr addrspace(1) @g2 to ptr))
+; CHECK-NEXT:    call void @can_not_infer_cmpxchg(ptr addrspacecast (ptr addrspace(1) @g1 to ptr))
+; CHECK-NEXT:    call void @can_not_infer_cmpxchg(ptr addrspacecast (ptr addrspace(1) @g2 to ptr))
+; CHECK-NEXT:    call void @can_not_infer_cmpxchg(ptr [[VAL_CAST]])
+; CHECK-NEXT:    call void @can_infer_atomicrmw(ptr addrspacecast (ptr addrspace(1) @g1 to ptr))
+; CHECK-NEXT:    call void @can_infer_atomicrmw(ptr addrspacecast (ptr addrspace(1) @g2 to ptr))
+; CHECK-NEXT:    call void @can_not_infer_atomicrmw(ptr addrspacecast (ptr addrspace(1) @g1 to ptr))
+; CHECK-NEXT:    call void @can_not_infer_atomicrmw(ptr addrspacecast (ptr addrspace(1) @g2 to ptr))
+; CHECK-NEXT:    call void @can_not_infer_atomicrmw(ptr [[VAL_CAST]])
+; CHECK-NEXT:    ret void
+;
+  %g1.cast = addrspacecast ptr addrspace(1) @g1 to ptr
+  %g2.cast = addrspacecast ptr addrspace(1) @g2 to ptr
+  %val.cast = addrspacecast ptr addrspace(3) %val to ptr
+  call void @can_infer_cmpxchg(ptr %g1.cast)
+  call void @can_infer_cmpxchg(ptr %g2.cast)
+  call void @can_not_infer_cmpxchg(ptr %g1.cast)
+  call void @can_not_infer_cmpxchg(ptr %g2.cast)
+  call void @can_not_infer_cmpxchg(ptr %val.cast)
+  call void @can_infer_atomicrmw(ptr %g1.cast)
+  call void @can_infer_atomicrmw(ptr %g2.cast)
+  call void @can_not_infer_atomicrmw(ptr %g1.cast)
+  call void @can_not_infer_atomicrmw(ptr %g2.cast)
+  call void @can_not_infer_atomicrmw(ptr %val.cast)
+  ret void
+}
+
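
Illustrative note (not part of the patch): a minimal sketch, in the same style as the
tests above, of what the extended AAAddressSpace handling is meant to achieve for atomics
once the Attributor can prove a flat pointer only ever refers to global memory. The
function and value names below are made up for the example.

  ; Input: a flat pointer derived from a global-memory pointer.
  define void @example(ptr addrspace(1) %g) {
    %flat = addrspacecast ptr addrspace(1) %g to ptr
    %old = atomicrmw add ptr %flat, i32 1 monotonic, align 4
    ret void
  }

  ; Expected shape after amdgpu-attributor: the atomicrmw operates on an addrspace(1)
  ; pointer (either %g itself or a cast back to addrspace(1)), mirroring the
  ; can_infer_atomicrmw checks above:
  ;   %0 = addrspacecast ptr %flat to ptr addrspace(1)
  ;   %old = atomicrmw add ptr addrspace(1) %0, i32 1 monotonic, align 4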