[NFC][AMDGPU] Use m_Intrinsic and m_AnyIntrinsic PatternMatch combinators across the backend #194337
Conversation
@llvm/pr-subscribers-backend-amdgpu

Author: Arseniy Obolenskiy (aobolensk)

Changes: Apply the m_Intrinsic / m_AnyIntrinsic combinators (introduced in #189554) to AMDGPU passes, replacing IntrinsicInst dyn_casts and intrinsic ID switches with a single match() call.

Full diff: https://github.com/llvm/llvm-project/pull/194337.diff

8 Files Affected:
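As a quick illustration of the shape of the change before the per-file diff (a minimal sketch, not an excerpt from the diff; `example` and the chosen intrinsics are illustrative):

```cpp
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;

static bool example(const Value *V) {
  // Before: dyn_cast to IntrinsicInst, then compare the intrinsic ID.
  if (const auto *II = dyn_cast<IntrinsicInst>(V))
    if (II->getIntrinsicID() == Intrinsic::amdgcn_kill)
      return true;

  // After: a single match() call using the m_Intrinsic combinator.
  using namespace PatternMatch;
  if (match(V, m_Intrinsic<Intrinsic::amdgcn_kill>()))
    return true;

  // m_AnyIntrinsic collapses a switch over several intrinsic IDs into one
  // match() call.
  return match(V, m_AnyIntrinsic<Intrinsic::amdgcn_s_barrier,
                                 Intrinsic::amdgcn_wave_barrier>());
}
```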
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index e882e81a40287..d8c93d9315ca5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -16,6 +16,7 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"
@@ -1571,6 +1572,7 @@ static bool runImpl(SetVector<Function *> &Functions, bool IsModulePass,
bool DeleteFns, Module &M, AnalysisGetter &AG,
TargetMachine &TM, AMDGPUAttributorOptions Options,
ThinOrFullLTOPhase LTOPhase) {
+ using namespace PatternMatch;
CallGraphUpdater CGUpdater;
BumpPtrAllocator Allocator;
@@ -1641,10 +1643,8 @@ static bool runImpl(SetVector<Function *> &Functions, bool IsModulePass,
if (Ptr) {
A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
- if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
- A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
- }
+ if (match(Ptr, m_Intrinsic<Intrinsic::amdgcn_make_buffer_rsrc>()))
+ A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index b973e5da87b15..23b535706b6f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -788,7 +788,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
//
// llvm.amdgcn.rcp(llvm.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable and
// relaxed.
- if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
+ if (match(SrcCI,
+ m_AnyIntrinsic<Intrinsic::amdgcn_sqrt, Intrinsic::sqrt>())) {
const FPMathOperator *SqrtOp = cast<FPMathOperator>(SrcCI);
FastMathFlags InnerFMF = SqrtOp->getFastMathFlags();
if (!InnerFMF.allowContract() || !SrcCI->hasOneUse())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 63e265612cbf7..ef31a165ae9c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
@@ -160,6 +161,7 @@ class LiveRegOptimizer {
}
bool isCoercionProfitable(Instruction *II) {
+ using namespace PatternMatch;
SmallPtrSet<Instruction *, 4> CVisited;
SmallVector<Instruction *, 4> UserList;
@@ -170,9 +172,8 @@ class LiveRegOptimizer {
UserList.push_back(UseInst);
auto IsLookThru = [](Instruction *II) {
- if (const auto *Intr = dyn_cast<IntrinsicInst>(II))
- return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
- return isa<PHINode, ShuffleVectorInst, InsertElementInst,
+ return match(II, m_Intrinsic<Intrinsic::amdgcn_perm>()) ||
+ isa<PHINode, ShuffleVectorInst, InsertElementInst,
ExtractElementInst, CastInst>(II);
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
index 9fbb19df1ba53..f89ff75f327c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ReplaceConstant.h"
#define DEBUG_TYPE "amdgpu-memory-utils"
@@ -354,33 +355,29 @@ void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
}
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
+ using namespace PatternMatch;
Instruction *DefInst = Def->getMemoryInst();
if (isa<FenceInst>(DefInst))
return false;
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::amdgcn_s_barrier:
- case Intrinsic::amdgcn_s_cluster_barrier:
- case Intrinsic::amdgcn_s_barrier_signal:
- case Intrinsic::amdgcn_s_barrier_signal_var:
- case Intrinsic::amdgcn_s_barrier_signal_isfirst:
- case Intrinsic::amdgcn_s_barrier_init:
- case Intrinsic::amdgcn_s_barrier_join:
- case Intrinsic::amdgcn_s_barrier_wait:
- case Intrinsic::amdgcn_s_barrier_leave:
- case Intrinsic::amdgcn_s_get_barrier_state:
- case Intrinsic::amdgcn_s_wakeup_barrier:
- case Intrinsic::amdgcn_wave_barrier:
- case Intrinsic::amdgcn_sched_barrier:
- case Intrinsic::amdgcn_sched_group_barrier:
- case Intrinsic::amdgcn_iglp_opt:
- return false;
- default:
- break;
- }
- }
+ if (match(
+ DefInst,
+ m_AnyIntrinsic<
+ Intrinsic::amdgcn_s_barrier, Intrinsic::amdgcn_s_cluster_barrier,
+ Intrinsic::amdgcn_s_barrier_signal,
+ Intrinsic::amdgcn_s_barrier_signal_var,
+ Intrinsic::amdgcn_s_barrier_signal_isfirst,
+ Intrinsic::amdgcn_s_barrier_init,
+ Intrinsic::amdgcn_s_barrier_join,
+ Intrinsic::amdgcn_s_barrier_wait,
+ Intrinsic::amdgcn_s_barrier_leave,
+ Intrinsic::amdgcn_s_get_barrier_state,
+ Intrinsic::amdgcn_s_wakeup_barrier,
+ Intrinsic::amdgcn_wave_barrier, Intrinsic::amdgcn_sched_barrier,
+ Intrinsic::amdgcn_sched_group_barrier,
+ Intrinsic::amdgcn_iglp_opt>()))
+ return false;
// Ignore atomics not aliasing with the original load, any atomic is a
// universal MemoryDef from MSSA's point of view too, just like a fence.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 0981f6becffc4..432c0bbfc07a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -636,6 +636,7 @@ static Value *promoteAllocaUserToVector(Instruction *Inst, const DataLayout &DL,
unsigned VecStoreSize,
unsigned ElementSize,
function_ref<Value *()> GetCurVal) {
+ using namespace PatternMatch;
// Note: we use InstSimplifyFolder because it can leverage the DataLayout
// to do more folding, especially in the case of vector splats.
IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
@@ -804,13 +805,12 @@ static Value *promoteAllocaUserToVector(Instruction *Inst, const DataLayout &DL,
return Builder.CreateVectorSplat(AA.Vector.Ty->getElementCount(), Elt);
}
- if (auto *Intr = dyn_cast<IntrinsicInst>(Inst)) {
- if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
- Intr->replaceAllUsesWith(
- Builder.getIntN(Intr->getType()->getIntegerBitWidth(),
- DL.getTypeAllocSize(AA.Vector.Ty)));
- return nullptr;
- }
+ if (match(Inst, m_Intrinsic<Intrinsic::objectsize>())) {
+ auto *Intr = cast<IntrinsicInst>(Inst);
+ Intr->replaceAllUsesWith(
+ Builder.getIntN(Intr->getType()->getIntegerBitWidth(),
+ DL.getTypeAllocSize(AA.Vector.Ty)));
+ return nullptr;
}
llvm_unreachable("Unsupported call when promoting alloca to vector");
@@ -964,6 +964,7 @@ AMDGPUPromoteAllocaImpl::getVectorTypeForAlloca(Type *AllocaTy) const {
}
void AMDGPUPromoteAllocaImpl::analyzePromoteToVector(AllocaAnalysis &AA) const {
+ using namespace PatternMatch;
if (AA.HaveSelectOrPHI) {
LLVM_DEBUG(dbgs() << " Cannot convert to vector due to select or phi\n");
return;
@@ -1078,11 +1079,9 @@ void AMDGPUPromoteAllocaImpl::analyzePromoteToVector(AllocaAnalysis &AA) const {
continue;
}
- if (auto *Intr = dyn_cast<IntrinsicInst>(Inst)) {
- if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
- AA.Vector.Worklist.push_back(Inst);
- continue;
- }
+ if (match(Inst, m_Intrinsic<Intrinsic::objectsize>())) {
+ AA.Vector.Worklist.push_back(Inst);
+ continue;
}
// Ignore assume-like intrinsics and comparisons used in assumes.
@@ -1316,25 +1315,15 @@ Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
}
static bool isCallPromotable(CallInst *CI) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
- if (!II)
- return false;
-
- switch (II->getIntrinsicID()) {
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::launder_invariant_group:
- case Intrinsic::strip_invariant_group:
- case Intrinsic::objectsize:
- return true;
- default:
- return false;
- }
+ using namespace PatternMatch;
+ return match(
+ CI,
+ m_AnyIntrinsic<Intrinsic::memcpy, Intrinsic::memmove, Intrinsic::memset,
+ Intrinsic::lifetime_start, Intrinsic::lifetime_end,
+ Intrinsic::invariant_start, Intrinsic::invariant_end,
+ Intrinsic::launder_invariant_group,
+ Intrinsic::strip_invariant_group,
+ Intrinsic::objectsize>());
}
bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 030a9008c34dc..6cc378035b299 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1216,16 +1216,13 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
if (!CI)
return false;
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(CI)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::amdgcn_if:
- case Intrinsic::amdgcn_else: {
+ if (isa<IntrinsicInst>(CI)) {
+ if (match(CI,
+ m_AnyIntrinsic<Intrinsic::amdgcn_if, Intrinsic::amdgcn_else>())) {
ArrayRef<unsigned> Indices = ExtValue->getIndices();
return Indices.size() == 1 && Indices[0] == 1;
}
- }
+ return false;
}
// If we have inline asm returning mixed SGPR and VGPR results, we inferred
@@ -1796,14 +1793,9 @@ unsigned GCNTTIImpl::getNumberOfParts(Type *Tp) const {
}
ValueUniformity GCNTTIImpl::getValueUniformity(const Value *V) const {
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
- switch (Intrinsic->getIntrinsicID()) {
- case Intrinsic::amdgcn_wave_shuffle:
- return ValueUniformity::Custom;
- default:
- break;
- }
- }
+ using namespace llvm::PatternMatch;
+ if (match(V, m_Intrinsic<Intrinsic::amdgcn_wave_shuffle>()))
+ return ValueUniformity::Custom;
if (isAlwaysUniform(V))
return ValueUniformity::AlwaysUniform;
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 54ec4a51a4ab3..57d75fc931b13 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -166,10 +167,10 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
}
bool SIAnnotateControlFlow::hasKill(const BasicBlock *BB) {
+ using namespace PatternMatch;
for (const Instruction &I : *BB) {
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (CI->getIntrinsicID() == Intrinsic::amdgcn_kill)
- return true;
+ if (match(&I, m_Intrinsic<Intrinsic::amdgcn_kill>()))
+ return true;
}
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f08e12a7fbf31..87bae7e3f423b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/ModRef.h"
@@ -20281,30 +20282,16 @@ static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited,
return false;
bool Result = false;
for (const auto *U : V->users()) {
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
- if (V == U->getOperand(1)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- Result = false;
- break;
- case Intrinsic::amdgcn_if_break:
- case Intrinsic::amdgcn_if:
- case Intrinsic::amdgcn_else:
- Result = true;
- break;
- }
- }
- if (V == U->getOperand(0)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- Result = false;
- break;
- case Intrinsic::amdgcn_end_cf:
- case Intrinsic::amdgcn_loop:
- Result = true;
- break;
- }
- }
+ if (isa<IntrinsicInst>(U)) {
+ if (V == U->getOperand(1))
+ Result = PatternMatch::match(
+ U, PatternMatch::m_AnyIntrinsic<Intrinsic::amdgcn_if_break,
+ Intrinsic::amdgcn_if,
+ Intrinsic::amdgcn_else>());
+ if (V == U->getOperand(0))
+ Result = PatternMatch::match(
+ U, PatternMatch::m_AnyIntrinsic<Intrinsic::amdgcn_end_cf,
+ Intrinsic::amdgcn_loop>());
} else {
Result = hasCFUser(U, Visited, WaveSize);
}
Review comment on AMDGPUPromoteAlloca.cpp:

    void AMDGPUPromoteAllocaImpl::analyzePromoteToVector(AllocaAnalysis &AA) const {
      using namespace PatternMatch;

Don't bother with using namespace if there is only one use; I don't think it helps readability. Same in this file and the others.
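One way to follow this suggestion without the using-directive is a fully qualified call, which the SIISelLowering.cpp hunk in this diff already uses (a sketch reusing `Inst` and `AA` from the analyzePromoteToVector hunk above, not a proposed edit):

```cpp
// Fully qualified call; avoids a function-local using-directive when the
// function contains only a single match().
if (PatternMatch::match(Inst,
                        PatternMatch::m_Intrinsic<Intrinsic::objectsize>())) {
  AA.Vector.Worklist.push_back(Inst);
  continue;
}
```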
Review comment on AMDGPUAttributor.cpp:

    A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
    A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
      if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)

This kind of usage doesn't really fit the usual PatternMatch pattern. This is something that would more naturally be a switch over intrinsic ID; it just happens this only covers one intrinsic now.
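A sketch of the switch-over-intrinsic-ID form this comment describes, reusing the names from the AMDGPUAttributor hunk above (not part of the PR):

```cpp
// Switch over the intrinsic ID; covers one intrinsic today but extends
// naturally if more IDs ever need the AAAlign attribute.
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
  switch (II->getIntrinsicID()) {
  case Intrinsic::amdgcn_make_buffer_rsrc:
    A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
    break;
  default:
    break;
  }
}
```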
Quoted context from AMDGPUMemoryUtils.cpp:

    if (isa<FenceInst>(DefInst))
      return false;

    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {

arsenm left a comment:

I don't think PatternMatch should be used in most of these cases.
I'm not sure I get the criteria for where it is appropriate to use. In the AMDGPU backend it is used in a variety of passes. As for the cases where it matches only one intrinsic, yes, it is understandable that dyn_cast is simpler to use there.
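To make the trade-off being discussed concrete, here is a hedged sketch of the compositional usage where the combinators clearly pay off (`isRcpOfSqrt` is an illustrative helper, not part of the PR; the shape echoes the rcp(sqrt(x)) comment in the AMDGPUInstCombineIntrinsic hunk):

```cpp
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace PatternMatch;

// One expression checks the intrinsic ID and constrains the operand: it
// matches llvm.amdgcn.rcp(llvm.sqrt(x)) and binds x. A bare ID test gets
// no such benefit over dyn_cast or a switch.
static bool isRcpOfSqrt(const Value *V, const Value *&X) {
  return match(V, m_Intrinsic<Intrinsic::amdgcn_rcp>(
                      m_Intrinsic<Intrinsic::sqrt>(m_Value(X))));
}
```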