-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[SLP][NFC]Simplify analysis of the scalars, NFC. #170382
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP][NFC]Simplify analysis of the scalars, NFC. #170382
Conversation
Created using spr 1.3.7
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Alexey Bataev (alexey-bataev) ChangesJust an attempt to simplify some checks, remove extra calls and reorder Full diff: https://github.com/llvm/llvm-project/pull/170382.diff 1 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0eb8ad8d3c93d..2883427f0fff9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11315,44 +11315,90 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
VL, *this, TryCopyableElementsVectorization,
/*WithProfitabilityCheck=*/true, TryCopyableElementsVectorization);
+ bool AreScatterAllGEPSameBlock = false;
+ if (!S) {
+ SmallVector<unsigned> SortedIndices;
+ BasicBlock *BB = nullptr;
+ bool IsScatterVectorizeUserTE =
+ UserTreeIdx.UserTE &&
+ UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
+ AreScatterAllGEPSameBlock =
+ (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
+ VL.size() > 2 &&
+ all_of(VL,
+ [&BB](Value *V) {
+ auto *I = dyn_cast<GetElementPtrInst>(V);
+ if (!I)
+ return doesNotNeedToBeScheduled(V);
+ if (!BB)
+ BB = I->getParent();
+ return BB == I->getParent() && I->getNumOperands() == 2;
+ }) &&
+ BB &&
+ sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL,
+ *SE, SortedIndices));
+ if (!AreScatterAllGEPSameBlock) {
+ LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
+ "C,S,B,O, small shuffle. \n";
+ dbgs() << "[";
+ interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
+ dbgs() << "]\n");
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
+ /*TryToFindDuplicates=*/true,
+ /*TrySplitVectorize=*/true);
+ }
+ // Reset S to make it GetElementPtr kind of node.
+ const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
+ assert(It != VL.end() && "Expected at least one GEP.");
+ S = getSameOpcode(*It, *TLI);
+ }
+
+ // Don't handle vectors.
+ if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
+ // Do not try to pack to avoid extra instructions here.
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
+ /*TryToFindDuplicates=*/false);
+ }
+
+ // Check that all of the users of the scalars that we want to vectorize are
+ // schedulable.
+ BasicBlock *BB = S.getMainOp()->getParent();
+
+ if (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
+ !DT->isReachableFromEntry(BB)) {
+ // Don't go into unreachable blocks. They may contain instructions with
+ // dependency cycles which confuse the final scheduling.
+ // Do not vectorize EH and non-returning blocks, not profitable in most
+ // cases.
+ LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
+ }
+
// Don't go into catchswitch blocks, which can happen with PHIs.
// Such blocks can only have PHIs and the catchswitch. There is no
// place to insert a shuffle if we need to, so just avoid that issue.
- if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
+ if (isa<CatchSwitchInst>(BB->getTerminator())) {
LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
// Do not try to pack to avoid extra instructions here.
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
/*TryToFindDuplicates=*/false);
}
- // Check if this is a duplicate of another entry.
- if (S) {
- LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
- for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
- if (E->isSame(VL)) {
- LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
- << ".\n");
- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
- }
- SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
- if (all_of(VL, [&](Value *V) {
- return isa<PoisonValue>(V) || Values.contains(V) ||
- (S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
- LI->getLoopFor(S.getMainOp()->getParent()) &&
- isVectorized(V));
- })) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
- }
- }
+ // Don't handle scalable vectors
+ if (S.getOpcode() == Instruction::ExtractElement &&
+ isa<ScalableVectorType>(
+ cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
// Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
// a load), in which case peek through to include it in the tree, without
// ballooning over-budget.
if (Depth >= RecursionMaxDepth &&
- !(S && !S.isAltShuffle() && VL.size() >= 4 &&
- (match(S.getMainOp(), m_Load(m_Value())) ||
+ (S.isAltShuffle() || VL.size() < 4 ||
+ !(match(S.getMainOp(), m_Load(m_Value())) ||
all_of(VL, [&S](const Value *I) {
return match(I,
m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
@@ -11362,20 +11408,24 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
- // Don't handle scalable vectors
- if (S && S.getOpcode() == Instruction::ExtractElement &&
- isa<ScalableVectorType>(
- cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
- }
-
- // Don't handle vectors.
- if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
- // Do not try to pack to avoid extra instructions here.
- return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
- /*TryToFindDuplicates=*/false);
+ // Check if this is a duplicate of another entry.
+ LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
+ for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
+ if (E->isSame(VL)) {
+ LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
+ << ".\n");
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
+ }
+ SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
+ if (all_of(VL, [&](Value *V) {
+ return isa<PoisonValue>(V) || Values.contains(V) ||
+ (S.getOpcode() == Instruction::PHI && isa<PHINode>(V) &&
+ LI->getLoopFor(S.getMainOp()->getParent()) &&
+ isVectorized(V));
+ })) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
+ }
}
// If all of the operands are identical or constant we have a simple solution.
@@ -11434,44 +11484,13 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
}
return true;
};
- SmallVector<unsigned> SortedIndices;
- BasicBlock *BB = nullptr;
- bool IsScatterVectorizeUserTE =
- UserTreeIdx.UserTE &&
- UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
- bool AreAllSameBlock = S.valid();
- bool AreScatterAllGEPSameBlock =
- (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
- VL.size() > 2 &&
- all_of(VL,
- [&BB](Value *V) {
- auto *I = dyn_cast<GetElementPtrInst>(V);
- if (!I)
- return doesNotNeedToBeScheduled(V);
- if (!BB)
- BB = I->getParent();
- return BB == I->getParent() && I->getNumOperands() == 2;
- }) &&
- BB &&
- sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE,
- SortedIndices));
+ bool AreAllSameBlock = !AreScatterAllGEPSameBlock;
bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
- if (!AreAllSameInsts || (!S && allConstant(VL)) || isSplat(VL) ||
- (S &&
- isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
+ if (!AreAllSameInsts || isSplat(VL) ||
+ (isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
S.getMainOp()) &&
!all_of(VL, isVectorLikeInstWithConstOps)) ||
NotProfitableForVectorization(VL)) {
- if (!S) {
- LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
- "C,S,B,O, small shuffle. \n";
- dbgs() << "[";
- interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
- dbgs() << "]\n");
- return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
- /*TryToFindDuplicates=*/true,
- /*TrySplitVectorize=*/true);
- }
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n";
dbgs() << "[";
interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
@@ -11480,7 +11499,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
}
// Don't vectorize ephemeral values.
- if (S && !EphValues.empty()) {
+ if (!EphValues.empty()) {
for (Value *V : VL) {
if (EphValues.count(V)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
@@ -11498,7 +11517,7 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
// Check that none of the instructions in the bundle are already in the tree
// and the node may be not profitable for the vectorization as the small
// alternate node.
- if (S && S.isAltShuffle()) {
+ if (S.isAltShuffle()) {
auto GetNumVectorizedExtracted = [&]() {
APInt Extracted = APInt::getZero(VL.size());
APInt Vectorized = APInt::getAllOnes(VL.size());
@@ -11550,33 +11569,6 @@ BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
}
}
- // Special processing for sorted pointers for ScatterVectorize node with
- // constant indeces only.
- if (!AreAllSameBlock && AreScatterAllGEPSameBlock) {
- assert(VL.front()->getType()->isPointerTy() &&
- count_if(VL, IsaPred<GetElementPtrInst>) >= 2 &&
- "Expected pointers only.");
- // Reset S to make it GetElementPtr kind of node.
- const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
- assert(It != VL.end() && "Expected at least one GEP.");
- S = getSameOpcode(*It, *TLI);
- }
-
- // Check that all of the users of the scalars that we want to vectorize are
- // schedulable.
- Instruction *VL0 = S.getMainOp();
- BB = VL0->getParent();
-
- if (S &&
- (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) ||
- !DT->isReachableFromEntry(BB))) {
- // Don't go into unreachable blocks. They may contain instructions with
- // dependency cycles which confuse the final scheduling.
- // Do not vectorize EH and non-returning blocks, not profitable in most
- // cases.
- LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
- return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
- }
return ScalarsVectorizationLegality(S, /*IsLegal=*/true);
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull request overview
This PR refactors the getScalarsVectorizationLegality function in the SLP vectorizer to simplify the analysis flow and improve performance. The changes reorder various validation checks and eliminate redundant condition evaluations.
- Moves early-exit checks (unreachable blocks, EH pads, vector types, scalable vectors) to occur before more expensive analyses
- Eliminates redundant null checks on the
SInstructionsState variable throughout the function - Hoists the scatter-vectorize GEP analysis to execute earlier in the function flow
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| BB && | ||
| sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE, | ||
| SortedIndices)); | ||
| bool AreAllSameBlock = !AreScatterAllGEPSameBlock; |
Copilot
AI
Dec 2, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The variable AreAllSameBlock is now unconditionally set to !AreScatterAllGEPSameBlock, which differs from the original logic where it was set to S.valid(). This appears to be a logic change rather than just a refactoring. If this is intentional, the variable name may be misleading since it no longer represents whether all values are in the same block, but rather whether scatter GEP analysis was not performed.
| bool AreAllSameBlock = !AreScatterAllGEPSameBlock; | |
| bool AreAllSameBlock = S.valid(); |
| if (!AreAllSameInsts || isSplat(VL) || | ||
| (isa<InsertElementInst, ExtractValueInst, ExtractElementInst>( | ||
| S.getMainOp()) && | ||
| !all_of(VL, isVectorLikeInstWithConstOps)) || | ||
| NotProfitableForVectorization(VL)) { |
Copilot
AI
Dec 2, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The removal of the allConstant(VL) check and the !S guard before the isa<...> check could lead to a null pointer dereference if S.getMainOp() is null when S is invalid. The original code only performed this check when S was valid.
Created using spr 1.3.7
Just an attempt to simplify some checks, remove extra calls and reorder checks to make code simpler and faster Reviewers: RKSimon, hiraditya Reviewed By: hiraditya Pull Request: llvm/llvm-project#170382
Just an attempt to simplify some checks, remove extra calls and reorder checks to make code simpler and faster Reviewers: RKSimon, hiraditya Reviewed By: hiraditya Pull Request: llvm#170382
Just an attempt to simplify some checks, remove extra calls and reorder
checks to make code simpler and faster