-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Introduce removeInvariantStoresOfReduction (NFCI) #158680
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-vectorizers. Author: Ramkumar Ramachandra (artagnon). Changes: Full diff: https://github.com/llvm/llvm-project/pull/158680.diff (3 Files Affected):
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 640a98c622f80..6e6ac735b1f37 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8474,6 +8474,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
CM.foldTailByMasking());
VPlanTransforms::createLoopRegions(*Plan);
+ VPlanTransforms::removeInvariantStoresOfReduction(
+ *Plan, Legal->getReductionVars(), *LVer);
// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
@@ -8546,8 +8548,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
HeaderVPBB);
- auto *MiddleVPBB = Plan->getMiddleBlock();
- VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
// temporarily to update created block masks.
DenseMap<VPValue *, VPValue *> Old2New;
@@ -8576,23 +8576,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
Instruction *Instr = cast<Instruction>(UnderlyingValue);
Builder.setInsertPoint(SingleDef);
- // The stores with invariant address inside the loop will be deleted, and
- // in the exit block, a uniform store recipe will be created for the final
- // invariant store of the reduction.
- StoreInst *SI;
- if ((SI = dyn_cast<StoreInst>(Instr)) &&
- Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
- // Only create recipe for the final invariant store of the reduction.
- if (Legal->isInvariantStoreOfReduction(SI)) {
- auto *Recipe =
- new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */,
- nullptr /*Mask*/, VPIRMetadata(*SI, LVer));
- Recipe->insertBefore(*MiddleVPBB, MBIP);
- }
- R.eraseFromParent();
- continue;
- }
-
VPRecipeBase *Recipe =
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
if (!Recipe)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index cef91c15dd873..849056659edb5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -634,6 +634,48 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan) {
TopRegion->getEntryBasicBlock()->setName("vector.body");
}
+void VPlanTransforms::removeInvariantStoresOfReduction(
+ VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+ LoopVersioning &LVer) {
+ auto *MiddleVPBB = Plan.getMiddleBlock();
+ VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
+
+ auto IsInvariantStore = [Rdxs](StoreInst *SI) {
+ return any_of(Rdxs, [SI](const auto &RdxDesc) {
+ return RdxDesc.second.IntermediateStore == SI;
+ });
+ };
+
+ auto IsInvariantAddr = [Rdxs](Value *V) {
+ return any_of(Rdxs, [V](const auto &RdxDesc) {
+ auto *SI = RdxDesc.second.IntermediateStore;
+ return SI && SI->getPointerOperand() == V;
+ });
+ };
+
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ auto *Def = cast<VPSingleDefRecipe>(&R);
+
+ // The stores with invariant address inside the loop will be deleted, and
+ // in the exit block, a uniform store recipe will be created for the final
+ // invariant store of the reduction.
+ StoreInst *SI = dyn_cast_if_present<StoreInst>(Def->getUnderlyingValue());
+ if (!SI || !IsInvariantAddr(SI->getPointerOperand()))
+ continue;
+ if (IsInvariantStore(SI)) {
+ // Only create recipe for the final invariant store of the reduction.
+ auto *Recipe =
+ new VPReplicateRecipe(SI, Def->operands(), /*IsSingleScalar=*/true,
+ /*Mask=*/nullptr, VPIRMetadata(*SI, &LVer));
+ Recipe->insertBefore(*MiddleVPBB, MBIP);
+ }
+ Def->eraseFromParent();
+ }
+ }
+}
+
// Likelyhood of bypassing the vectorized loop due to a runtime check block,
// including memory overlap checks block and wrapping/unit-stride checks block.
static constexpr uint32_t CheckBypassWeights[] = {1, 127};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 69452a7e37572..a567ae127cf63 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -121,6 +121,12 @@ struct VPlanTransforms {
/// flat CFG into a hierarchical CFG.
LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
+ /// Remove invariant stores of reductions, given \p Rdxs, in \p Plan. \p LVer
+ /// is used to create metadata from an existing store.
+ static void removeInvariantStoresOfReduction(
+ VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+ LoopVersioning &LVer);
+
/// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
/// VPValue and connect the block to \p Plan, using the VPValue as branch
/// condition.
|
@llvm/pr-subscribers-llvm-transforms. Author: Ramkumar Ramachandra (artagnon). Changes: Full diff: https://github.com/llvm/llvm-project/pull/158680.diff (3 Files Affected):
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 640a98c622f80..6e6ac735b1f37 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8474,6 +8474,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
CM.foldTailByMasking());
VPlanTransforms::createLoopRegions(*Plan);
+ VPlanTransforms::removeInvariantStoresOfReduction(
+ *Plan, Legal->getReductionVars(), *LVer);
// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
@@ -8546,8 +8548,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
HeaderVPBB);
- auto *MiddleVPBB = Plan->getMiddleBlock();
- VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
// temporarily to update created block masks.
DenseMap<VPValue *, VPValue *> Old2New;
@@ -8576,23 +8576,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
Instruction *Instr = cast<Instruction>(UnderlyingValue);
Builder.setInsertPoint(SingleDef);
- // The stores with invariant address inside the loop will be deleted, and
- // in the exit block, a uniform store recipe will be created for the final
- // invariant store of the reduction.
- StoreInst *SI;
- if ((SI = dyn_cast<StoreInst>(Instr)) &&
- Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
- // Only create recipe for the final invariant store of the reduction.
- if (Legal->isInvariantStoreOfReduction(SI)) {
- auto *Recipe =
- new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */,
- nullptr /*Mask*/, VPIRMetadata(*SI, LVer));
- Recipe->insertBefore(*MiddleVPBB, MBIP);
- }
- R.eraseFromParent();
- continue;
- }
-
VPRecipeBase *Recipe =
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
if (!Recipe)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index cef91c15dd873..849056659edb5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -634,6 +634,48 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan) {
TopRegion->getEntryBasicBlock()->setName("vector.body");
}
+void VPlanTransforms::removeInvariantStoresOfReduction(
+ VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+ LoopVersioning &LVer) {
+ auto *MiddleVPBB = Plan.getMiddleBlock();
+ VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
+
+ auto IsInvariantStore = [Rdxs](StoreInst *SI) {
+ return any_of(Rdxs, [SI](const auto &RdxDesc) {
+ return RdxDesc.second.IntermediateStore == SI;
+ });
+ };
+
+ auto IsInvariantAddr = [Rdxs](Value *V) {
+ return any_of(Rdxs, [V](const auto &RdxDesc) {
+ auto *SI = RdxDesc.second.IntermediateStore;
+ return SI && SI->getPointerOperand() == V;
+ });
+ };
+
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ auto *Def = cast<VPSingleDefRecipe>(&R);
+
+ // The stores with invariant address inside the loop will be deleted, and
+ // in the exit block, a uniform store recipe will be created for the final
+ // invariant store of the reduction.
+ StoreInst *SI = dyn_cast_if_present<StoreInst>(Def->getUnderlyingValue());
+ if (!SI || !IsInvariantAddr(SI->getPointerOperand()))
+ continue;
+ if (IsInvariantStore(SI)) {
+ // Only create recipe for the final invariant store of the reduction.
+ auto *Recipe =
+ new VPReplicateRecipe(SI, Def->operands(), /*IsSingleScalar=*/true,
+ /*Mask=*/nullptr, VPIRMetadata(*SI, &LVer));
+ Recipe->insertBefore(*MiddleVPBB, MBIP);
+ }
+ Def->eraseFromParent();
+ }
+ }
+}
+
// Likelyhood of bypassing the vectorized loop due to a runtime check block,
// including memory overlap checks block and wrapping/unit-stride checks block.
static constexpr uint32_t CheckBypassWeights[] = {1, 127};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 69452a7e37572..a567ae127cf63 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -121,6 +121,12 @@ struct VPlanTransforms {
/// flat CFG into a hierarchical CFG.
LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
+ /// Remove invariant stores of reductions, given \p Rdxs, in \p Plan. \p LVer
+ /// is used to create metadata from an existing store.
+ static void removeInvariantStoresOfReduction(
+ VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+ LoopVersioning &LVer);
+
/// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
/// VPValue and connect the block to \p Plan, using the VPValue as branch
/// condition.
|
Gentle ping.
TopRegion->getEntryBasicBlock()->setName("vector.body");
}

void VPlanTransforms::removeInvariantStoresOfReduction(
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure if it is worth moving this out, as long as it is based on LLVM IR references
No description provided.