Skip to content

Conversation

artagnon
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 15, 2025

@llvm/pr-subscribers-vectorizers

Author: Ramkumar Ramachandra (artagnon)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/158680.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+2-19)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp (+42)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+6)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 640a98c622f80..6e6ac735b1f37 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8474,6 +8474,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
                                   CM.foldTailByMasking());
 
   VPlanTransforms::createLoopRegions(*Plan);
+  VPlanTransforms::removeInvariantStoresOfReduction(
+      *Plan, Legal->getReductionVars(), *LVer);
 
   // Don't use getDecisionAndClampRange here, because we don't know the UF
   // so this function is better to be conservative, rather than to split
@@ -8546,8 +8548,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
       HeaderVPBB);
 
-  auto *MiddleVPBB = Plan->getMiddleBlock();
-  VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
   // Mapping from VPValues in the initial plan to their widened VPValues. Needed
   // temporarily to update created block masks.
   DenseMap<VPValue *, VPValue *> Old2New;
@@ -8576,23 +8576,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
       Instruction *Instr = cast<Instruction>(UnderlyingValue);
       Builder.setInsertPoint(SingleDef);
 
-      // The stores with invariant address inside the loop will be deleted, and
-      // in the exit block, a uniform store recipe will be created for the final
-      // invariant store of the reduction.
-      StoreInst *SI;
-      if ((SI = dyn_cast<StoreInst>(Instr)) &&
-          Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
-        // Only create recipe for the final invariant store of the reduction.
-        if (Legal->isInvariantStoreOfReduction(SI)) {
-          auto *Recipe =
-              new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */,
-                                    nullptr /*Mask*/, VPIRMetadata(*SI, LVer));
-          Recipe->insertBefore(*MiddleVPBB, MBIP);
-        }
-        R.eraseFromParent();
-        continue;
-      }
-
       VPRecipeBase *Recipe =
           RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
       if (!Recipe)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index cef91c15dd873..849056659edb5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -634,6 +634,48 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan) {
   TopRegion->getEntryBasicBlock()->setName("vector.body");
 }
 
+void VPlanTransforms::removeInvariantStoresOfReduction(
+    VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+    LoopVersioning &LVer) {
+  auto *MiddleVPBB = Plan.getMiddleBlock();
+  VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
+
+  auto IsInvariantStore = [Rdxs](StoreInst *SI) {
+    return any_of(Rdxs, [SI](const auto &RdxDesc) {
+      return RdxDesc.second.IntermediateStore == SI;
+    });
+  };
+
+  auto IsInvariantAddr = [Rdxs](Value *V) {
+    return any_of(Rdxs, [V](const auto &RdxDesc) {
+      auto *SI = RdxDesc.second.IntermediateStore;
+      return SI && SI->getPointerOperand() == V;
+    });
+  };
+
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+      auto *Def = cast<VPSingleDefRecipe>(&R);
+
+      // The stores with invariant address inside the loop will be deleted, and
+      // in the exit block, a uniform store recipe will be created for the final
+      // invariant store of the reduction.
+      StoreInst *SI = dyn_cast_if_present<StoreInst>(Def->getUnderlyingValue());
+      if (!SI || !IsInvariantAddr(SI->getPointerOperand()))
+        continue;
+      if (IsInvariantStore(SI)) {
+        // Only create recipe for the final invariant store of the reduction.
+        auto *Recipe =
+            new VPReplicateRecipe(SI, Def->operands(), /*IsSingleScalar=*/true,
+                                  /*Mask=*/nullptr, VPIRMetadata(*SI, &LVer));
+        Recipe->insertBefore(*MiddleVPBB, MBIP);
+      }
+      Def->eraseFromParent();
+    }
+  }
+}
+
 // Likelyhood of bypassing the vectorized loop due to a runtime check block,
 // including memory overlap checks block and wrapping/unit-stride checks block.
 static constexpr uint32_t CheckBypassWeights[] = {1, 127};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 69452a7e37572..a567ae127cf63 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -121,6 +121,12 @@ struct VPlanTransforms {
   /// flat CFG into a hierarchical CFG.
   LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
 
+  /// Remove invariant stores of reductions, given \p Rdxs, in \p Plan. \p LVer
+  /// is used to create metadata from an existing store.
+  static void removeInvariantStoresOfReduction(
+      VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+      LoopVersioning &LVer);
+
   /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
   /// VPValue and connect the block to \p Plan, using the VPValue as branch
   /// condition.

@llvmbot
Copy link
Member

llvmbot commented Sep 15, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/158680.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+2-19)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp (+42)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+6)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 640a98c622f80..6e6ac735b1f37 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8474,6 +8474,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
                                   CM.foldTailByMasking());
 
   VPlanTransforms::createLoopRegions(*Plan);
+  VPlanTransforms::removeInvariantStoresOfReduction(
+      *Plan, Legal->getReductionVars(), *LVer);
 
   // Don't use getDecisionAndClampRange here, because we don't know the UF
   // so this function is better to be conservative, rather than to split
@@ -8546,8 +8548,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
       HeaderVPBB);
 
-  auto *MiddleVPBB = Plan->getMiddleBlock();
-  VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
   // Mapping from VPValues in the initial plan to their widened VPValues. Needed
   // temporarily to update created block masks.
   DenseMap<VPValue *, VPValue *> Old2New;
@@ -8576,23 +8576,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
       Instruction *Instr = cast<Instruction>(UnderlyingValue);
       Builder.setInsertPoint(SingleDef);
 
-      // The stores with invariant address inside the loop will be deleted, and
-      // in the exit block, a uniform store recipe will be created for the final
-      // invariant store of the reduction.
-      StoreInst *SI;
-      if ((SI = dyn_cast<StoreInst>(Instr)) &&
-          Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
-        // Only create recipe for the final invariant store of the reduction.
-        if (Legal->isInvariantStoreOfReduction(SI)) {
-          auto *Recipe =
-              new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */,
-                                    nullptr /*Mask*/, VPIRMetadata(*SI, LVer));
-          Recipe->insertBefore(*MiddleVPBB, MBIP);
-        }
-        R.eraseFromParent();
-        continue;
-      }
-
       VPRecipeBase *Recipe =
           RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
       if (!Recipe)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index cef91c15dd873..849056659edb5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -634,6 +634,48 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan) {
   TopRegion->getEntryBasicBlock()->setName("vector.body");
 }
 
+void VPlanTransforms::removeInvariantStoresOfReduction(
+    VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+    LoopVersioning &LVer) {
+  auto *MiddleVPBB = Plan.getMiddleBlock();
+  VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
+
+  auto IsInvariantStore = [Rdxs](StoreInst *SI) {
+    return any_of(Rdxs, [SI](const auto &RdxDesc) {
+      return RdxDesc.second.IntermediateStore == SI;
+    });
+  };
+
+  auto IsInvariantAddr = [Rdxs](Value *V) {
+    return any_of(Rdxs, [V](const auto &RdxDesc) {
+      auto *SI = RdxDesc.second.IntermediateStore;
+      return SI && SI->getPointerOperand() == V;
+    });
+  };
+
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+      auto *Def = cast<VPSingleDefRecipe>(&R);
+
+      // The stores with invariant address inside the loop will be deleted, and
+      // in the exit block, a uniform store recipe will be created for the final
+      // invariant store of the reduction.
+      StoreInst *SI = dyn_cast_if_present<StoreInst>(Def->getUnderlyingValue());
+      if (!SI || !IsInvariantAddr(SI->getPointerOperand()))
+        continue;
+      if (IsInvariantStore(SI)) {
+        // Only create recipe for the final invariant store of the reduction.
+        auto *Recipe =
+            new VPReplicateRecipe(SI, Def->operands(), /*IsSingleScalar=*/true,
+                                  /*Mask=*/nullptr, VPIRMetadata(*SI, &LVer));
+        Recipe->insertBefore(*MiddleVPBB, MBIP);
+      }
+      Def->eraseFromParent();
+    }
+  }
+}
+
 // Likelyhood of bypassing the vectorized loop due to a runtime check block,
 // including memory overlap checks block and wrapping/unit-stride checks block.
 static constexpr uint32_t CheckBypassWeights[] = {1, 127};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 69452a7e37572..a567ae127cf63 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -121,6 +121,12 @@ struct VPlanTransforms {
   /// flat CFG into a hierarchical CFG.
   LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
 
+  /// Remove invariant stores of reductions, given \p Rdxs, in \p Plan. \p LVer
+  /// is used to create metadata from an existing store.
+  static void removeInvariantStoresOfReduction(
+      VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+      LoopVersioning &LVer);
+
   /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
   /// VPValue and connect the block to \p Plan, using the VPValue as branch
   /// condition.

@artagnon artagnon requested a review from lukel97 September 22, 2025 09:23
@artagnon
Copy link
Contributor Author

Gentle ping.

TopRegion->getEntryBasicBlock()->setName("vector.body");
}

void VPlanTransforms::removeInvariantStoresOfReduction(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure it is worth moving this out as long as it is still based on LLVM IR references.

@artagnon artagnon closed this Sep 22, 2025
@artagnon artagnon deleted the vplan-remove-inv-stores branch September 22, 2025 09:30
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants