diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 640a98c622f80..6e6ac735b1f37 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8474,6 +8474,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
                                   CM.foldTailByMasking());
 
   VPlanTransforms::createLoopRegions(*Plan);
+  VPlanTransforms::removeInvariantStoresOfReduction(
+      *Plan, Legal->getReductionVars(), *LVer);
 
   // Don't use getDecisionAndClampRange here, because we don't know the UF
   // so this function is better to be conservative, rather than to split
@@ -8546,8 +8548,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
       HeaderVPBB);
 
-  auto *MiddleVPBB = Plan->getMiddleBlock();
-  VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
   // Mapping from VPValues in the initial plan to their widened VPValues. Needed
   // temporarily to update created block masks.
   DenseMap<VPValue *, VPValue *> Old2New;
@@ -8576,23 +8576,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
       Instruction *Instr = cast<Instruction>(UnderlyingValue);
       Builder.setInsertPoint(SingleDef);
 
-      // The stores with invariant address inside the loop will be deleted, and
-      // in the exit block, a uniform store recipe will be created for the final
-      // invariant store of the reduction.
-      StoreInst *SI;
-      if ((SI = dyn_cast<StoreInst>(Instr)) &&
-          Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
-        // Only create recipe for the final invariant store of the reduction.
-        if (Legal->isInvariantStoreOfReduction(SI)) {
-          auto *Recipe =
-              new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */,
-                                    nullptr /*Mask*/, VPIRMetadata(*SI, LVer));
-          Recipe->insertBefore(*MiddleVPBB, MBIP);
-        }
-        R.eraseFromParent();
-        continue;
-      }
-
       VPRecipeBase *Recipe =
           RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
       if (!Recipe)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index cef91c15dd873..849056659edb5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -634,6 +634,58 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan) {
   TopRegion->getEntryBasicBlock()->setName("vector.body");
 }
 
+void VPlanTransforms::removeInvariantStoresOfReduction(
+    VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+    LoopVersioning &LVer) {
+  // New recipes are inserted at the first non-phi position of the middle block.
+  auto *MiddleVPBB = Plan.getMiddleBlock();
+  VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
+
+  // Returns true if SI is the intermediate store of one of the reductions.
+  // Rdxs is captured by reference here and below: the helpers do not outlive
+  // this function, and a by-value capture would copy the whole map.
+  auto IsInvariantStore = [&Rdxs](StoreInst *SI) {
+    return any_of(Rdxs, [SI](const auto &RdxDesc) {
+      return RdxDesc.second.IntermediateStore == SI;
+    });
+  };
+
+  // Returns true if V is the pointer operand of the intermediate store of
+  // one of the reductions.
+  // NOTE(review): the address is matched by Value identity only; confirm this
+  // agrees with Legal->isInvariantAddressOfReduction(), which this transform
+  // replaces.
+  auto IsInvariantAddr = [&Rdxs](Value *V) {
+    return any_of(Rdxs, [V](const auto &RdxDesc) {
+      auto *SI = RdxDesc.second.IntermediateStore;
+      return SI && SI->getPointerOperand() == V;
+    });
+  };
+
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+    // Recipes are erased while iterating, hence the early-inc range.
+    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+      auto *Def = cast<VPSingleDefRecipe>(&R);
+
+      // Stores to the invariant address inside the loop are deleted; for the
+      // final invariant store of the reduction, a single-scalar store recipe
+      // is created in the middle block instead.
+      StoreInst *SI = dyn_cast_if_present<StoreInst>(Def->getUnderlyingValue());
+      if (!SI || !IsInvariantAddr(SI->getPointerOperand()))
+        continue;
+      if (IsInvariantStore(SI)) {
+        // Only create recipe for the final invariant store of the reduction.
+        auto *Recipe =
+            new VPReplicateRecipe(SI, Def->operands(), /*IsSingleScalar=*/true,
+                                  /*Mask=*/nullptr, VPIRMetadata(*SI, &LVer));
+        Recipe->insertBefore(*MiddleVPBB, MBIP);
+      }
+      Def->eraseFromParent();
+    }
+  }
+}
+
 // Likelyhood of bypassing the vectorized loop due to a runtime check block,
 // including memory overlap checks block and wrapping/unit-stride checks block.
 static constexpr uint32_t CheckBypassWeights[] = {1, 127};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 69452a7e37572..a567ae127cf63 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -121,6 +121,15 @@ struct VPlanTransforms {
   /// flat CFG into a hierarchical CFG.
   LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
 
+  /// Remove the stores to the invariant address of a reduction's intermediate
+  /// store, given the reductions \p Rdxs, from the vector loop region of
+  /// \p Plan. For the intermediate store itself, a single-scalar store recipe
+  /// is created in the middle block. \p LVer is used to create metadata from
+  /// the existing store.
+  static void removeInvariantStoresOfReduction(
+      VPlan &Plan, const MapVector<PHINode *, RecurrenceDescriptor> &Rdxs,
+      LoopVersioning &LVer);
+
   /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
   /// VPValue and connect the block to \p Plan, using the VPValue as branch
   /// condition.