From eca14a810e592be7b6e8d95a77263f5131b338f2 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 16 Mar 2023 17:06:44 +0000 Subject: [PATCH] [VPlan] Consolidate replicate region optimizations (NFC). As suggested in D143865, consolidate replicate region creation and optimization in a single helper that's exposed and used by LV. --- .../Transforms/Vectorize/LoopVectorize.cpp | 11 +-- .../Transforms/Vectorize/VPlanTransforms.cpp | 76 +++++++++++-------- .../Transforms/Vectorize/VPlanTransforms.h | 14 ++-- 3 files changed, 51 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 86a190fd2bf53..18576cf3c707e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9068,16 +9068,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE()); VPlanTransforms::removeDeadRecipes(*Plan); - // Convert masked VPReplicateRecipes to if-then region blocks. - VPlanTransforms::addReplicateRegions(*Plan, RecipeBuilder); - - bool ShouldSimplify = true; - while (ShouldSimplify) { - ShouldSimplify = VPlanTransforms::sinkScalarOperands(*Plan); - ShouldSimplify |= - VPlanTransforms::mergeReplicateRegionsIntoSuccessors(*Plan); - ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(*Plan); - } + VPlanTransforms::createAndOptimizeReplicateRegions(*Plan, RecipeBuilder); VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan); VPlanTransforms::mergeBlocksIntoPredecessors(*Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 118124cac60d6..e642d264b1c75 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -106,7 +106,7 @@ void VPlanTransforms::VPInstructionsToVPRecipes( } } -bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { +static bool sinkScalarOperands(VPlan &Plan) { auto Iter = vp_depth_first_deep(Plan.getEntry()); bool Changed = false; // First, collect the operands of all recipes in replicate blocks as seeds for @@ -223,7 +223,10 @@ static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { return nullptr; } -bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { +// Merge replicate regions in their successor region, if a replicate region +// is connected to a successor replicate region with the same predicate by a +// single, empty VPBasicBlock. +static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { SetVector DeletedRegions; // Collect replicate regions followed by an empty block, followed by another @@ -311,6 +314,46 @@ bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { return !DeletedRegions.empty(); } +static void addReplicateRegions(VPlan &Plan, VPRecipeBuilder &Builder) { + SmallVector WorkList; + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( + vp_depth_first_deep(Plan.getEntry()))) { + for (VPRecipeBase &R : *VPBB) + if (auto *RepR = dyn_cast(&R)) { + if (RepR->isPredicated()) + WorkList.push_back(RepR); + } + } + + unsigned BBNum = 0; + for (VPReplicateRecipe *RepR : WorkList) { + VPBasicBlock *CurrentBlock = RepR->getParent(); + VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator()); + + BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent(); + SplitBlock->setName( + OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : ""); + // Record predicated instructions for above packing optimizations. + VPBlockBase *Region = Builder.createReplicateRegion(RepR, Plan); + Region->setParent(CurrentBlock->getParent()); + VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock); + VPBlockUtils::connectBlocks(CurrentBlock, Region); + VPBlockUtils::connectBlocks(Region, SplitBlock); + } +} + +void VPlanTransforms::createAndOptimizeReplicateRegions( + VPlan &Plan, VPRecipeBuilder &Builder) { + // Convert masked VPReplicateRecipes to if-then region blocks. + addReplicateRegions(Plan, Builder); + + bool ShouldSimplify = true; + while (ShouldSimplify) { + ShouldSimplify = sinkScalarOperands(Plan); + ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan); + ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(Plan); + } +} bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) { SmallVector WorkList; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( @@ -668,32 +711,3 @@ void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, RecurSplice->setOperand(0, FOR); } } - -void VPlanTransforms::addReplicateRegions(VPlan &Plan, - VPRecipeBuilder &Builder) { - SmallVector WorkList; - for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( - vp_depth_first_deep(Plan.getEntry()))) { - for (VPRecipeBase &R : *VPBB) - if (auto *RepR = dyn_cast(&R)) { - if (RepR->isPredicated()) - WorkList.push_back(RepR); - } - } - - unsigned BBNum = 0; - for (VPReplicateRecipe *RepR : WorkList) { - VPBasicBlock *CurrentBlock = RepR->getParent(); - VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator()); - - BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent(); - SplitBlock->setName( - OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : ""); - // Record predicated instructions for above packing optimizations. - VPBlockBase *Region = Builder.createReplicateRegion(RepR, Plan); - Region->setParent(CurrentBlock->getParent()); - VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock); - VPBlockUtils::connectBlocks(CurrentBlock, Region); - VPBlockUtils::connectBlocks(Region, SplitBlock); - } -} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 26fe9f44dd8c2..4c4a11fa72506 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -39,15 +39,11 @@ struct VPlanTransforms { ScalarEvolution &SE, const TargetLibraryInfo &TLI); /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then - /// region block and remove the mask operand. - static void addReplicateRegions(VPlan &Plan, VPRecipeBuilder &Builder); - - static bool sinkScalarOperands(VPlan &Plan); - - /// Merge replicate regions in their successor region, if a replicate region - /// is connected to a successor replicate region with the same predicate by a - /// single, empty VPBasicBlock. - static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan); + /// region block and remove the mask operand. Optimize the created regions by + /// iteratively sinking scalar operands into the region, followed by merging + /// regions until no improvements are remaining. + static void createAndOptimizeReplicateRegions(VPlan &Plan, + VPRecipeBuilder &Builder); /// Remove redundant VPBasicBlocks by merging them into their predecessor if /// the predecessor has a single successor.