diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 5dc14dbe6574b..4a61c2bc35c72 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -542,10 +542,13 @@ class InterleavedAccessInfo { /// formation for predicated accesses, we may be able to relax this limitation /// in the future once we handle more complicated blocks. void reset() { - InterleaveGroupMap.clear(); - for (auto *Ptr : InterleaveGroups) + SmallPtrSet *, 4> DelSet; + // Avoid releasing a pointer twice. + for (auto &I : InterleaveGroupMap) + DelSet.insert(I.second); + for (auto *Ptr : DelSet) delete Ptr; - InterleaveGroups.clear(); + InterleaveGroupMap.clear(); RequiresScalarEpilogue = false; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 614f931cbc684..a5e85f27fabfa 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -201,9 +201,6 @@ class LoopVectorizationPlanner { /// The profitability analysis. LoopVectorizationCostModel &CM; - /// The interleaved access analysis. - InterleavedAccessInfo &IAI; - SmallVector VPlans; /// This class is used to enable the VPlan to invoke a method of ILV. This is @@ -226,10 +223,8 @@ class LoopVectorizationPlanner { LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, LoopVectorizationLegality *Legal, - LoopVectorizationCostModel &CM, - InterleavedAccessInfo &IAI) - : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), - IAI(IAI) {} + LoopVectorizationCostModel &CM) + : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} /// Plan how to best vectorize, return the best VF and its cost, or None if /// vectorization and interleaving should be avoided up front. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f10f0f3320d07..9368dd7c8b143 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6710,6 +6710,37 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { return BlockMaskCache[BB] = BlockMask; } +VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I, + VFRange &Range, + VPlanPtr &Plan) { + const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I); + if (!IG) + return nullptr; + + // Now check if IG is relevant for VF's in the given range. + auto isIGMember = [&](Instruction *I) -> std::function { + return [=](unsigned VF) -> bool { + return (VF >= 2 && // Query is illegal for VF == 1 + CM.getWideningDecision(I, VF) == + LoopVectorizationCostModel::CM_Interleave); + }; + }; + if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range)) + return nullptr; + + // I is a member of an InterleaveGroup for VF's in the (possibly trimmed) + // range. If it's the primary member of the IG construct a VPInterleaveRecipe. + // Otherwise, it's an adjunct member of the IG, do not construct any Recipe. + assert(I == IG->getInsertPos() && + "Generating a recipe for an adjunct member of an interleave group"); + + VPValue *Mask = nullptr; + if (Legal->isMaskRequired(I)) + Mask = createBlockInMask(I->getParent(), Plan); + + return new VPInterleaveRecipe(IG, Mask); +} + VPWidenMemoryInstructionRecipe * VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan) { @@ -6726,6 +6757,8 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, CM.getWideningDecision(I, VF); assert(Decision != LoopVectorizationCostModel::CM_Unknown && "CM decision should be taken at this point."); + assert(Decision != LoopVectorizationCostModel::CM_Interleave && + "Interleave memory opportunity should be caught earlier."); return Decision != LoopVectorizationCostModel::CM_Scalarize; }; @@ -6890,21 +6923,15 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB, if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) return false; - // If this ingredient's recipe is to be recorded, keep its recipe a singleton - // to avoid having to split recipes later. - bool IsSingleton = Ingredient2Recipe.count(I); - // Success: widen this instruction. We optimize the common case where // consecutive instructions can be represented by a single recipe. - if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() && - LastExtensibleRecipe->appendInstruction(I)) - return true; + if (!VPBB->empty()) { + VPWidenRecipe *LastWidenRecipe = dyn_cast(&VPBB->back()); + if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I)) + return true; + } - VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I); - if (!IsSingleton) - LastExtensibleRecipe = WidenRecipe; - setRecipe(I, WidenRecipe); - VPBB->appendRecipe(WidenRecipe); + VPBB->appendRecipe(new VPWidenRecipe(I)); return true; } @@ -6920,7 +6947,6 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range); auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated); - setRecipe(I, Recipe); // Find if I uses a predicated instruction. If so, it will use its scalar // value. Avoid hoisting the insert-element which packs the scalar value into @@ -6979,20 +7005,36 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr, bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range, VPlanPtr &Plan, VPBasicBlock *VPBB) { VPRecipeBase *Recipe = nullptr; + // Check if Instr should belong to an interleave memory recipe, or already + // does. In the latter case Instr is irrelevant. + if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) { + VPBB->appendRecipe(Recipe); + return true; + } + + // Check if Instr is a memory operation that should be widened. + if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) { + VPBB->appendRecipe(Recipe); + return true; + } - // First, check for specific widening recipes that deal with memory - // operations, inductions and Phi nodes. - if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) || - (Recipe = tryToOptimizeInduction(Instr, Range)) || - (Recipe = tryToBlend(Instr, Plan)) || - (isa(Instr) && - (Recipe = new VPWidenPHIRecipe(cast(Instr))))) { - setRecipe(Instr, Recipe); + // Check if Instr should form some PHI recipe. + if ((Recipe = tryToOptimizeInduction(Instr, Range))) { + VPBB->appendRecipe(Recipe); + return true; + } + if ((Recipe = tryToBlend(Instr, Plan))) { VPBB->appendRecipe(Recipe); return true; } + if (PHINode *Phi = dyn_cast(Instr)) { + VPBB->appendRecipe(new VPWidenPHIRecipe(Phi)); + return true; + } - // Check if Instr is to be widened by a general VPWidenRecipe. + // Check if Instr is to be widened by a general VPWidenRecipe, after + // having first checked for specific widening recipes that deal with + // Interleave Groups, Inductions and Phi nodes. if (tryToWiden(Instr, VPBB, Range)) return true; @@ -7048,57 +7090,19 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl &NeedDef, SmallPtrSetImpl &DeadInstructions) { - // Hold a mapping from predicated instructions to their recipes, in order to // fix their AlsoPack behavior if a user is determined to replicate and use a // scalar instead of vector value. DenseMap PredInst2Recipe; DenseMap &SinkAfter = Legal->getSinkAfter(); - - SmallPtrSet *, 1> InterleaveGroups; - - VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); - - // --------------------------------------------------------------------------- - // Pre-construction: record ingredients whose recipes we'll need to further - // process after constructing the initial VPlan. - // --------------------------------------------------------------------------- - - // Mark instructions we'll need to sink later and their targets as - // ingredients whose recipe we'll need to record. - for (auto &Entry : SinkAfter) { - RecipeBuilder.recordRecipeOf(Entry.first); - RecipeBuilder.recordRecipeOf(Entry.second); - } - - // For each interleave group which is relevant for this (possibly trimmed) - // Range, add it to the set of groups to be later applied to the VPlan and add - // placeholders for its members' Recipes which we'll be replacing with a - // single VPInterleaveRecipe. - for (InterleaveGroup *IG : IAI.getInterleaveGroups()) { - auto applyIG = [IG, this](unsigned VF) -> bool { - return (VF >= 2 && // Query is illegal for VF == 1 - CM.getWideningDecision(IG->getInsertPos(), VF) == - LoopVectorizationCostModel::CM_Interleave); - }; - if (!getDecisionAndClampRange(applyIG, Range)) - continue; - InterleaveGroups.insert(IG); - for (unsigned i = 0; i < IG->getFactor(); i++) - if (Instruction *Member = IG->getMember(i)) - RecipeBuilder.recordRecipeOf(Member); - }; - - // --------------------------------------------------------------------------- - // Build initial VPlan: Scan the body of the loop in a topological order to - // visit each basic block after having visited its predecessor basic blocks. - // --------------------------------------------------------------------------- + DenseMap SinkAfterInverse; // Create a dummy pre-entry VPBasicBlock to start building the VPlan. VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); auto Plan = std::make_unique(VPBB); + VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); // Represent values that will have defs inside VPlan. for (Value *V : NeedDef) Plan->addVPValue(V); @@ -7119,7 +7123,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( std::vector Ingredients; - // Introduce each ingredient into VPlan. + // Organize the ingredients to vectorize from current basic block in the + // right order. for (Instruction &I : BB->instructionsWithoutDebug()) { Instruction *Instr = &I; @@ -7129,6 +7134,43 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( DeadInstructions.find(Instr) != DeadInstructions.end()) continue; + // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct + // member of the IG, do not construct any Recipe for it. + const InterleaveGroup *IG = + CM.getInterleavedAccessGroup(Instr); + if (IG && Instr != IG->getInsertPos() && + Range.Start >= 2 && // Query is illegal for VF == 1 + CM.getWideningDecision(Instr, Range.Start) == + LoopVectorizationCostModel::CM_Interleave) { + auto SinkCandidate = SinkAfterInverse.find(Instr); + if (SinkCandidate != SinkAfterInverse.end()) + Ingredients.push_back(SinkCandidate->second); + continue; + } + + // Move instructions to handle first-order recurrences, step 1: avoid + // handling this instruction until after we've handled the instruction it + // should follow. + auto SAIt = SinkAfter.find(Instr); + if (SAIt != SinkAfter.end()) { + LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" + << *SAIt->second + << " to vectorize a 1st order recurrence.\n"); + SinkAfterInverse[SAIt->second] = Instr; + continue; + } + + Ingredients.push_back(Instr); + + // Move instructions to handle first-order recurrences, step 2: push the + // instruction to be sunk at its insertion point. + auto SAInvIt = SinkAfterInverse.find(Instr); + if (SAInvIt != SinkAfterInverse.end()) + Ingredients.push_back(SAInvIt->second); + } + + // Introduce each ingredient into VPlan. + for (Instruction *Instr : Ingredients) { if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB)) continue; @@ -7153,32 +7195,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBlockUtils::disconnectBlocks(PreEntry, Entry); delete PreEntry; - // --------------------------------------------------------------------------- - // Transform initial VPlan: Apply previously taken decisions, in order, to - // bring the VPlan to its final state. - // --------------------------------------------------------------------------- - - // Apply Sink-After legal constraints. - for (auto &Entry : SinkAfter) { - VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); - VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); - Sink->moveAfter(Target); - } - - // Interleave memory: for each Interleave Group we marked earlier as relevant - // for this VPlan, replace the Recipes widening its memory instructions with a - // single VPInterleaveRecipe at its insertion point. - for (auto IG : InterleaveGroups) { - auto *Recipe = cast( - RecipeBuilder.getRecipe(IG->getInsertPos())); - (new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe); - - for (unsigned i = 0; i < IG->getFactor(); ++i) - if (Instruction *Member = IG->getMember(i)) { - RecipeBuilder.getRecipe(Member)->eraseFromParent(); - } - } - // Finally, if tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the end of the latch. if (CM.foldTailByMasking()) { @@ -7411,11 +7427,12 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { } void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { - VPValue *Mask = getMask(); - if (!Mask) + if (!User) return State.ILV->vectorizeMemoryInstruction(&Instr); + // Last (and currently only) operand is a mask. InnerLoopVectorizer::VectorParts MaskValues(State.UF); + VPValue *Mask = User->getOperand(User->getNumOperands() - 1); for (unsigned Part = 0; Part < State.UF; ++Part) MaskValues[Part] = State.get(Mask, Part); State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues); @@ -7464,7 +7481,7 @@ static bool processLoopInVPlanNativePath( // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an // optional argument if we don't need it in the future. - LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI); + LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM); // Get user vectorization factor. const unsigned UserVF = Hints.getWidth(); @@ -7624,7 +7641,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { CM.collectValuesToIgnore(); // Use the planner for vectorization. - LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI); + LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM); // Get user vectorization factor. unsigned UserVF = Hints.getWidth(); diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 598fb00e956ea..0ca6a6b93cfd7 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -47,24 +47,6 @@ class VPRecipeBuilder { EdgeMaskCacheTy EdgeMaskCache; BlockMaskCacheTy BlockMaskCache; - // VPlan-VPlan transformations support: Hold a mapping from ingredients to - // their recipe. To save on memory, only do so for selected ingredients, - // marked by having a nullptr entry in this map. If those ingredients get a - // VPWidenRecipe, also avoid compressing other ingredients into it to avoid - // having to split such recipes later. - DenseMap Ingredient2Recipe; - VPWidenRecipe *LastExtensibleRecipe = nullptr; - - /// Set the recipe created for given ingredient. This operation is a no-op for - /// ingredients that were not marked using a nullptr entry in the map. - void setRecipe(Instruction *I, VPRecipeBase *R) { - if (!Ingredient2Recipe.count(I)) - return; - assert(Ingredient2Recipe[I] == nullptr && - "Recipe already set for ingredient"); - Ingredient2Recipe[I] = R; - } - public: /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True. It returns the *entry* @@ -75,22 +57,16 @@ class VPRecipeBuilder { /// and DST. VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); - /// Mark given ingredient for recording its recipe once one is created for - /// it. - void recordRecipeOf(Instruction *I) { - assert((!Ingredient2Recipe.count(I) || Ingredient2Recipe[I] == nullptr) && - "Recipe already set for ingredient"); - Ingredient2Recipe[I] = nullptr; - } - - /// Return the recipe created for given ingredient. - VPRecipeBase *getRecipe(Instruction *I) { - assert(Ingredient2Recipe.count(I) && - "Recording this ingredients recipe was not requested"); - assert(Ingredient2Recipe[I] != nullptr && - "Ingredient doesn't have a recipe"); - return Ingredient2Recipe[I]; - } + /// Check if \I belongs to an Interleave Group within the given VF \p Range, + /// \return true in the first returned value if so and false otherwise. + /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG + /// for \p Range.Start, and provide it as the second returned value. + /// Note that if \I is an adjunct member of an IG for \p Range.Start, the + /// \return value is , as it is handled by another recipe. + /// \p Range.End may be decreased to ensure same decision from \p Range.Start + /// to \p Range.End. + VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan); /// Check if \I is a memory instruction to be widened for \p Range.Start and /// potentially masked. Such instructions are handled by a recipe that takes diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index bc32e54be7204..4b80d1fb20aa5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -275,35 +275,18 @@ void VPRegionBlock::execute(VPTransformState *State) { } void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { - assert(!Parent && "Recipe already in some VPBasicBlock"); - assert(InsertPos->getParent() && - "Insertion position not in any VPBasicBlock"); Parent = InsertPos->getParent(); Parent->getRecipeList().insert(InsertPos->getIterator(), this); } -void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { - assert(!Parent && "Recipe already in some VPBasicBlock"); - assert(InsertPos->getParent() && - "Insertion position not in any VPBasicBlock"); - Parent = InsertPos->getParent(); - Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); -} - -void VPRecipeBase::removeFromParent() { - assert(getParent() && "Recipe not in any VPBasicBlock"); - getParent()->getRecipeList().remove(getIterator()); - Parent = nullptr; -} - iplist::iterator VPRecipeBase::eraseFromParent() { - assert(getParent() && "Recipe not in any VPBasicBlock"); return getParent()->getRecipeList().erase(getIterator()); } void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { - removeFromParent(); - insertAfter(InsertPos); + InsertPos->getParent()->getRecipeList().splice( + std::next(InsertPos->getIterator()), getParent()->getRecipeList(), + getIterator()); } void VPInstruction::generateInstruction(VPTransformState &State, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 226c6c0279d7e..6eeec0f21fd06 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -567,7 +567,6 @@ class VPBlockBase { /// instructions. class VPRecipeBase : public ilist_node_with_parent { friend VPBasicBlock; - friend class VPBlockUtils; private: const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). @@ -616,18 +615,10 @@ class VPRecipeBase : public ilist_node_with_parent { /// the specified recipe. void insertBefore(VPRecipeBase *InsertPos); - /// Insert an unlinked Recipe into a basic block immediately after - /// the specified Recipe. - void insertAfter(VPRecipeBase *InsertPos); - /// Unlink this recipe from its current VPBasicBlock and insert it into /// the VPBasicBlock that MovePos lives in, right after MovePos. void moveAfter(VPRecipeBase *MovePos); - /// This method unlinks 'this' from the containing basic block, but does not - /// delete it. - void removeFromParent(); - /// This method unlinks 'this' from the containing basic block and deletes it. /// /// \returns an iterator pointing to the element after the erased one @@ -982,13 +973,6 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; } - /// Return the mask used by this recipe. Note that a full mask is represented - /// by a nullptr. - VPValue *getMask() { - // Mask is the last operand. - return User ? User->getOperand(User->getNumOperands() - 1) : nullptr; - } - /// Generate the wide load/store. void execute(VPTransformState &State) override; diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 67936a83efaf6..57567e7d8434b 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -83,7 +83,6 @@ TEST(VPInstructionTest, moveAfter) { CHECK_ITERATOR(VPBB1, I2, I1); CHECK_ITERATOR(VPBB2, I4, I3, I5); - EXPECT_EQ(I3->getParent(), I4->getParent()); } } // namespace