diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0cfe64a5f0374..45d71d5a4b3c9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -569,11 +569,6 @@ class InnerLoopVectorizer { Value *CountRoundDown, Value *EndValue, BasicBlock *MiddleBlock, BasicBlock *VectorHeader); - /// Introduce a conditional branch (on true, condition to be set later) at the - /// end of the header=latch connecting it to itself (across the backedge) and - /// to the exit block of \p L. - void createHeaderBranch(Loop *L); - /// Handle all cross-iteration phis in the header. void fixCrossIterationPHIs(VPTransformState &State); @@ -630,9 +625,8 @@ class InnerLoopVectorizer { BasicBlock *emitMemRuntimeChecks(BasicBlock *Bypass); /// Emit basic blocks (prefixed with \p Prefix) for the iteration check, - /// vector loop preheader, middle block and scalar preheader. Also - /// allocate a loop object for the new vector loop and return it. - Loop *createVectorLoopSkeleton(StringRef Prefix); + /// vector loop preheader, middle block and scalar preheader. + void createVectorLoopSkeleton(StringRef Prefix); /// Create new phi nodes for the induction variables to resume iteration count /// in the scalar epilogue, from where the vectorized loop left off. @@ -2835,23 +2829,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, PredicatedInstructions.push_back(Cloned); } -void InnerLoopVectorizer::createHeaderBranch(Loop *L) { - BasicBlock *Header = L->getHeader(); - assert(!L->getLoopLatch() && "loop should not have a latch at this point"); - - IRBuilder<> B(Header->getTerminator()); - Instruction *OldInst = - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); - setDebugLocFromInst(OldInst, &B); - - // Connect the header to the exit and header blocks and replace the old - // terminator. - B.CreateCondBr(B.getTrue(), L->getUniqueExitBlock(), Header); - - // Now we have two terminators. Remove the old one from the block. - Header->getTerminator()->eraseFromParent(); -} - Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) { if (TripCount) return TripCount; @@ -3094,7 +3071,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) { return MemCheckBlock; } -Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { +void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { LoopScalarBody = OrigLoop->getHeader(); LoopVectorPreHeader = OrigLoop->getLoopPreheader(); assert(LoopVectorPreHeader && "Invalid loop structure"); @@ -3126,12 +3103,8 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc()); ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst); - // We intentionally don't let SplitBlock to update LoopInfo since - // LoopVectorBody should belong to another loop than LoopVectorPreHeader. - // LoopVectorBody is explicitly added to the correct place few lines later. - BasicBlock *LoopVectorBody = - SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, - nullptr, nullptr, Twine(Prefix) + "vector.body"); + SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, + nullptr, nullptr, Twine(Prefix) + "vector.body"); // Update dominator for loop exit. if (!Cost->requiresScalarEpilogue(VF)) @@ -3139,20 +3112,6 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { // middle block to exit blocks and thus no need to update the immediate // dominator of the exit blocks. DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); - - // Create and register the new vector loop. - Loop *Lp = LI->AllocateLoop(); - Loop *ParentLoop = OrigLoop->getParentLoop(); - - // Insert the new loop into the loop nest and register the new basic blocks - // before calling any utilities such as SCEV that require valid LoopInfo. - if (ParentLoop) { - ParentLoop->addChildLoop(Lp); - } else { - LI->addTopLevelLoop(Lp); - } - Lp->addBasicBlockToLoop(LoopVectorBody, *LI); - return Lp; } void InnerLoopVectorizer::createInductionResumeValues( @@ -3264,7 +3223,6 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(MDNode *OrigLoopID) { #ifdef EXPENSIVE_CHECKS assert(DT->verify(DominatorTree::VerificationLevel::Fast)); - LI->verify(*DT); #endif return LoopVectorPreHeader; @@ -3318,7 +3276,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() { // Create an empty vector loop, and prepare basic blocks for the runtime // checks. - Loop *Lp = createVectorLoopSkeleton(""); + createVectorLoopSkeleton(""); // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. This check also covers the case where the @@ -3336,8 +3294,6 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() { // faster. emitMemRuntimeChecks(LoopScalarPreHeader); - createHeaderBranch(Lp); - // Emit phis for the new starting index of the scalar loop. createInductionResumeValues(); @@ -7624,7 +7580,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, // 1. Create a new empty loop. Unlink the old loop and connect the new one. VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; Value *CanonicalIVStartValue; - std::tie(State.CFG.PrevBB, CanonicalIVStartValue) = + std::tie(State.CFG.VectorPreHeader, CanonicalIVStartValue) = ILV.createVectorizedLoopSkeleton(); ILV.collectPoisonGeneratingRecipes(State); @@ -7750,7 +7706,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() { // can hit the same issue for any SCEV, or ValueTracking query done during // mutation. See PR49900. getOrCreateTripCount(OrigLoop->getLoopPreheader()); - Loop *Lp = createVectorLoopSkeleton(""); + createVectorLoopSkeleton(""); // Generate the code to check the minimum iteration count of the vector // epilogue (see below). @@ -7779,7 +7735,6 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() { // Generate the induction variable. Value *CountRoundDown = getOrCreateVectorTripCount(LoopVectorPreHeader); EPI.VectorTripCount = CountRoundDown; - createHeaderBranch(Lp); // Skip induction resume value creation here because they will be created in // the second pass. If we created them here, they wouldn't be used anyway, @@ -7871,7 +7826,7 @@ EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(BasicBlock *Bypass, std::pair EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { MDNode *OrigLoopID = OrigLoop->getLoopID(); - Loop *Lp = createVectorLoopSkeleton("vec.epilog."); + createVectorLoopSkeleton("vec.epilog."); // Now, compare the remaining count and if there aren't enough iterations to // execute the vectorized epilogue skip to the scalar part. @@ -7952,9 +7907,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0), EPI.MainLoopIterationCountCheck); - // Generate the induction variable. - createHeaderBranch(Lp); - // Generate induction resume values. These variables save the new starting // indexes for the scalar loop. They are used to test if there are any tail // iterations left once the vector loop has completed. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 3a2cb7f161bcf..05a92cba79b21 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -318,11 +318,16 @@ void VPBasicBlock::execute(VPTransformState *State) { // Temporarily terminate with unreachable until CFG is rewired. UnreachableInst *Terminator = State->Builder.CreateUnreachable(); State->Builder.SetInsertPoint(Terminator); - // Register NewBB in its loop. In innermost loops its the same for all BB's. - State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI); State->CFG.PrevBB = NewBB; } + if (State->CurrentVectorLoop && + !State->CurrentVectorLoop->contains(State->CFG.PrevBB)) { + // Register NewBB in its loop. In innermost loops its the same for all BB's. + State->CurrentVectorLoop->addBasicBlockToLoop(State->CFG.PrevBB, + *State->LI); + } + // 2. Fill the IR basic block with IR instructions. LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() << " in BB:" << NewBB->getName() << '\n'); @@ -447,6 +452,17 @@ void VPRegionBlock::execute(VPTransformState *State) { ReversePostOrderTraversal RPOT(Entry); if (!isReplicator()) { + // Create and register the new vector loop. + State->CurrentVectorLoop = State->LI->AllocateLoop(); + Loop *ParentLoop = State->LI->getLoopFor(State->CFG.VectorPreHeader); + + // Insert the new loop into the loop nest and register the new basic blocks + // before calling any utilities such as SCEV that require valid LoopInfo. + if (ParentLoop) + ParentLoop->addChildLoop(State->CurrentVectorLoop); + else + State->LI->addTopLevelLoop(State->CurrentVectorLoop); + // Visit the VPBlocks connected to "this", starting from it. for (VPBlockBase *Block : RPOT) { if (EnableVPlanNativePath) { @@ -857,7 +873,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, // Check if the backedge taken count is needed, and if so build it. if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { - IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); + IRBuilder<> Builder(State.CFG.VectorPreHeader->getTerminator()); auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TripCountV->getType(), 1), "trip.count.minus.1"); @@ -898,17 +914,16 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, /// LoopVectorBody basic-block was created for this. Introduce additional /// basic-blocks as needed, and fill them all. void VPlan::execute(VPTransformState *State) { - // 0. Set the reverse mapping from VPValues to Values for code generation. + // Set the reverse mapping from VPValues to Values for code generation. for (auto &Entry : Value2VPValue) State->VPValue2Value[Entry.second] = Entry.first; - BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; - State->CFG.VectorPreHeader = VectorPreHeaderBB; - BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); - assert(VectorHeaderBB && "Loop preheader does not have a single successor."); - + // Initialize CFG state. + State->CFG.PrevVPBB = nullptr; + BasicBlock *VectorHeaderBB = State->CFG.VectorPreHeader->getSingleSuccessor(); + State->CFG.PrevBB = VectorHeaderBB; + State->CFG.ExitBB = VectorHeaderBB->getSingleSuccessor(); State->CurrentVectorLoop = State->LI->getLoopFor(VectorHeaderBB); - State->CFG.ExitBB = State->CurrentVectorLoop->getExitBlock(); // Remove the edge between Header and Latch to allow other connections. // Temporarily terminate with unreachable until CFG is rewired. @@ -920,9 +935,6 @@ void VPlan::execute(VPTransformState *State) { State->Builder.SetInsertPoint(Terminator); // Generate code in loop body. - State->CFG.PrevVPBB = nullptr; - State->CFG.PrevBB = VectorHeaderBB; - for (VPBlockBase *Block : depth_first(Entry)) Block->execute(State);