diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 772b276df124a..f829ef52d50bb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -569,11 +569,6 @@ class InnerLoopVectorizer { Value *CountRoundDown, Value *EndValue, BasicBlock *MiddleBlock, BasicBlock *VectorHeader); - /// Introduce a conditional branch (on true, condition to be set later) at the - /// end of the header=latch connecting it to itself (across the backedge) and - /// to the exit block of \p L. - void createHeaderBranch(Loop *L); - /// Handle all cross-iteration phis in the header. void fixCrossIterationPHIs(VPTransformState &State); @@ -631,8 +626,8 @@ class InnerLoopVectorizer { /// Emit basic blocks (prefixed with \p Prefix) for the iteration check, /// vector loop preheader, middle block and scalar preheader. Also - /// allocate a loop object for the new vector loop and return it. - Loop *createVectorLoopSkeleton(StringRef Prefix); + /// allocate a loop object for the new vector loop. + void createVectorLoopSkeleton(StringRef Prefix); /// Create new phi nodes for the induction variables to resume iteration count /// in the scalar epilogue, from where the vectorized loop left off. @@ -2833,23 +2828,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, PredicatedInstructions.push_back(Cloned); } -void InnerLoopVectorizer::createHeaderBranch(Loop *L) { - BasicBlock *Header = L->getHeader(); - assert(!L->getLoopLatch() && "loop should not have a latch at this point"); - - IRBuilder<> B(Header->getTerminator()); - Instruction *OldInst = - getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); - setDebugLocFromInst(OldInst, &B); - - // Connect the header to the exit and header blocks and replace the old - // terminator. - B.CreateCondBr(B.getTrue(), L->getUniqueExitBlock(), Header); - - // Now we have two terminators. Remove the old one from the block. - Header->getTerminator()->eraseFromParent(); -} - Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) { if (TripCount) return TripCount; @@ -3092,7 +3070,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) { return MemCheckBlock; } -Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { +void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { LoopScalarBody = OrigLoop->getHeader(); LoopVectorPreHeader = OrigLoop->getLoopPreheader(); assert(LoopVectorPreHeader && "Invalid loop structure"); @@ -3150,7 +3128,6 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { LI->addTopLevelLoop(Lp); } Lp->addBasicBlockToLoop(LoopVectorBody, *LI); - return Lp; } void InnerLoopVectorizer::createInductionResumeValues( @@ -3316,7 +3293,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() { // Create an empty vector loop, and prepare basic blocks for the runtime // checks. - Loop *Lp = createVectorLoopSkeleton(""); + createVectorLoopSkeleton(""); // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. This check also covers the case where the @@ -3334,8 +3311,6 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() { // faster. emitMemRuntimeChecks(LoopScalarPreHeader); - createHeaderBranch(Lp); - // Emit phis for the new starting index of the scalar loop. createInductionResumeValues(); @@ -7622,7 +7597,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, // 1. Create a new empty loop. Unlink the old loop and connect the new one. VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; Value *CanonicalIVStartValue; - std::tie(State.CFG.PrevBB, CanonicalIVStartValue) = + std::tie(State.CFG.VectorPreHeader, CanonicalIVStartValue) = ILV.createVectorizedLoopSkeleton(); ILV.collectPoisonGeneratingRecipes(State); @@ -7739,7 +7714,7 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } std::pair EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() { MDNode *OrigLoopID = OrigLoop->getLoopID(); - Loop *Lp = createVectorLoopSkeleton(""); + createVectorLoopSkeleton(""); // Generate the code to check the minimum iteration count of the vector // epilogue (see below). @@ -7768,7 +7743,6 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() { // Generate the induction variable. Value *CountRoundDown = getOrCreateVectorTripCount(LoopVectorPreHeader); EPI.VectorTripCount = CountRoundDown; - createHeaderBranch(Lp); // Skip induction resume value creation here because they will be created in // the second pass. If we created them here, they wouldn't be used anyway, @@ -7860,7 +7834,7 @@ EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(BasicBlock *Bypass, std::pair EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { MDNode *OrigLoopID = OrigLoop->getLoopID(); - Loop *Lp = createVectorLoopSkeleton("vec.epilog."); + createVectorLoopSkeleton("vec.epilog."); // Now, compare the remaining count and if there aren't enough iterations to // execute the vectorized epilogue skip to the scalar part. @@ -7941,9 +7915,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0), EPI.MainLoopIterationCountCheck); - // Generate the induction variable. - createHeaderBranch(Lp); - // Generate induction resume values. These variables save the new starting // indexes for the scalar loop. They are used to test if there are any tail // iterations left once the vector loop has completed. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 3a2cb7f161bcf..60ca5d36e65ac 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -857,7 +857,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, // Check if the backedge taken count is needed, and if so build it. if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { - IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); + IRBuilder<> Builder(State.CFG.VectorPreHeader->getTerminator()); auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TripCountV->getType(), 1), "trip.count.minus.1"); @@ -898,17 +898,16 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, /// LoopVectorBody basic-block was created for this. Introduce additional /// basic-blocks as needed, and fill them all. void VPlan::execute(VPTransformState *State) { - // 0. Set the reverse mapping from VPValues to Values for code generation. + // Set the reverse mapping from VPValues to Values for code generation. for (auto &Entry : Value2VPValue) State->VPValue2Value[Entry.second] = Entry.first; - BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; - State->CFG.VectorPreHeader = VectorPreHeaderBB; - BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); - assert(VectorHeaderBB && "Loop preheader does not have a single successor."); - + // Initialize CFG state. + State->CFG.PrevVPBB = nullptr; + BasicBlock *VectorHeaderBB = State->CFG.VectorPreHeader->getSingleSuccessor(); + State->CFG.PrevBB = VectorHeaderBB; + State->CFG.ExitBB = VectorHeaderBB->getSingleSuccessor(); State->CurrentVectorLoop = State->LI->getLoopFor(VectorHeaderBB); - State->CFG.ExitBB = State->CurrentVectorLoop->getExitBlock(); // Remove the edge between Header and Latch to allow other connections. // Temporarily terminate with unreachable until CFG is rewired. @@ -920,9 +919,6 @@ void VPlan::execute(VPTransformState *State) { State->Builder.SetInsertPoint(Terminator); // Generate code in loop body. - State->CFG.PrevVPBB = nullptr; - State->CFG.PrevBB = VectorHeaderBB; - for (VPBlockBase *Block : depth_first(Entry)) Block->execute(State);