diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 78fc333e55401f..9de6ec40a5d8bd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -567,7 +567,7 @@ class InnerLoopVectorizer { /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *CountRoundDown, Value *EndValue, - BasicBlock *MiddleBlock); + BasicBlock *MiddleBlock, BasicBlock *VectorHeader); /// Introduce a conditional branch (on true, condition to be set later) at the /// end of the header=latch connecting it to itself (across the backedge) and @@ -735,9 +735,6 @@ class InnerLoopVectorizer { /// there can be multiple exiting edges reaching this block. BasicBlock *LoopExitBlock; - /// The vector loop body. - BasicBlock *LoopVectorBody; - /// The scalar loop body. BasicBlock *LoopScalarBody; @@ -3132,7 +3129,7 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { // We intentionally don't let SplitBlock to update LoopInfo since // LoopVectorBody should belong to another loop than LoopVectorPreHeader. // LoopVectorBody is explicitly added to the correct place few lines later. - LoopVectorBody = + BasicBlock *LoopVectorBody = SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, nullptr, nullptr, Twine(Prefix) + "vector.body"); @@ -3361,7 +3358,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() { void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *CountRoundDown, Value *EndValue, - BasicBlock *MiddleBlock) { + BasicBlock *MiddleBlock, + BasicBlock *VectorHeader) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate // value (the value that feeds into the phi from the loop latch). @@ -3406,7 +3404,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, CMO->setName("cast.cmo"); Value *Step = CreateStepValue(II.getStep(), *PSE.getSE(), - LoopVectorBody->getTerminator()); + VectorHeader->getTerminator()); Value *Escape = emitTransformedIndex(B, CMO, II.getStartValue(), Step, II); Escape->setName("ind.escape"); @@ -3723,15 +3721,16 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); + Loop *VectorLoop = LI->getLoopFor(State.CFG.PrevBB); // If we inserted an edge from the middle block to the unique exit block, // update uses outside the loop (phis) to account for the newly inserted // edge. if (!Cost->requiresScalarEpilogue(VF)) { // Fix-up external users of the induction variables. for (auto &Entry : Legal->getInductionVars()) - fixupIVUsers(Entry.first, Entry.second, - getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)), - IVEndValues[Entry.first], LoopMiddleBlock); + fixupIVUsers( + Entry.first, Entry.second, getOrCreateVectorTripCount(VectorLoop), + IVEndValues[Entry.first], LoopMiddleBlock, VectorLoop->getHeader()); fixLCSSAPHIs(State); } @@ -3740,7 +3739,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { sinkScalarOperands(&*PI); // Remove redundant induction instructions. - cse(LoopVectorBody); + cse(VectorLoop->getHeader()); // Set/update profile weights for the vector and remainder loops as original // loop iterations are now distributed among them. Note that original loop @@ -3755,9 +3754,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // For scalable vectorization we can't know at compile time how many iterations // of the loop are handled in one vector iteration, so instead assume a pessimistic // vscale of '1'. - setProfileInfoAfterUnrolling( - LI->getLoopFor(LoopScalarBody), LI->getLoopFor(LoopVectorBody), - LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF); + setProfileInfoAfterUnrolling(LI->getLoopFor(LoopScalarBody), VectorLoop, + LI->getLoopFor(LoopScalarBody), + VF.getKnownMinValue() * UF); } void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { @@ -3916,6 +3915,8 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, setDebugLocFromInst(LoopExitInst); Type *PhiTy = OrigPhi->getType(); + BasicBlock *VectorLoopLatch = + LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch(); // If tail is folded by masking, the vector value to leave the loop should be // a Select choosing between the vectorized LoopExitInst and vectorized Phi, // instead of the former. For an inloop reduction the reduction will already @@ -3945,8 +3946,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, TargetTransformInfo::ReductionFlags())) { auto *VecRdxPhi = cast(State.get(PhiR, Part)); - VecRdxPhi->setIncomingValueForBlock( - LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel); + VecRdxPhi->setIncomingValueForBlock(VectorLoopLatch, Sel); } } } @@ -3957,8 +3957,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); - Builder.SetInsertPoint( - LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator()); + Builder.SetInsertPoint(VectorLoopLatch->getTerminator()); VectorParts RdxParts(UF); for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = State.get(LoopExitInstDef, Part); @@ -4319,7 +4318,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader); // A pointer induction, performed by using a gep - BasicBlock *LoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); + BasicBlock *LoopLatch = LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch(); Instruction *InductionLoc = LoopLatch->getTerminator(); const SCEV *ScalarStep = II.getStep(); SCEVExpander Exp(*PSE.getSE(), DL, "induction");