diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7516e3ecbd287..0d22b09cf00fb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2964,34 +2964,6 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { LoopScalarPreHeader = SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI, nullptr, Twine(Prefix) + "scalar.ph"); - - // Set up the middle block terminator. Two cases: - // 1) If we know that we must execute the scalar epilogue, retain the existing - // unconditional branch from the middle block to the scalar preheader. In that - // case, there's no edge from the middle block to exit blocks and thus no - // need to update the immediate dominator of the exit blocks. - if (Cost->requiresScalarEpilogue(VF.isVector())) { - assert( - LoopMiddleBlock->getSingleSuccessor() == LoopScalarPreHeader && - " middle block should have the scalar preheader as single successor"); - return; - } - - // 2) Otherwise, we must have a single unique exit block (due to how we - // implement the multiple exit case). In this case, set up a conditional - // branch from the middle block to the loop scalar preheader, and the - // exit block. completeLoopSkeleton will update the condition to use an - // iteration check, if required to decide whether to execute the remainder. - BranchInst *BrInst = - BranchInst::Create(LoopExitBlock, LoopScalarPreHeader, Builder.getTrue()); - auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator(); - BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc()); - ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst); - - // Update dominator for loop exit. During skeleton creation, only the vector - // pre-header and the middle block are created. The vector loop is entirely - // created during VPlan exection. - DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); } PHINode *InnerLoopVectorizer::createInductionResumeValue( @@ -3088,51 +3060,6 @@ void InnerLoopVectorizer::createInductionResumeValues( } } -BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() { - // The trip counts should be cached by now. - Value *Count = getTripCount(); - Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); - - auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator(); - - // Add a check in the middle block to see if we have completed - // all of the iterations in the first vector loop. Three cases: - // 1) If we require a scalar epilogue, there is no conditional branch as - // we unconditionally branch to the scalar preheader. Do nothing. - // 2) If (N - N%VF) == N, then we *don't* need to run the remainder. - // Thus if tail is to be folded, we know we don't need to run the - // remainder and we can use the previous value for the condition (true). - // 3) Otherwise, construct a runtime check. - if (!Cost->requiresScalarEpilogue(VF.isVector()) && - !Cost->foldTailByMasking()) { - // Here we use the same DebugLoc as the scalar loop latch terminator instead - // of the corresponding compare because they may have ended up with - // different line numbers and we want to avoid awkward line stepping while - // debugging. Eg. if the compare has got a line number inside the loop. - // TODO: At the moment, CreateICmpEQ will simplify conditions with constant - // operands. Perform simplification directly on VPlan once the branch is - // modeled there. - IRBuilder<> B(LoopMiddleBlock->getTerminator()); - B.SetCurrentDebugLocation(ScalarLatchTerm->getDebugLoc()); - Value *CmpN = B.CreateICmpEQ(Count, VectorTripCount, "cmp.n"); - BranchInst &BI = *cast(LoopMiddleBlock->getTerminator()); - BI.setCondition(CmpN); - if (hasBranchWeightMD(*ScalarLatchTerm)) { - // Assume that `Count % VectorTripCount` is equally distributed. - unsigned TripCount = UF * VF.getKnownMinValue(); - assert(TripCount > 0 && "trip count should not be zero"); - const uint32_t Weights[] = {1, TripCount - 1}; - setBranchWeights(BI, Weights, /*IsExpected=*/false); - } - } - -#ifdef EXPENSIVE_CHECKS - assert(DT->verify(DominatorTree::VerificationLevel::Fast)); -#endif - - return LoopVectorPreHeader; -} - std::pair InnerLoopVectorizer::createVectorizedLoopSkeleton( const SCEV2ValueTy &ExpandedSCEVs) { @@ -3155,17 +3082,18 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( | [ ]_| <-- vector loop (created during VPlan execution). | | | v - \ -[ ] <--- middle-block. + \ -[ ] <--- middle-block (wrapped in VPIRBasicBlock with the branch to + | | successors created during VPlan execution) \/ | /\ v - | ->[ ] <--- new preheader. + | ->[ ] <--- new preheader (wrapped in VPIRBasicBlock). | | (opt) v <-- edge from middle to exit iff epilogue is not required. | [ ] \ | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue). \ | \ v - >[ ] <-- exit block(s). + >[ ] <-- exit block(s). (wrapped in VPIRBasicBlock) ... */ @@ -3192,7 +3120,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( // Emit phis for the new starting index of the scalar loop. createInductionResumeValues(ExpandedSCEVs); - return {completeLoopSkeleton(), nullptr}; + return {LoopVectorPreHeader, nullptr}; } // Fix up external users of the induction variable. At this point, we are @@ -7472,6 +7400,9 @@ LoopVectorizationPlanner::executePlan( std::tie(State.CFG.PrevBB, CanonicalIVStartValue) = ILV.createVectorizedLoopSkeleton(ExpandedSCEVs ? *ExpandedSCEVs : State.ExpandedSCEVs); +#ifdef EXPENSIVE_CHECKS + assert(DT->verify(DominatorTree::VerificationLevel::Fast)); +#endif // Only use noalias metadata when using memory checks guaranteeing no overlap // across all iterations. @@ -7552,6 +7483,18 @@ LoopVectorizationPlanner::executePlan( ILV.printDebugTracesAtEnd(); + // 4. Adjust branch weight of the branch in the middle block. + auto *MiddleTerm = + cast(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator()); + if (MiddleTerm->isConditional() && + hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { + // Assume that `Count % VectorTripCount` is equally distributed. + unsigned TripCount = State.UF * State.VF.getKnownMinValue(); + assert(TripCount > 0 && "trip count should not be zero"); + const uint32_t Weights[] = {1, TripCount - 1}; + setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); + } + return {State.ExpandedSCEVs, ReductionResumeValues}; } @@ -7608,7 +7551,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton( // inductions in the epilogue loop are created before executing the plan for // the epilogue loop. - return {completeLoopSkeleton(), nullptr}; + return {LoopVectorPreHeader, nullptr}; } void EpilogueVectorizerMainLoop::printDebugTracesAtStart() { @@ -7797,7 +7740,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( {VecEpilogueIterationCountCheck, EPI.VectorTripCount} /* AdditionalBypass */); - return {completeLoopSkeleton(), EPResumeVal}; + return {LoopVectorPreHeader, EPResumeVal}; } BasicBlock * @@ -7842,7 +7785,6 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck( setBranchWeights(BI, Weights, /*IsExpected=*/false); } ReplaceInstWithInst(Insert->getTerminator(), &BI); - LoopBypassBlocks.push_back(Insert); return Insert; } @@ -8547,9 +8489,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // modified; a basic block for the vector pre-header, followed by a region for // the vector loop, followed by the middle basic block. The skeleton vector // loop region contains a header and latch basic blocks. + + bool RequiresScalarEpilogueCheck = + LoopVectorizationPlanner::getDecisionAndClampRange( + [this](ElementCount VF) { + return !CM.requiresScalarEpilogue(VF.isVector()); + }, + Range); VPlanPtr Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), - *PSE.getSE(), OrigLoop->getLoopPreheader()); + *PSE.getSE(), RequiresScalarEpilogueCheck, CM.foldTailByMasking(), + OrigLoop); VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); @@ -8797,7 +8747,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // Create new empty VPlan auto Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), - *PSE.getSE(), OrigLoop->getLoopPreheader()); + *PSE.getSE(), true, false, OrigLoop); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); @@ -9006,6 +8956,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( } } Builder.setInsertPoint(&*LatchVPBB->begin()); + VPBasicBlock *MiddleVPBB = + cast(VectorLoopRegion->getSingleSuccessor()); + VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi(); for (VPRecipeBase &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { VPReductionPHIRecipe *PhiR = dyn_cast(&R); @@ -9114,8 +9067,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // also modeled in VPlan. auto *FinalReductionResult = new VPInstruction( VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL); - cast(VectorLoopRegion->getSingleSuccessor()) - ->appendRecipe(FinalReductionResult); + FinalReductionResult->insertBefore(*MiddleVPBB, IP); + IP = std::next(FinalReductionResult->getIterator()); OrigExitingVPV->replaceUsesWithIf( FinalReductionResult, [](VPUser &User, unsigned) { return isa(&User); }); @@ -10156,6 +10109,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { cast(&R)->setStartValue(StartVal); } + assert(DT->verify(DominatorTree::VerificationLevel::Fast) && + "DT not preserved correctly"); LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT, true, &ExpandedSCEVs); ++LoopsEpilogueVectorized; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index d6a5d841e69d7..3f34f14e48ef1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -448,11 +448,13 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { } void VPIRBasicBlock::execute(VPTransformState *State) { - assert(getHierarchicalSuccessors().empty() && - "VPIRBasicBlock cannot have successors at the moment"); - State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator()); executeRecipes(State, getIRBasicBlock()); + if (getSingleSuccessor() && isa(getIRBasicBlock()->getTerminator())) { + auto *Br = State->Builder.CreateBr(getIRBasicBlock()); + Br->setOperand(0, nullptr); + getIRBasicBlock()->getTerminator()->eraseFromParent(); + } for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); @@ -651,16 +653,17 @@ static std::pair cloneSESE(VPBlockBase *Entry); // Clone the CFG for all nodes in the single-entry-single-exit region reachable // from \p Entry, this includes cloning the blocks and their recipes. Operands // of cloned recipes will NOT be updated. Remapping of operands must be done -// separately. Returns a pair with the the new entry and exiting blocks of the -// cloned region. +// separately. Returns a pair with the new entry and exiting blocks of the +// cloned region. If \p Entry isn't part of a region, return nullptr for the exiting block. static std::pair cloneSESE(VPBlockBase *Entry) { DenseMap Old2NewVPBlocks; VPBlockBase *Exiting = nullptr; + bool InRegion = Entry->getParent(); // First, clone blocks reachable from Entry. for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) { VPBlockBase *NewBB = BB->clone(); Old2NewVPBlocks[BB] = NewBB; - if (BB->getNumSuccessors() == 0) { + if (InRegion && BB->getNumSuccessors() == 0) { assert(!Exiting && "Multiple exiting blocks?"); Exiting = BB; } @@ -697,7 +700,7 @@ static std::pair cloneSESE(VPBlockBase *Entry) { } #endif - return std::make_pair(Old2NewVPBlocks[Entry], Old2NewVPBlocks[Exiting]); + return std::make_pair(Old2NewVPBlocks[Entry], InRegion ? Old2NewVPBlocks[Exiting] : nullptr); } VPRegionBlock *VPRegionBlock::clone() { @@ -804,8 +807,9 @@ VPlan::~VPlan() { } VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, - BasicBlock *PH) { - VPIRBasicBlock *Entry = new VPIRBasicBlock(PH); + bool RequiresScalarEpilogueCheck, + bool TailFolded, Loop *TheLoop) { + VPIRBasicBlock *Entry= new VPIRBasicBlock(TheLoop->getLoopPreheader()); VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); auto Plan = std::make_unique(Entry, VecPreheader); Plan->TripCount = @@ -815,6 +819,42 @@ VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader); VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); + + // Add a check in the middle block to see if we have completed + // all of the iterations in the first vector loop. Three cases: + // 1) If we require a scalar epilogue, there is no conditional branch as + // we unconditionally branch to the scalar preheader. Do nothing. + // 2) If (N - N%VF) == N, then we *don't* need to run the remainder. + // Thus if tail is to be folded, we know we don't need to run the + // remainder and we can use the previous value for the condition (true). + // 3) Otherwise, construct a runtime check. + BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); + if (RequiresScalarEpilogueCheck) { + auto *VPExitBlock = new VPIRBasicBlock(IRExitBlock); + VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); + + auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); + // Here we use the same DebugLoc as the scalar loop latch terminator instead + // of the corresponding compare because they may have ended up with + // different line numbers and we want to avoid awkward line stepping while + // debugging. Eg. if the compare has got a line number inside the loop. + VPValue *Cmp = + TailFolded + ? Plan->getOrAddLiveIn(ConstantInt::getTrue( + IntegerType::getInt1Ty(TripCount->getType()->getContext()))) + : new VPInstruction(Instruction::ICmp, CmpInst::ICMP_EQ, + Plan->getTripCount(), + &Plan->getVectorTripCount(), + ScalarLatchTerm->getDebugLoc(), "cmp.n"); + if (auto *VPI = Cmp->getDefiningRecipe()) + MiddleVPBB->appendRecipe(VPI); + auto *Br = new VPInstruction(VPInstruction::BranchOnCond, {Cmp}, + ScalarLatchTerm->getDebugLoc()); + MiddleVPBB->appendRecipe(Br); + } + VPBasicBlock *PHWrapper = new VPBasicBlock("scalar.ph"); + VPBlockUtils::connectBlocks(MiddleVPBB, PHWrapper); + return Plan; } @@ -860,13 +900,16 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, /// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p /// VPBB are moved to the newly created VPIRBasicBlock. static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { - assert(VPBB->getNumSuccessors() == 0 && "VPBB must be a leave node"); VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB); for (auto &R : make_early_inc_range(*VPBB)) R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end()); VPBlockBase *PredVPBB = VPBB->getSinglePredecessor(); VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); VPBlockUtils::connectBlocks(PredVPBB, IRMiddleVPBB); + for (auto *Succ : to_vector(VPBB->getSuccessors())) { + VPBlockUtils::connectBlocks(IRMiddleVPBB, Succ); + VPBlockUtils::disconnectBlocks(VPBB, Succ); + } delete VPBB; } @@ -879,15 +922,41 @@ void VPlan::execute(VPTransformState *State) { State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); BasicBlock *VectorPreHeader = State->CFG.PrevBB; State->Builder.SetInsertPoint(VectorPreHeader->getTerminator()); - replaceVPBBWithIRVPBB( - cast(getVectorLoopRegion()->getSingleSuccessor()), - State->CFG.ExitBB); // Disconnect VectorPreHeader from ExitBB in both the CFG and DT. cast(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr); State->CFG.DTU.applyUpdates( {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}}); + // Replace regular VPBB's for the middle and scalar preheader blocks with + // VPIRBasicBlocks with VPIRBasicBlocks wrapping their IR blocks. The IR + // blocks are created during skeleton creation, so we can only create the + // VPIRBasicBlocks after legacy skeleton creation. + BasicBlock *MiddleBB = State->CFG.ExitBB; + VPBasicBlock *MiddleVPBB = + cast(getVectorLoopRegion()->getSingleSuccessor()); + // Find the VPBB for the scalar preheader, relying on the current structure + // when creating the middle block and its successrs: if there's a single + // predecessor, it must be the scalar preheader. Otherwise, the second + // successor is the scalar preheader. + BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor(); + auto &MiddleSuccs = MiddleVPBB->getSuccessors(); + assert((MiddleSuccs.size() == 1 || MiddleSuccs.size() == 2) && + "middle block has unexpected successors"); + VPBasicBlock *ScalarPhVPBB = cast( + MiddleSuccs.size() == 1 ? MiddleSuccs[0] : MiddleSuccs[1]); + assert(!isa(ScalarPhVPBB) && + "scalar preheader cannot be wrapped already"); + replaceVPBBWithIRVPBB(ScalarPhVPBB, ScalarPh); + replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB); + + // Disconnect the middle block from its single successor (the scalar loop + // header) in both the CFG and DT. The branch will be created during VPlan + // execution. + auto *BrInst = new UnreachableInst(MiddleBB->getContext()); + BrInst->insertBefore(MiddleBB->getTerminator()); + MiddleBB->getTerminator()->eraseFromParent(); + State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}}); // Generate code in the loop pre-header and body. for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) Block->execute(State); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index db10c7a240c7e..fecc56f4d4bb1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3203,7 +3203,9 @@ class VPlan { /// pre-header, followed by a region for the vector loop, followed by the /// middle VPBasicBlock. static VPlanPtr createInitialVPlan(const SCEV *TripCount, - ScalarEvolution &PSE, BasicBlock *PH); + ScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, + bool TailFolded, Loop *TheLoop); /// Prepare the plan for execution, setting up the required live-in values. void prepareToExecute(Value *TripCount, Value *VectorTripCount, diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 788e6c96d32aa..444fad339961e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -195,8 +195,6 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { Lane = VPLane::getFirstLane(); VPBasicBlock *MiddleVPBB = cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); - assert(MiddleVPBB->getNumSuccessors() == 0 && - "the middle block must not have any successors"); BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB]; Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), MiddleBB); @@ -298,6 +296,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { if (isVectorToScalar()) return true; switch (Opcode) { + case Instruction::ICmp: case VPInstruction::BranchOnCond: case VPInstruction::BranchOnCount: case VPInstruction::CalculateTripCountMinusVF: @@ -340,8 +339,9 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { return Builder.CreateNot(A, Name); } case Instruction::ICmp: { - Value *A = State.get(getOperand(0), Part); - Value *B = State.get(getOperand(1), Part); + bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); + Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed); + Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed); return Builder.CreateCmp(getPredicate(), A, B, Name); } case Instruction::Select: { @@ -443,8 +443,8 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); // Replace the temporary unreachable terminator with a new conditional - // branch, hooking it up to backward destination for exiting blocks now and - // to forward destination(s) later when they are created. + // branch, hooking it up to backward destination for exiting blocks now and + // to forward destination(s) later when they are created. BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); CondBr->setSuccessor(0, nullptr); @@ -676,6 +676,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::BranchOnCount: + case VPInstruction::BranchOnCond: return true; }; llvm_unreachable("switch should return"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index e2b7b0c7a219d..9337c79b1f2cc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -34,8 +34,11 @@ void VPlanTransforms::VPInstructionsToVPRecipes( ScalarEvolution &SE, const TargetLibraryInfo &TLI) { ReversePostOrderTraversal> RPOT( - Plan->getEntry()); + Plan->getVectorLoopRegion()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(RPOT)) { + // Skip blocks outside region + if (!VPBB->getParent()) + break; VPRecipeBase *Term = VPBB->getTerminator(); auto EndIter = Term ? Term->getIterator() : VPBB->end(); // Introduce each ingredient into VPlan. @@ -362,10 +365,16 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) { SmallVector WorkList; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getEntry()))) { + // Don't fold the exit block of the Plan into its single predecessor for + // now. + // TODO: Remove restriction once more of the skeleton is modeled in VPlan. + if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent()) + continue; auto *PredVPBB = dyn_cast_or_null(VPBB->getSinglePredecessor()); - if (PredVPBB && PredVPBB->getNumSuccessors() == 1) - WorkList.push_back(VPBB); + if (!PredVPBB || PredVPBB->getNumSuccessors() != 1) + continue; + WorkList.push_back(VPBB); } for (VPBasicBlock *VPBB : WorkList) { @@ -812,8 +821,19 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, if (auto *FOR = dyn_cast(&R)) RecurrencePhis.push_back(FOR); - VPBuilder MiddleBuilder( - cast(Plan.getVectorLoopRegion()->getSingleSuccessor())); + VPBasicBlock *MiddleVPBB = + cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); + VPBuilder MiddleBuilder; + // Set insert point so new recipes are inserted before terminator and + // condition, if there is either the former or both. + if (MiddleVPBB->getNumSuccessors() != 2) + MiddleBuilder.setInsertPoint(MiddleVPBB); + else if (isa(MiddleVPBB->getTerminator()->getOperand(0))) + MiddleBuilder.setInsertPoint( + &*std::prev(MiddleVPBB->getTerminator()->getIterator())); + else + MiddleBuilder.setInsertPoint(MiddleVPBB->getTerminator()); + for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) { SmallPtrSet SeenPhis; VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 56c82b13ccdef..5e899ecff8d43 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -160,10 +160,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { errs() << "Same IR basic block used by multiple wrapper blocks!\n"; return false; } - if (IRBB != IRBB->getPlan()->getPreheader()) { - errs() << "VPIRBasicBlock can only be used as pre-header at the moment!\n"; - return false; - } return true; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll index 33d7a3a3c8ac0..f728a3f74779c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll @@ -47,11 +47,11 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: @@ -154,15 +154,15 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 031f362914596..56616d4fca4c9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -124,9 +124,9 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index eff876fdfe96c..0d7a1dbc841e5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -47,11 +47,11 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; INTERLEAVE-4: middle.block: -; INTERLEAVE-4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; INTERLEAVE-4-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP13]], [[TMP12]] ; INTERLEAVE-4-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP14]], [[BIN_RDX]] ; INTERLEAVE-4-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP15]], [[BIN_RDX7]] ; INTERLEAVE-4-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) +; INTERLEAVE-4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; INTERLEAVE-4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE-4: scalar.ph: ; INTERLEAVE-4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -96,9 +96,9 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; INTERLEAVE-2: middle.block: -; INTERLEAVE-2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; INTERLEAVE-2-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP7]], [[TMP6]] ; INTERLEAVE-2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; INTERLEAVE-2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; INTERLEAVE-2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE-2: scalar.ph: ; INTERLEAVE-2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index 774ba911504e2..fae23b6907b07 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -785,8 +785,8 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -868,9 +868,9 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index b04009757a4a8..79b3fb02bd0a2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -98,13 +98,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP59]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: middle.block: -; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or [[TMP58]], [[TMP57]] ; DEFAULT-NEXT: [[TMP60:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[BIN_RDX]]) ; DEFAULT-NEXT: [[TMP64:%.*]] = call i32 @llvm.vscale.i32() ; DEFAULT-NEXT: [[TMP65:%.*]] = mul i32 [[TMP64]], 4 ; DEFAULT-NEXT: [[TMP66:%.*]] = sub i32 [[TMP65]], 1 ; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT13:%.*]] = extractelement [[TMP20]], i32 [[TMP66]] +; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: ; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ] @@ -351,9 +351,9 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: middle.block: -; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or [[TMP22]], [[TMP21]] ; DEFAULT-NEXT: [[TMP24:%.*]] = call i16 @llvm.vector.reduce.or.nxv4i16( [[BIN_RDX]]) +; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: ; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 613f46cb72aec..c7447b6a8101e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -57,8 +57,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[TMP7]]) +; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -279,11 +279,11 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP35]], [[TMP34]] ; CHECK-UNORDERED-NEXT: [[BIN_RDX7:%.*]] = fadd [[TMP36]], [[BIN_RDX]] ; CHECK-UNORDERED-NEXT: [[BIN_RDX8:%.*]] = fadd [[TMP37]], [[BIN_RDX7]] ; CHECK-UNORDERED-NEXT: [[TMP41:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX8]]) +; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -590,9 +590,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: [[TMP20:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP15]]) ; CHECK-UNORDERED-NEXT: [[TMP19:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP14]]) +; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -862,8 +862,8 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP11]]) +; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -1105,8 +1105,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP11]]) +; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1348,8 +1348,8 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, ; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[TMP10]]) +; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1538,11 +1538,11 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd [[TMP49]], [[TMP48]] ; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd [[TMP50]], [[BIN_RDX]] ; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd [[TMP51]], [[BIN_RDX11]] ; CHECK-UNORDERED-NEXT: [[TMP55:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX12]]) +; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1932,11 +1932,11 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: [[BIN_RDX:%.*]] = fadd nnan [[TMP49]], [[TMP48]] ; CHECK-UNORDERED-NEXT: [[BIN_RDX11:%.*]] = fadd nnan [[TMP50]], [[BIN_RDX]] ; CHECK-UNORDERED-NEXT: [[BIN_RDX12:%.*]] = fadd nnan [[TMP51]], [[BIN_RDX11]] ; CHECK-UNORDERED-NEXT: [[TMP55:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float -0.000000e+00, [[BIN_RDX12]]) +; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll index 9cbb3d1ffcd6b..6b10d4591f416 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll @@ -51,8 +51,8 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SC_SVE: middle.block: -; SC_SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; SC_SVE-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP17]]) +; SC_SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; SC_SVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SC_SVE: scalar.ph: ; SC_SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -126,8 +126,8 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; NO_SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO_SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO_SC_SVE: middle.block: -; NO_SC_SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; NO_SC_SVE-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP17]]) +; NO_SC_SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; NO_SC_SVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; NO_SC_SVE: scalar.ph: ; NO_SC_SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll index 659c4b26cd3e1..a35c9d0c678ca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -48,8 +48,8 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) { ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[BIN_RDX:%.*]] = and i64 [[TMP19]], [[TMP17]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll index 7b4fdd16a5379..0502ff5dc08fa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -46,9 +46,9 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[BIN_RDX:%.*]] = add [[TMP17]], [[TMP16]] ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] @@ -73,8 +73,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP26]]) +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index dba472b3471e7..6dcf992e06bf4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -1509,11 +1509,11 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 2 ; CHECK-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP31]], -1 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[WIDE_MASKED_GATHER4]], i32 [[TMP32]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll index 08a23e7226969..2470bca1e17b9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll @@ -14,7 +14,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor [[VEC_ICMP]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 7, i32 3 @@ -38,7 +37,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or [[VEC_PHI3]], [[NOT3]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or [[VEC_PHI4]], [[NOT4]] ; CHECK-VF4IC4: middle.block: -; CHECK-VF4IC4: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC4-NEXT: [[OR1:%.*]] = or [[VEC_SEL2]], [[VEC_SEL1]] ; CHECK-VF4IC4-NEXT: [[OR2:%.*]] = or [[VEC_SEL3]], [[OR1]] ; CHECK-VF4IC4-NEXT: [[OR3:%.*]] = or [[VEC_SEL4]], [[OR2]] @@ -75,7 +73,6 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor [[VEC_ICMP]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 %b, i32 %a @@ -109,7 +106,6 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor [[VEC_ICMP]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 1, i32 2 @@ -168,7 +164,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = or [[VEC_PHI]], [[VEC_ICMP]] ; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select [[MASK]], [[VEC_SEL_TMP]], [[VEC_PHI]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 1, i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll index 07ecbb7c34a9b..cf4d65318b7e5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll @@ -93,7 +93,6 @@ define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-NOTF: %[[LOAD:.*]] = load ; CHECK-NOTF: %[[ADD:.*]] = fadd fast %[[LOAD]] ; CHECK-NOTF: middle.block: -; CHECK-NOTF-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-NOTF-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, %[[ADD]]) ; CHECK-TF-NORED-LABEL: @fadd_red_fast @@ -102,7 +101,6 @@ define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-TF-NORED: %[[LOAD:.*]] = load ; CHECK-TF-NORED: %[[ADD:.*]] = fadd fast %[[LOAD]] ; CHECK-TF-NORED: middle.block: -; CHECK-TF-NORED-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-TF-NORED-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, %[[ADD]]) ; CHECK-TF-NOREC-LABEL: @fadd_red_fast @@ -151,7 +149,6 @@ define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-NEOVERSE-V1: %[[LOAD:.*]] = load ; CHECK-NEOVERSE-V1: %[[ADD:.*]] = fadd fast %[[LOAD]] ; CHECK-NEOVERSE-V1: middle.block: -; CHECK-NEOVERSE-V1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-NEOVERSE-V1-NEXT: call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, %[[ADD]]) entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 3a939952bf818..76084776b2b76 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -256,8 +256,8 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32( [[TMP12]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index ad0d40ee7fbde..d9dfc17fb589b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -35,6 +35,14 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -64,6 +72,14 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -98,6 +114,14 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -127,6 +151,14 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -160,6 +192,14 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -189,6 +229,14 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index f67b932b04a0d..ccc99f2fb2ccd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -33,6 +33,14 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -62,6 +70,14 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index 588fa7ca758ef..3f1320d90c640 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -664,8 +664,8 @@ define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -733,8 +733,8 @@ define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -802,8 +802,8 @@ define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -871,8 +871,8 @@ define i32 @umax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll index a1c388ed2cc27..6953834335669 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll @@ -51,8 +51,8 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 % ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PA]], [[WHILE_BODY_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll index 586aa63d7dc77..3ffd424f0cb89 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll @@ -64,8 +64,8 @@ define i32 @test(ptr nocapture readonly %x) { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[T]], [[N_VEC]] ; CHECK-NEXT: [[TMP15:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[TMP13]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[T]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[OUTEREND]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTERLOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll index 121405857f5a5..f58d864e1e147 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll @@ -26,9 +26,9 @@ define arm_aapcs_vfpcc i32 @minmaxval4(ptr nocapture readonly %x, ptr nocapture ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[N]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index 71f406e5a713c..f7b656d23417e 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -622,8 +622,8 @@ define i32 @i32_smin_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -691,8 +691,8 @@ define i32 @i32_smax_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -760,8 +760,8 @@ define i32 @i32_umin_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -829,8 +829,8 @@ define i32 @i32_umax_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index 832ec14cc3f55..fa3bce98aa84c 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -109,7 +109,6 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[TMP44]], [[TMP43]] ; CHECK-NEXT: [[BIN_RDX32:%.*]] = or <2 x i64> [[TMP45]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX33:%.*]] = or <2 x i64> [[TMP46]], [[BIN_RDX32]] @@ -118,6 +117,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[BIN_RDX36:%.*]] = or <2 x i64> [[TMP49]], [[BIN_RDX35]] ; CHECK-NEXT: [[BIN_RDX37:%.*]] = or <2 x i64> [[TMP50]], [[BIN_RDX36]] ; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[BIN_RDX37]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll index da9c17acc15df..1d1b20d16d090 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll @@ -100,8 +100,8 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP9]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 7ac07959dec4e..e742933685061 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -39,8 +39,8 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OUTLOOP: middle.block: -; OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) +; OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; OUTLOOP: scalar.ph: ; OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/ordered-reduction.ll index be27c33f36843..dfe52eaf13bb4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/ordered-reduction.ll @@ -28,8 +28,8 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UNORDERED: middle.block: -; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) +; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: ; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 76aaff6c9c83c..868df9cc5a7da 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -80,7 +80,16 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: ; CHECK: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] @@ -225,7 +234,16 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: ; CHECK: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll index 8ea88a451b01d..c634c2cf7fc67 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll @@ -426,8 +426,8 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLENUNK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VLENUNK: middle.block: -; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP8]]) +; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VLENUNK: scalar.ph: ; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -476,8 +476,8 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLEN128-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VLEN128: middle.block: -; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP8]]) +; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VLEN128: scalar.ph: ; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index 0bbd8cfe76dcf..2b58acbfe9cc9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -27,7 +27,6 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 @@ -57,7 +56,6 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) ; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] ; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 @@ -102,7 +100,6 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 @@ -132,7 +129,6 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) ; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] ; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 @@ -175,7 +171,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3 @@ -203,7 +198,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) ; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] ; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3 @@ -246,7 +240,6 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a @@ -274,7 +267,6 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) ; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] ; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a @@ -317,7 +309,6 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2 @@ -345,7 +336,6 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) ; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] ; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2 @@ -417,7 +407,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[PREDPHI]]) ; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP12]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0 @@ -449,7 +438,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; SCALABLE-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[PREDPHI]]) ; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP18]] ; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index 79374442e0b9d..47f9c7abdc912 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -66,7 +66,7 @@ define void @func_21() { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[TMP12]], i32 1 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 6, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index eedabde5fc1a1..10c1f21a7f023 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -212,11 +212,11 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[TMP123:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP123]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP120]], [[TMP119]] ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[TMP121]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX5:%.*]] = fadd fast <4 x float> [[TMP122]], [[BIN_RDX4]] ; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 51e10521a7a0e..d58b4f5053df6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index d22ccf6671e84..968dc09bec4e6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -319,7 +319,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP22]], i32 15 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 2048, [[VECTOR_MEMCHECK]] ], [ 2048, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[VECTOR_MEMCHECK]] ], [ 2048, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY]] ], [ 100, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ] @@ -399,7 +399,7 @@ define i16 @iv_and_step_trunc() { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index 484607e9b10b1..48035298a6d08 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -51,11 +51,11 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <16 x i32> [[TMP7]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <16 x i32> [[TMP8]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX11]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX2]], 56 @@ -78,8 +78,8 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT18]], [[N_VEC13]] ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC13]] ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP13]]) +; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC13]] ; CHECK-NEXT: br i1 [[CMP_N14]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 55c650d97bdf9..8be359af9e6a8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -851,11 +851,11 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP81]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP78]], [[TMP77]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP80]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP82:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1891,11 +1891,11 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP81]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP78]], [[TMP77]] ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP79]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP80]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP82:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index 3019a045013c0..ec838986ea48b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -82,8 +82,8 @@ define void @test(ptr %p) { ; VEC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VEC: middle.block: -; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP28]], i32 3 +; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; VEC: scalar.ph: ; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll index 5379e40684c77..804bcb54b4ba5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -289,10 +289,10 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <4 x float> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP8]], <4 x float> [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call nnan ninf nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] @@ -374,10 +374,10 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf ogt <4 x float> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP8]], <4 x float> [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index fa3b67d0f6c27..86eba22d35a3f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -259,8 +259,8 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP18]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -407,8 +407,8 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-GATHER-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-GATHER: middle.block: -; SINK-GATHER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; SINK-GATHER-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP66]]) +; SINK-GATHER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; SINK-GATHER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-GATHER: scalar.ph: ; SINK-GATHER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index e4baae43aa797..cf96469bc97b9 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -33,7 +33,7 @@ ; CHECK: br i1 %12, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]] ; ; CHECK: vec.epilog.middle.block: -; CHECK: br i1 %cmp.n7, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] +; CHECK: br i1 %cmp.n12, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] ; ; CHECK: vec.epilog.scalar.ph: ; CHECK: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index 2bb25a036485f..3374e2e67e35e 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -38,9 +38,9 @@ define i64 @dead_instructions_01(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 70942e05ac99a..9335396d01c90 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -30,10 +30,10 @@ define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] @@ -61,10 +61,10 @@ define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = freeze i1 [[TMP16]] ; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP17]], i32 1, i32 0 +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] @@ -129,10 +129,10 @@ define i32 @any_of_reduction_epilog_arg_as_start_value(ptr %src, i64 %N, i32 %st ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 [[START]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] @@ -160,10 +160,10 @@ define i32 @any_of_reduction_epilog_arg_as_start_value(ptr %src, i64 %N, i32 %st ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = freeze i1 [[TMP16]] ; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP17]], i32 1, i32 [[START]] +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] @@ -230,10 +230,10 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP6:%.*]] = freeze i1 [[TMP5]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i1 false, i1 false +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END6:%.*]] = trunc i64 [[N_VEC]] to i32 @@ -268,10 +268,10 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) ; CHECK-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] ; CHECK-NEXT: [[RDX_SELECT16:%.*]] = select i1 [[TMP13]], i1 false, i1 false +; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] @@ -365,10 +365,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[TMP23:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) ; CHECK-NEXT: [[TMP47:%.*]] = freeze i1 [[TMP23]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP47]], i1 false, i1 true +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[N_VEC]], 16 @@ -420,10 +420,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT20]], [[N_VEC8]] ; CHECK-NEXT: br i1 [[TMP44]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC8]] ; CHECK-NEXT: [[TMP49:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP43]]) ; CHECK-NEXT: [[TMP45:%.*]] = freeze i1 [[TMP49]] ; CHECK-NEXT: [[RDX_SELECT22:%.*]] = select i1 [[TMP45]], i1 false, i1 true +; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC8]] ; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index 3cf0ff72f95ef..4df5332a47d4c 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -28,8 +28,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] @@ -54,8 +54,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP10]]) +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] @@ -119,8 +119,8 @@ define float @fp_reduction_max(ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] @@ -147,8 +147,8 @@ define float @fp_reduction_max(ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP11]]) +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] @@ -313,9 +313,9 @@ define float @multiple_fp_rdx(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] @@ -344,9 +344,9 @@ define float @multiple_fp_rdx(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP14]]) ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP13]]) +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] @@ -422,8 +422,8 @@ define i32 @reduction_phi_start_val(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] @@ -448,8 +448,8 @@ define i32 @reduction_phi_start_val(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_COND]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 5907c58365fa0..08e7c75e48283 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -33,9 +33,17 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> -; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> -; CHECK-NEXT: No successors +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]> ; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2]]> @@ -94,9 +102,17 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> -; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> -; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]> = extract-from-end vp<[[FOR2_SPLICE]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]> = extract-from-end vp<[[FOR2_SPLICE]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]> diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index ff97e15417aff..a861d9c5fb7f0 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -661,9 +661,9 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 ; CHECK-NEXT: br i1 true, label %End, label %scalar.ph ; CHECK: scalar.ph: -; CHECK-NEXT: phi double [ 0.000000e+00, %Entry ], [ [[VECTOR_RECUR_EXTRACT9]], %middle.block ] -; CHECK-NEXT: phi double [ 0.000000e+00, %Entry ], [ [[TMP3]], %middle.block ] -; CHECK-NEXT: phi double [ 0.000000e+00, %Entry ], [ [[TMP0]], %middle.block ] +; CHECK-NEXT: phi double [ [[VECTOR_RECUR_EXTRACT9]], %middle.block ], [ 0.000000e+00, %Entry ] +; CHECK-NEXT: phi double [ [[TMP3]], %middle.block ], [ 0.000000e+00, %Entry ] +; CHECK-NEXT: phi double [ [[TMP0]], %middle.block ], [ 0.000000e+00, %Entry ] ; CHECK: End: ; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP0]], %middle.block ] ; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP3]], %middle.block ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 1af76467af04f..b24b34c698b59 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -848,8 +848,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -931,8 +931,8 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll index 544b9cfeb1bba..f9e7768ca20e9 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -123,8 +123,8 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -246,8 +246,8 @@ define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias % ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 30a98db665cb3..b53b72e651e3f 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -74,6 +74,13 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%conv>, ir<1> +; CHECK-NEXT: EMIT branch-on-cond ir +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1]]> @@ -149,6 +156,13 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> +; CHECK-NEXT: EMIT branch-on-cond ir +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1]]> @@ -222,7 +236,14 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]> -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> +; CHECK-NEXT: EMIT branch-on-cond ir +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %res = vp<[[RED_RES]]> @@ -322,7 +343,14 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%conv>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%conv>, ir<1> +; CHECK-NEXT: EMIT branch-on-cond ir +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1]]> @@ -408,7 +436,14 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> +; CHECK-NEXT: EMIT branch-on-cond ir +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1]]> @@ -483,7 +518,14 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%l>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%l>, ir<1> +; CHECK-NEXT: EMIT branch-on-cond ir +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %.pn = vp<[[RESUME_1]]> diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 2e85fedcafc9a..b259375d911ea 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -55,8 +55,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -165,8 +165,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; SINK-AFTER-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -265,10 +265,10 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP17]], <4 x i32> [[TMP18]]) ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX]]) ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -336,8 +336,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP15]], i32 [[TMP16]]) +; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] @@ -399,9 +399,9 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; SINK-AFTER-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; SINK-AFTER-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]]) ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -530,8 +530,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -674,8 +674,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -916,9 +916,9 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1009,9 +1009,9 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1807,8 +1807,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1915,8 +1915,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -2273,8 +2273,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -2386,8 +2386,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; SINK-AFTER-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll b/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll index fbcd711b5ca4f..3fd1b573a1ed4 100644 --- a/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll +++ b/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll @@ -4,11 +4,11 @@ ; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=4 <%s | FileCheck %s ; ; CHECK: middle.block: -; CHECK-NEXT: %{{.*}}= icmp eq i64{{.*}}, !dbg ![[DL:[0-9]+]] -; CHECK-NEXT: %{{.*}}= add <4 x i32>{{.*}}, !dbg ![[DL]] +; CHECK-NEXT: %{{.*}}= add <4 x i32>{{.*}}, !dbg ![[DL:[0-9]+]] ; CHECK-NEXT: %{{.*}}= add <4 x i32>{{.*}}, !dbg ![[DL]] ; CHECK-NEXT: %{{.*}}= add <4 x i32>{{.*}}, !dbg ![[DL]] ; CHECK-NEXT: %{{.*}}= call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> {{.*}}), !dbg ![[DL]] +; CHECK-NEXT: %{{.*}}= icmp eq i64{{.*}}, !dbg ![[DL]] ; CHECK-NEXT: br i1 %{{.*}}, !dbg ![[DL]] ; CHECK: ![[DL]] = !DILocation(line: 5, diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index 00ebe9327ae20..692615a49ad9a 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -797,8 +797,8 @@ define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP18]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -865,8 +865,8 @@ define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP14]] +; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index a4b6aca3ece92..bb0e2b19cfa4b 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -200,8 +200,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; UNROLL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI4]], [[PREDPHI]] +; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; UNROLL-NEXT: [[TMP16:%.*]] = xor i1 [[CMP_N]], true ; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP16]]) ; UNROLL-NEXT: br label [[SCALAR_PH]] @@ -276,8 +276,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; UNROLL-NOSIMPLIFY-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NOSIMPLIFY: middle.block: -; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; UNROLL-NOSIMPLIFY-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI4]], [[PREDPHI]] +; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_INC26_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NOSIMPLIFY: scalar.ph: ; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[V_1]], [[FOR_BODY14_PREHEADER]] ] @@ -357,8 +357,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VEC: middle.block: -; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI]]) +; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: [[TMP20:%.*]] = xor i1 [[CMP_N]], true ; VEC-NEXT: call void @llvm.assume(i1 [[TMP20]]) ; VEC-NEXT: br label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 00b00bf160174..7af3e4740ea8c 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -647,8 +647,8 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP3]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -685,8 +685,8 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) { ; IND-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IND: middle.block: -; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; IND-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP1]]) +; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -727,9 +727,9 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) { ; UNROLL-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP3]], [[TMP2]] ; UNROLL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) +; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -775,9 +775,9 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP7]], [[TMP6]] ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -818,9 +818,9 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) { ; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; INTERLEAVE: middle.block: -; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP3]], [[TMP2]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX]]) +; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -907,8 +907,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[TMP21:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[TMP19]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -967,8 +967,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IND: middle.block: -; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; IND-NEXT: [[TMP20:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[TMP18]]) +; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1045,9 +1045,9 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x float> [[TMP35]], [[TMP34]] ; UNROLL-NEXT: [[TMP37:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[BIN_RDX]]) +; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1126,9 +1126,9 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x float> [[TMP36]], [[TMP35]] ; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[BIN_RDX]]) +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -2005,8 +2005,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP15]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -2071,8 +2071,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; IND: middle.block: -; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; IND-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP13]]) +; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -2160,9 +2160,9 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP23]], [[TMP22]] ; UNROLL-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]]) +; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -2259,9 +2259,9 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP31]], [[TMP30]] ; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]]) +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -2384,9 +2384,9 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; INTERLEAVE: middle.block: -; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP43]], [[TMP42]] ; INTERLEAVE-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -3216,8 +3216,8 @@ define i32 @testoverflowcheck() { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] @@ -3259,8 +3259,8 @@ define i32 @testoverflowcheck() { ; IND-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; IND: middle.block: ; IND-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP4]], [[BROADCAST_SPLAT]] -; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; IND-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP6]]) +; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: ; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] @@ -3301,8 +3301,8 @@ define i32 @testoverflowcheck() { ; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP4]], [[BROADCAST_SPLAT]] -; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; UNROLL-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP6]]) +; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] @@ -3347,9 +3347,9 @@ define i32 @testoverflowcheck() { ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP6]], [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] @@ -3391,8 +3391,8 @@ define i32 @testoverflowcheck() { ; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], [[BROADCAST_SPLAT]] -; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; INTERLEAVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP6]]) +; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: ; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] @@ -6256,8 +6256,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -6327,8 +6327,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; IND: middle.block: -; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i64 1 +; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: ; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -6402,8 +6402,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i64 1 +; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -6485,8 +6485,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -6560,8 +6560,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; INTERLEAVE: middle.block: -; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i64 3 +; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: ; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index bcc8632da4c64..c5b5404871049 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -109,6 +109,14 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: Successor(s): middle.block ; DBG-EMPTY: ; DBG-NEXT: middle.block: +; DBG-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]> +; DBG-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; DBG-NEXT: Successor(s): ir-bb, ir-bb +; DBG-EMPTY: +; DBG-NEXT: ir-bb +; DBG-NEXT: No successors +; DBG-EMPTY: +; DBG-NEXT: ir-bb ; DBG-NEXT: No successors ; DBG-NEXT: } @@ -203,6 +211,14 @@ exit: ; DBG-EMPTY: ; DBG-NEXT: middle.block: ; DBG-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end vp<[[SCALAR_STEPS]]>, ir<1> +; DBG-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> +; DBG-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; DBG-NEXT: Successor(s): ir-bb, ir-bb +; DBG-EMPTY: +; DBG-NEXT: ir-bb +; DBG-NEXT: No successors +; DBG-EMPTY: +; DBG-NEXT: ir-bb ; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: Live-out i32 %for = vp<[[RESUME_1]]> diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 10a8dc7688fbc..9589e7b56cd08 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1511,8 +1511,8 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i16> [[WIDE_VEC]], i64 7 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll index b7519b8b149e7..50c67040cfb2a 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll @@ -237,8 +237,8 @@ define i32 @variant_val_store_to_inv_address(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP3]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index c86ea002a75fc..20d612a548b15 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -46,8 +46,8 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] @@ -396,9 +396,9 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]]) ; CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[FOR_BODY3_LR_PH]] ], [ [[TMP4]], [[VECTOR_MEMCHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll index 7411c88001290..f3885b0b100e8 100644 --- a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll @@ -57,7 +57,7 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF8]] ; CHECK-NEXT: br label [[VECTOR_BODY11:%.*]] -; CHECK: vector.body11: +; CHECK: vector.body10: ; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ 0, [[VECTOR_PH7]] ], [ [[INDEX_NEXT13:%.*]], [[VECTOR_BODY11]] ] ; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope !4, !noalias !7 ; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX12]], 4 @@ -87,7 +87,7 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF24]] ; CHECK-NEXT: br label [[VECTOR_BODY28:%.*]] -; CHECK: vector.body28: +; CHECK: vector.body27: ; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ 0, [[VECTOR_PH23]] ], [ [[INDEX_NEXT30:%.*]], [[VECTOR_BODY28]] ] ; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope !10, !noalias !13 ; CHECK-NEXT: [[INDEX_NEXT30]] = add nuw i64 [[INDEX29]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-align.ll b/llvm/test/Transforms/LoopVectorize/reduction-align.ll index a7f1fbc2aa436..4ae709892233f 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-align.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-align.ll @@ -35,9 +35,9 @@ define void @fn(ptr %hbuf, ptr %ref, i32 %height) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[HEIGHT]], [[N_VEC]] ; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) ; CHECK-NEXT: store i16 [[TMP3]], ptr [[HBUF]], align 1 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[HEIGHT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index 6b8fef4e7676a..873f6364f8281 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -596,8 +596,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[TMP50:%.*]] = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> [[PREDPHI15]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -731,8 +731,8 @@ define i32 @cond-uncond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -895,8 +895,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI15]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1037,8 +1037,8 @@ define i32 @uncond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PREDPHI]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1166,8 +1166,8 @@ define i32 @uncond_cond_uncond(ptr noalias %src1, ptr noalias %src2, ptr noalias ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP28]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll b/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll index 491889d6008ee..0b98a054ebea3 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll @@ -27,7 +27,6 @@ define i32 @reduction_sum(i64 %n, ptr noalias nocapture %A) { ; UF3-NEXT: br i1 [[EC]], label %middle.block, label %vector.body ; ; UF3-LABEL: middle.block: -; UF3-NEXT: %cmp.n = icmp eq i64 {{.+}}, %n.vec ; UF3-NEXT: [[RDX0:%.+]] = add <4 x i32> [[SUM1_NEXT]], [[SUM0_NEXT]] ; UF3-NEXT: [[RDX1:%.+]] = add <4 x i32> [[SUM2_NEXT]], [[RDX0]] ; UF3-NEXT: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[RDX1]]) @@ -70,7 +69,6 @@ define i32 @reduction_sum(i64 %n, ptr noalias nocapture %A) { ; UF5-NEXT: br i1 [[EC]], label %middle.block, label %vector.body ; ; UF5-LABEL: middle.block: -; UF5-NEXT: %cmp.n = icmp eq i64 {{.+}}, %n.vec ; UF5-NEXT: [[RDX0:%.+]] = add <4 x i32> [[SUM1_NEXT]], [[SUM0_NEXT]] ; UF5-NEXT: [[RDX1:%.+]] = add <4 x i32> [[SUM2_NEXT]], [[RDX0]] ; UF5-NEXT: [[RDX2:%.+]] = add <4 x i32> [[SUM3_NEXT]], [[RDX1]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 8291e5ac3a899..337e1592a8304 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -25,10 +25,10 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8> ; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -104,10 +104,10 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i8> ; CHECK-NEXT: [[TMP9:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = zext i8 [[TMP9]] to i32 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -183,10 +183,10 @@ define i32 @PR35734(i32 %x, i32 %y) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i32 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[X]], [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index 0d48808519649..b66ce4047ad95 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -29,8 +29,8 @@ define i32 @reduction_sum(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] @@ -110,8 +110,8 @@ define i32 @reduction_prod(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] @@ -191,8 +191,8 @@ define i32 @reduction_mix(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] @@ -272,8 +272,8 @@ define i32 @reduction_mul(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] @@ -351,8 +351,8 @@ define i32 @start_at_non_zero(ptr %in, ptr %coeff, ptr %out, i32 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -427,8 +427,8 @@ define i32 @reduction_and(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -503,8 +503,8 @@ define i32 @reduction_or(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -579,8 +579,8 @@ define i32 @reduction_xor(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -699,8 +699,8 @@ define i32 @reduction_sub_lhs(i32 %n, ptr %A) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -1063,8 +1063,8 @@ define i32 @reduction_sum_multiuse(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] @@ -1221,8 +1221,8 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -1299,8 +1299,8 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index 9045ea02098a2..e68e658f0e879 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -28,7 +28,6 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]] ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}} ; CHECK: middle.block: -; CHECK-NEXT: %cmp.n = icmp eq i32 256, %n.vec ; CHECK-NEXT: [[TMP37:%.*]] = trunc [[TMP34]] to ; CHECK-NEXT: [[TMP38:%.*]] = trunc [[TMP36]] to ; CHECK-NEXT: [[BIN_RDX:%.*]] = add [[TMP38]], [[TMP37]] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll index 2c47c79e44c09..6a9f83a9e0aa2 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll @@ -30,7 +30,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP17]], <2 x i1> [[VEC_PHI]] ; CHECK-VF2IC1: br i1 {{%.*}}, label %middle.block, label %vector.body ; CHECK-VF2IC1: middle.block: -; CHECK-VF2IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF2IC1-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[PREDPHI]]) ; CHECK-VF2IC1-NEXT: [[FR_TMP20:%.*]] = freeze i1 [[TMP20]] ; CHECK-VF2IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP20]], i32 1, i32 0 @@ -86,7 +85,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF1IC2-NEXT: [[PREDPHI5]] = select i1 [[TMP5]], i1 [[TMP15]], i1 [[VEC_PHI2]] ; CHECK-VF1IC2: br i1 {{%.*}}, label %middle.block, label %vector.body ; CHECK-VF1IC2: middle.block: -; CHECK-VF1IC2-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF1IC2-NEXT: [[OR:%.*]] = or i1 [[PREDPHI5]], [[PREDPHI]] ; CHECK-VF1IC2-NEXT: [[FR_OR:%.*]] = freeze i1 [[OR]] ; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_OR]], i32 1, i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index 58e0eb7456a4b..993b56a05207b 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -11,7 +11,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 @@ -34,7 +33,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <4 x i1> [[VEC_PHI3]], [[NOT3]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or <4 x i1> [[VEC_PHI4]], [[NOT4]] ; CHECK-VF4IC4: middle.block: -; CHECK-VF4IC4-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = or <4 x i1> [[VEC_SEL2]], [[VEC_SEL1]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = or <4 x i1> [[VEC_SEL3]], [[VEC_SEL5]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = or <4 x i1> [[VEC_SEL4]], [[VEC_SEL6]] @@ -65,7 +63,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF1IC4-NEXT: [[VEC_SEL3:%.*]] = or i1 [[VEC_PHI3]], [[NOT3]] ; CHECK-VF1IC4-NEXT: [[VEC_SEL4:%.*]] = or i1 [[VEC_PHI4]], [[NOT4]] ; CHECK-VF1IC4: middle.block: -; CHECK-VF1IC4-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = or i1 [[VEC_SEL2]], [[VEC_SEL1]] ; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = or i1 [[VEC_SEL3]], [[VEC_SEL5]] ; CHECK-VF1IC4-NEXT: [[OR_RDX:%.*]] = or i1 [[VEC_SEL4]], [[VEC_SEL6]] @@ -99,7 +96,6 @@ define i32 @select_const_i32_from_icmp2(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[VEC_ICMP]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 @@ -135,7 +131,6 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a @@ -167,7 +162,6 @@ define i32 @select_const_i32_from_fcmp_fast(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 @@ -199,7 +193,6 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 @@ -234,7 +227,6 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) ; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index 7ab2459ada2ed..60ef17deb0967 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -26,7 +26,15 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: No successors +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index 0fbaafe0ccb63..b66103a64cf1f 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -37,12 +37,21 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; ; CHECK: Executing best plan with VF=8, UF=2 ; CHECK-NEXT: VPlan 'Final VPlan for VF={8},UF={2}' { ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -69,6 +78,14 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 65df5973ac75b..3d405e1631daf 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -8,7 +8,8 @@ define void @foo(i64 %n) { ; CHECK: VPlan 'HCFGBuilder: Plain CFG ; CHECK-NEXT: { -; CHECK-NEXT: ir<8> = original trip-count +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<8> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -42,6 +43,14 @@ define void @foo(i64 %n) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq ir<8>, vp<[[VTC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 4c93cb870fadb..36bd4f77cd4f2 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -34,7 +34,15 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: No successors +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -87,7 +95,15 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: No successors +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: @@ -138,6 +154,14 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%red>, ir<%red.next> +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out float %red.next.lcssa = vp<[[RED_RES]]> @@ -187,6 +211,14 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RED_RES:.+]]> = compute-reduction-result ir<%red>, ir<%red.next> +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -257,6 +289,14 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -322,6 +362,14 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<256>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -387,6 +435,14 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%sum.07>, ir<[[MULADD]]> +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out float %muladd.lcssa = vp<[[RED_RES]]> @@ -464,6 +520,14 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<128>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-NEXT:} ; @@ -533,6 +597,14 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -582,6 +654,14 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %lcssa = ir<%add> @@ -634,6 +714,14 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -686,6 +774,14 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -758,6 +854,14 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -819,6 +923,14 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -913,6 +1025,14 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-from-end ir<%for.1.next>, ir<2> ; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1.lcssa = vp<[[FOR_RESULT]]> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll index 8c18c107b39fc..e9ac9e581e585 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -45,7 +45,15 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: No successors +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<0>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 41bb3ca8694fa..c4a25f12c03d7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -1066,6 +1066,13 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT branch-on-cond ir +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -1137,6 +1144,14 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll index 25d526afefb39..7d10bb211dbae 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll @@ -18,8 +18,12 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c ; CHECK: for.body.preheader10: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER13:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: for.body.preheader13: +; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC:%.*]], [[MIDDLE_BLOCK:%.*]] ] +; CHECK-NEXT: [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7:%.*]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967288 +; CHECK-NEXT: [[N_VEC]] = and i64 [[TMP0]], 4294967288 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -33,131 +37,14 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c ; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[WIDE_LOAD12]], [[VEC_PHI11]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]] -; CHECK: for.body.preheader13: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER13]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[SUM_07_PH]], [[FOR_BODY_PREHEADER13]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP8]], [[SUM_07]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: if.then: -; CHECK-NEXT: tail call void @llvm.trap() -; CHECK-NEXT: unreachable -; -entry: - %array.addr = alloca ptr, align 8 - %count.addr = alloca i32, align 4 - %n.addr = alloca i32, align 4 - %sum = alloca i32, align 4 - %i = alloca i32, align 4 - store ptr %array, ptr %array.addr, align 8 - store i32 %count, ptr %count.addr, align 4 - store i32 %n, ptr %n.addr, align 4 - call void @llvm.lifetime.start.p0(i64 4, ptr %sum) #3 - store i32 0, ptr %sum, align 4 - call void @llvm.lifetime.start.p0(i64 4, ptr %i) #3 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %1 = load i32, ptr %n.addr, align 4 - %cmp = icmp ult i32 %0, %1 - br i1 %cmp, label %for.body, label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond - call void @llvm.lifetime.end.p0(i64 4, ptr %i) #3 - br label %for.end - -for.body: ; preds = %for.cond - %2 = load i32, ptr %i, align 4 - %3 = load i32, ptr %count.addr, align 4 - %cmp1 = icmp uge i32 %2, %3 - br i1 %cmp1, label %if.then, label %if.end - -if.then: ; preds = %for.body - call void @llvm.trap() - br label %if.end - -if.end: ; preds = %if.then, %for.body - %4 = load ptr, ptr %array.addr, align 8 - %5 = load i32, ptr %i, align 4 - %idxprom = zext i32 %5 to i64 - %arrayidx = getelementptr inbounds i32, ptr %4, i64 %idxprom - %6 = load i32, ptr %arrayidx, align 4 - %7 = load i32, ptr %sum, align 4 - %add = add nsw i32 %7, %6 - store i32 %add, ptr %sum, align 4 - br label %for.inc - -for.inc: ; preds = %if.end - %8 = load i32, ptr %i, align 4 - %inc = add i32 %8, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond.cleanup - %9 = load i32, ptr %sum, align 4 - call void @llvm.lifetime.end.p0(i64 4, ptr %sum) - ret i32 %9 -} - -%"class.std::__1::span" = type { ptr, i64 } -%"class.std::__1::__wrap_iter" = type { ptr } - -define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1, i64 noundef %n) { -; CHECK-LABEL: define dso_local noundef i32 @sum_prefix_with_sum( -; CHECK-SAME: ptr nocapture readonly [[S_COERCE0:%.*]], i64 [[S_COERCE1:%.*]], i64 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i64 [[N]], 0 -; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i64 [[TMP0]], [[S_COERCE1]] -; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[ENTRY:%.*]], label [[COND_FALSE_I:%.*]], !prof [[PROF4:![0-9]+]] -; CHECK: for.body.preheader8: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER11:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD10]], [[VEC_PHI9]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[SPAN_CHECKED_ACCESS_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: middle.block: +; CHECK-NEXT: [[TMP7]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] +; CHECK-NEXT: [[ADD]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[ADD:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER11]] -; CHECK: for.body.preheader11: -; CHECK-NEXT: [[I_07_PH:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[SPAN_CHECKED_ACCESS_EXIT]] ] -; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ] -; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[RET_0_LCSSA1:%.*]] = phi i32 [ 0, [[ENTRY1:%.*]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ [[ADD1:%.*]], [[FOR_BODY1]] ] ; CHECK-NEXT: ret i32 [[RET_0_LCSSA1]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll index f0a1846389580..791ef7cbeb361 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll @@ -53,6 +53,9 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O2-NEXT: br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]] ; O2: for.body4.preheader: ; O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER9:%.*]], label [[VECTOR_BODY:%.*]] +; O2: for.body4.preheader9: +; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, [[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK:%.*]] ] +; O2-NEXT: br label [[FOR_BODY4:%.*]] ; O2: vector.body: ; O2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY4_PREHEADER]] ] ; O2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]] @@ -65,12 +68,9 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]] ; O2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; O2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; O2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; O2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; O2: middle.block: ; O2-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER9]] -; O2: for.body4.preheader9: -; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, [[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; O2-NEXT: br label [[FOR_BODY4:%.*]] ; O2: for.cond.cleanup: ; O2-NEXT: ret void ; O2: for.cond.cleanup3: diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index c658724278fe0..e7b5119048915 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -67,10 +67,10 @@ class VPlanTestBase : public testing::Test { assert(!verifyFunction(F) && "input function must be valid"); doAnalysis(F); - Loop *L = LI->getLoopFor(LoopHeader); - auto Plan = VPlan::createInitialVPlan(SE->getBackedgeTakenCount(L), *SE, - L->getLoopPreheader()); - VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); + auto Plan = VPlan::createInitialVPlan( + SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE, true, false, + LI->getLoopFor(LoopHeader)); + VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); HCFGBuilder.buildHierarchicalCFG(); return Plan; } @@ -81,10 +81,10 @@ class VPlanTestBase : public testing::Test { assert(!verifyFunction(F) && "input function must be valid"); doAnalysis(F); - Loop *L = LI->getLoopFor(LoopHeader); - auto Plan = VPlan::createInitialVPlan(SE->getBackedgeTakenCount(L), *SE, - L->getLoopPreheader()); - VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); + auto Plan = VPlan::createInitialVPlan( + SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE, true, false, + LI->getLoopFor(LoopHeader)); + VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); HCFGBuilder.buildPlainCFG(); return Plan; }