diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fa5be21dc2b8a..8e4d70aaf7538 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4030,7 +4030,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
     case VPDef::VPScalarIVStepsSC:
     case VPDef::VPReplicateSC:
     case VPDef::VPInstructionSC:
-    case VPDef::VPCanonicalIVPHISC:
     case VPDef::VPVectorPointerSC:
     case VPDef::VPVectorEndPointerSC:
     case VPDef::VPExpandSCEVSC:
@@ -8428,6 +8427,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
                      m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
            "Did not find the canonical IV increment");
     cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
+    Plan->getCanonicalIVInfo().HasNUW = false;
   }
@@ -8491,8 +8491,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
       // latter are added above for masking.
       // FIXME: Migrate code relying on the underlying instruction from VPlan0
       // to construct recipes below to not use the underlying instruction.
-      if (isa(
-              &R) ||
+      if (isa(&R) ||
           (isa(&R) && !UnderlyingValue))
         continue;
@@ -8679,8 +8678,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
   VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
                                 Builder, BlockMaskCache, nullptr /*LVer*/);
   for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
-    if (isa<VPCanonicalIVPHIRecipe>(&R))
-      continue;
     auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
     RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
   }
@@ -9430,8 +9427,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
   SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
   for (VPRecipeBase &R :
        EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
-    if (isa<VPCanonicalIVPHIRecipe>(&R))
-      continue;
     EpiWidenedPhis.insert(
         cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
   }
@@ -9492,8 +9487,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
   VPPhi *ResumePhi = nullptr;
   if (ResumePhiIter == MainScalarPH->phis().end()) {
     VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
+    Type *Ty = VPTypeAnalysis(MainPlan).inferScalarType(VectorTC);
     ResumePhi = ScalarPHBuilder.createScalarPhi(
-        {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
+        {VectorTC, MainPlan.getOrAddLiveIn(Constant::getNullValue(Ty))}, {},
         "vec.epilog.resume.val");
   } else {
     ResumePhi = cast<VPPhi>(&*ResumePhiIter);
@@ -9523,7 +9519,7 @@ static SmallVector preparePlanForEpilogueVectorLoop(
   // Ensure that the start values for all header phi recipes are updated before
   // vectorizing the epilogue loop.
-  VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+
   // When vectorizing the epilogue loop, the canonical induction start
   // value needs to be changed from zero to the value after the main
   // vector loop. Find the resume value created during execution of the main
@@ -9552,6 +9548,7 @@ static SmallVector preparePlanForEpilogueVectorLoop(
     EPI.VectorTripCount = EPResumeVal->getOperand(0);
   }
   VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
+  VPValue *IV = VectorLoop->getCanonicalIV();
   assert(all_of(IV->users(),
                 [](const VPUser *U) {
                   return isa(U) ||
@@ -9562,11 +9559,14 @@ static SmallVector preparePlanForEpilogueVectorLoop(
                 }) &&
          "the canonical IV should only be used by its increment or "
          "ScalarIVSteps when resetting the start value");
-  IV->setOperand(0, VPV);
+  VPBuilder Builder(Header, Header->getFirstNonPhi());
+  VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
+  IV->replaceAllUsesWith(Add);
+  Add->setOperand(0, IV);
 
   DenseMap<Value *, Value *> ToFrozen;
   SmallVector<Instruction *> InstsToMove;
-  for (VPRecipeBase &R : drop_begin(Header->phis())) {
+  for (VPRecipeBase &R : Header->phis()) {
     Value *ResumeV = nullptr;
     // TODO: Move setting of resume values to prepareToExecute.
     if (auto *ReductionPhi = dyn_cast(&R)) {
@@ -9596,12 +9596,12 @@ static SmallVector preparePlanForEpilogueVectorLoop(
         ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
             EPI.MainLoopIterationCountCheck);
 
-        // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions requires
-        // an adjustment to the resume value. The resume value is adjusted to
-        // the sentinel value when the final value from the main vector loop
-        // equals the start value. This ensures correctness when the start value
-        // might not be less than the minimum value of a monotonically
-        // increasing induction variable.
+        // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions
+        // requires an adjustment to the resume value. The resume value is
+        // adjusted to the sentinel value when the final value from the main
+        // vector loop equals the start value. This ensures correctness when
+        // the start value might not be less than the minimum value of a
+        // monotonically increasing induction variable.
         BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
         IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
         Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 02eb6375aac41..a36656ede623e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -768,10 +768,17 @@ static std::pair cloneFrom(VPBlockBase *Entry) {
 
 VPRegionBlock *VPRegionBlock::clone() {
   const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
-  auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
-                                                   getName(), isReplicator());
+  auto *NewRegion =
+      getPlan()->createVPRegionBlock(NewEntry, NewExiting, getName());
   for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
     Block->setParent(NewRegion);
+
+  if (CanIVInfo.CanIV) {
+    NewRegion->CanIVInfo.CanIV = new VPRegionValue();
+    NewRegion->CanIVInfo.HasNUW = CanIVInfo.HasNUW;
+    NewRegion->CanIVInfo.DL = CanIVInfo.DL;
+  }
+
   return NewRegion;
 }
@@ -856,6 +863,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
                           VPSlotTracker &SlotTracker) const {
   O << Indent << (isReplicator() ? " " : " ") << getName() << ": {";
   auto NewIndent = Indent + "  ";
+  if (auto *CanIV = getCanonicalIV()) {
+    O << '\n';
+    CanIV->print(O, SlotTracker);
+    O << '\n';
+  }
   for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
     O << '\n';
     BlockBase->print(O, NewIndent, SlotTracker);
@@ -868,18 +880,37 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
 
 void VPRegionBlock::dissolveToCFGLoop() {
   auto *Header = cast<VPBasicBlock>(getEntry());
-  if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
-    assert(this == getPlan()->getVectorLoopRegion() &&
-           "Canonical IV must be in the entry of the top-level loop region");
-    auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
-        {CanIV->getStartValue(), CanIV->getBackedgeValue()},
-        CanIV->getDebugLoc(), "index");
+  auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
+  VPValue *CanIV = getCanonicalIV();
+  if (CanIV && CanIV->getNumUsers() > 0) {
+    auto *ExitingTerm = ExitingLatch->getTerminator();
+    VPInstruction *CanIVInc = nullptr;
+    // Check if there's a canonical IV increment via an existing terminator.
+    if (match(ExitingTerm,
+              m_BranchOnCount(m_VPInstruction(CanIVInc), m_VPValue()))) {
+      assert(match(CanIVInc,
+                   m_Add(m_CombineOr(m_Specific(CanIV),
+                                     m_Add(m_Specific(CanIV), m_LiveIn())),
+                         m_VPValue())) &&
+             "invalid existing IV increment");
+    }
+    VPlan &Plan = *getPlan();
+    if (!CanIVInc) {
+      CanIVInc = VPBuilder(ExitingTerm)
+                     .createOverflowingOp(
+                         Instruction::Add, {CanIV, &Plan.getVFxUF()},
+                         {CanIVInfo.HasNUW, false}, CanIVInfo.DL, "index.next");
+    }
+    Type *CanIVTy = VPTypeAnalysis(Plan).inferScalarType(CanIV);
+    auto *ScalarR =
+        VPBuilder(Header, Header->begin())
+            .createScalarPhi(
+                {Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 0)), CanIVInc},
+                CanIVInfo.DL, "index");
     CanIV->replaceAllUsesWith(ScalarR);
-    CanIV->eraseFromParent();
   }
   VPBlockBase *Preheader = getSinglePredecessor();
-  auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
   VPBlockBase *Middle = getSingleSuccessor();
   VPBlockUtils::disconnectBlocks(Preheader, this);
   VPBlockUtils::disconnectBlocks(this, Middle);
@@ -916,7 +947,10 @@ VPlan::~VPlan() {
         for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
           R.setOperand(I, &DummyValue);
       }
+    } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
+      CanIV->replaceAllUsesWith(&DummyValue);
     }
+
     delete VPB;
   }
   for (VPValue *VPV : getLiveIns())
@@ -1224,6 +1258,11 @@ VPlan *VPlan::duplicate() {
   // else NewTripCount will be created and inserted into Old2NewVPValues when
   // TripCount is cloned. In any case NewPlan->TripCount is updated below.
 
+  if (auto *LoopRegion = getVectorLoopRegion()) {
+    Old2NewVPValues[LoopRegion->getCanonicalIV()] =
+        NewPlan->getVectorLoopRegion()->getCanonicalIV();
+  }
+
   remapOperands(Entry, NewEntry, Old2NewVPValues);
 
   // Initialize remaining fields of cloned VPlan.
@@ -1404,6 +1443,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
 
 /// Returns true if there is a vector loop region and \p VPV is defined in a
 /// loop region.
 static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
+  if (isa<VPRegionValue>(VPV))
+    return true;
   const VPRecipeBase *DefR = VPV->getDefiningRecipe();
   return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
                   DefR->getParent()->getEnclosingLoopRegion());
@@ -1513,9 +1554,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
   ReversePostOrderTraversal>
       RPOT(VPBlockDeepTraversalWrapper(Plan.getEntry()));
-  for (const VPBasicBlock *VPBB :
-       VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
-    assignNames(VPBB);
+  for (const VPBlockBase *VPB : RPOT) {
+    if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
+      assignNames(VPBB);
+    } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
+      assignName(CanIV);
+  }
 }
 
 void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c167dd7f65fac..926bb88995348 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -551,7 +551,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPWidenSelectSC:
     case VPRecipeBase::VPBlendSC:
     case VPRecipeBase::VPPredInstPHISC:
-    case VPRecipeBase::VPCanonicalIVPHISC:
     case VPRecipeBase::VPActiveLaneMaskPHISC:
     case VPRecipeBase::VPFirstOrderRecurrencePHISC:
     case VPRecipeBase::VPWidenPHISC:
@@ -1957,12 +1956,6 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
 /// the backedge is the second operand.
 ///
 /// Inductions are modeled using the following sub-classes:
-/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
-///   starting at a specified value (zero for the main vector loop, the resume
-///   value for the epilogue vector loop) and stepping by 1. The induction
-///   controls exiting of the vector loop by comparing against the vector trip
-///   count. Produces a single scalar PHI for the induction value per
-///   iteration.
 /// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
 ///   floating point inductions with arbitrary start and step values. Produces
 ///   a vector PHI per-part.
@@ -3435,63 +3428,6 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe {
   const SCEV *getSCEV() const { return Expr; }
 };
 
-/// Canonical scalar induction phi of the vector loop. Starting at the specified
-/// start value (either 0 or the resume value when vectorizing the epilogue
-/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
-/// canonical induction variable.
-class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
-public:
-  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
-      : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
-
-  ~VPCanonicalIVPHIRecipe() override = default;
-
-  VPCanonicalIVPHIRecipe *clone() override {
-    auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
-    R->addOperand(getBackedgeValue());
-    return R;
-  }
-
-  VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
-
-  void execute(VPTransformState &State) override {
-    llvm_unreachable("cannot execute this recipe, should be replaced by a "
-                     "scalar phi recipe");
-  }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  /// Print the recipe.
-  void print(raw_ostream &O, const Twine &Indent,
-             VPSlotTracker &SlotTracker) const override;
-#endif
-
-  /// Returns the scalar type of the induction.
-  Type *getScalarType() const {
-    return getStartValue()->getLiveInIRValue()->getType();
-  }
-
-  /// Returns true if the recipe only uses the first lane of operand \p Op.
-  bool onlyFirstLaneUsed(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    return true;
-  }
-
-  /// Returns true if the recipe only uses the first part of operand \p Op.
-  bool onlyFirstPartUsed(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    return true;
-  }
-
-  /// Return the cost of this VPCanonicalIVPHIRecipe.
-  InstructionCost computeCost(ElementCount VF,
-                              VPCostContext &Ctx) const override {
-    // For now, match the behavior of the legacy cost model.
-    return 0;
-  }
-};
-
 /// A recipe for generating the active lane mask for the vector loop that is
 /// used to predicate the vector operations.
 /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
@@ -3570,14 +3506,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
 class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
                                  public VPUnrollPartAccessor<1> {
 public:
-  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
+  VPWidenCanonicalIVRecipe(VPValue *CanonicalIV)
       : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
 
   ~VPWidenCanonicalIVRecipe() override = default;
 
   VPWidenCanonicalIVRecipe *clone() override {
-    return new VPWidenCanonicalIVRecipe(
-        cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
+    return new VPWidenCanonicalIVRecipe(getOperand(0));
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
@@ -3616,8 +3551,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
 public:
   VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
-                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
-                    const Twine &Name = "")
+                    VPValue *CanonicalIV, VPValue *Step, const Twine &Name = "")
       : VPDerivedIVRecipe(
             IndDesc.getKind(),
             dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
@@ -3963,6 +3897,23 @@ class VPIRBasicBlock : public VPBasicBlock {
   BasicBlock *getIRBasicBlock() const { return IRBB; }
 };
 
+/// Track information about the canonical IV value of a region.
+struct VPCanonicalIVInfo {
+  VPRegionValue *CanIV = nullptr;
+  bool HasNUW = true;
+  DebugLoc DL = DebugLoc::getUnknown();
+
+  VPCanonicalIVInfo(VPRegionValue *CanIV, bool HasNUW, DebugLoc DL)
+      : CanIV(CanIV), HasNUW(HasNUW), DL(DL) {}
+
+  VPCanonicalIVInfo() {}
+
+  ~VPCanonicalIVInfo() {
+    if (CanIV)
+      delete CanIV;
+  }
+};
+
 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
 /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
 /// A VPRegionBlock may indicate that its contents are to be replicated several
@@ -3981,23 +3932,35 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
   /// VPRegionBlock.
   VPBlockBase *Exiting;
 
-  /// An indicator whether this region is to generate multiple replicated
-  /// instances of output IR corresponding to its VPBlockBases.
-  bool IsReplicator;
+  /// Canonical IV of the loop region. If CanIV is nullptr, the region is a
+  /// replicating region.
+  VPCanonicalIVInfo CanIVInfo;
 
   /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
   VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
-                const std::string &Name = "", bool IsReplicator = false)
+                const std::string &Name = "")
+      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
+        CanIVInfo() {
+    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
+    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
+    Entry->setParent(this);
+    Exiting->setParent(this);
+  }
+
+  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
+                const VPCanonicalIVInfo &CanIVInfo,
+                const std::string &Name = "")
       : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
-        IsReplicator(IsReplicator) {
+        CanIVInfo(CanIVInfo) {
     assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
     assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
     Entry->setParent(this);
     Exiting->setParent(this);
   }
 
-  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
+
+  VPRegionBlock(DebugLoc DL, const std::string &Name = "")
       : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
-        IsReplicator(IsReplicator) {}
+        CanIVInfo(new VPRegionValue(), true, DL) {}
 
 public:
   ~VPRegionBlock() override {}
@@ -4039,7 +4002,7 @@
 
   /// An indicator whether this region is to generate multiple replicated
   /// instances of output IR corresponding to its VPBlockBases.
-  bool isReplicator() const { return IsReplicator; }
+  bool isReplicator() const { return !getCanonicalIV(); }
 
   /// The method which generates the output IR instructions that correspond to
   /// this VPRegionBlock, thereby "executing" the VPlan.
@@ -4067,6 +4030,13 @@
   /// Remove the current region from its VPlan, connecting its predecessor to
   /// its entry, and its exiting block to its successor.
   void dissolveToCFGLoop();
+
+  /// Return the canonical induction variable of the region, null for
+  /// replicating regions.
+  VPValue *getCanonicalIV() { return CanIVInfo.CanIV; }
+  const VPValue *getCanonicalIV() const { return CanIVInfo.CanIV; }
+
+  VPCanonicalIVInfo &getCanonicalIVInfo() { return CanIVInfo; }
 };
 
 /// VPlan models a candidate for vectorization, encoding various decisions take
@@ -4378,14 +4348,10 @@ class VPlan {
   LLVM_DUMP_METHOD void dump() const;
 #endif
 
-  /// Returns the canonical induction recipe of the vector loop.
-  VPCanonicalIVPHIRecipe *getCanonicalIV() {
-    VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
-    if (EntryVPBB->empty()) {
-      // VPlan native path.
-      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
-    }
-    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
+  /// Returns the canonical induction VPValue of the vector loop.
+  VPValue *getCanonicalIV() { return getVectorLoopRegion()->getCanonicalIV(); }
+  VPCanonicalIVInfo &getCanonicalIVInfo() {
+    return getVectorLoopRegion()->getCanonicalIVInfo();
   }
 
   VPValue *getSCEVExpansion(const SCEV *S) const {
@@ -4411,22 +4377,22 @@ class VPlan {
     return VPB;
   }
 
-  /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
-  /// IsReplicator is true, the region is a replicate region. The returned block
-  /// is owned by the VPlan and deleted once the VPlan is destroyed.
+  /// Create a new replicate VPRegionBlock with \p Entry, \p Exiting and \p
+  /// Name. The returned block is owned by the VPlan and deleted once the VPlan
+  /// is destroyed.
   VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
-                                     const std::string &Name = "",
-                                     bool IsReplicator = false) {
-    auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
+                                     const std::string &Name = "") {
+    auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
     CreatedBlocks.push_back(VPB);
     return VPB;
   }
 
-  /// Create a new loop VPRegionBlock with \p Name and entry and exiting blocks set
-  /// to nullptr. The returned block is owned by the VPlan and deleted once the
-  /// VPlan is destroyed.
-  VPRegionBlock *createVPRegionBlock(const std::string &Name = "") {
-    auto *VPB = new VPRegionBlock(Name);
+  /// Create a new loop VPRegionBlock with \p DL and \p Name, and entry and
+  /// exiting blocks set to nullptr. The returned block is owned by the VPlan
+  /// and deleted once the VPlan is destroyed.
+  VPRegionBlock *createVPRegionBlock(DebugLoc DL,
+                                     const std::string &Name = "") {
+    auto *VPB = new VPRegionBlock(DL, Name);
     CreatedBlocks.push_back(VPB);
     return VPB;
   }
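Note: with VPCanonicalIVPHIRecipe gone, VPlan.h above derives isReplicator() from the absence of a canonical IV instead of a stored boolean. A standalone sketch of that discriminator (the names are illustrative stand-ins, not the LLVM classes):

```cpp
// Minimal sketch, assuming simplified stand-ins for VPRegionValue and
// VPRegionBlock: loop regions are constructed owning a canonical-IV value,
// replicate regions are not, and isReplicator() is derived from that.
#include <cassert>
#include <memory>

struct RegionValue {}; // stands in for VPRegionValue

struct Region {
  std::unique_ptr<RegionValue> CanIV;

  static Region makeLoopRegion() {
    Region R;
    R.CanIV = std::make_unique<RegionValue>(); // loop regions own an IV
    return R;
  }
  static Region makeReplicateRegion() { return {}; } // no IV => replicator

  bool isReplicator() const { return CanIV == nullptr; }
};

int main() {
  assert(!Region::makeLoopRegion().isReplicator());
  assert(Region::makeReplicateRegion().isReplicator());
}
```

Collapsing the flag into the IV removes one way the two fields could disagree, at the cost of making the IV's lifetime part of the region's identity.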
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 07bfe7a896d86..afd5b8ab76a59 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -23,14 +23,6 @@ using namespace llvm;
 #define DEBUG_TYPE "vplan"
 
 VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) : Ctx(Plan.getContext()) {
-  if (auto LoopRegion = Plan.getVectorLoopRegion()) {
-    if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
-            &LoopRegion->getEntryBasicBlock()->front())) {
-      CanonicalIVTy = CanIV->getScalarType();
-      return;
-    }
-  }
-
   // If there's no canonical IV, retrieve the type from the trip count
   // expression.
   auto *TC = Plan.getTripCount();
@@ -270,18 +262,20 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
     return CanonicalIVTy;
   }
 
+  if (auto *CanIV = dyn_cast<VPRegionValue>(V))
+    return CanonicalIVTy;
+
   Type *ResultTy =
       TypeSwitch(V->getDefiningRecipe())
-          .Case(
-              [this](const auto *R) {
-                // Handle header phi recipes, except VPWidenIntOrFpInduction
-                // which needs special handling due it being possibly truncated.
-                // TODO: consider inferring/caching type of siblings, e.g.,
-                // backedge value, here and in cases below.
-                return inferScalarType(R->getStartValue());
-              })
+          .Case([this](const auto *R) {
+            // Handle header phi recipes, except VPWidenIntOrFpInduction
+            // which needs special handling due it being possibly truncated.
+            // TODO: consider inferring/caching type of siblings, e.g.,
+            // backedge value, here and in cases below.
+            return inferScalarType(R->getStartValue());
+          })
          .Case(
              [](const auto *R) { return R->getScalarType(); })
          .Case
 llvm::calculateRegisterUsageForPlan(
       // FIXME: Might need some motivation why these values are ignored. If
       // for example an argument is used inside the loop it will increase the
       // register pressure (so shouldn't we add it to LoopInvariants).
-      if (!DefR && (!U->getLiveInIRValue() ||
-                    !isa<Instruction>(U->getLiveInIRValue())))
+      if (!isa<VPRegionValue>(U) && !DefR &&
+          (!U->getLiveInIRValue() ||
+           !isa<Instruction>(U->getLiveInIRValue())))
         continue;
 
       // If this recipe is outside the loop then record it and continue.
-      if (!DefR) {
+      if (!DefR && !isa<VPRegionValue>(U)) {
         LoopInvariants.insert(U);
         continue;
       }
@@ -499,6 +494,10 @@ SmallVector llvm::calculateRegisterUsageForPlan(
     return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
   };
 
+  if (auto *CanIV = LoopRegion->getCanonicalIV())
+    if (CanIV->getNumUsers() != 0)
+      OpenIntervals.insert(CanIV);
+
   // We scan the instructions linearly and record each time that a new interval
   // starts, by placing it in a set. If we find this value in TransposEnds then
   // we remove it from the set. The max register usage is the maximum register
@@ -544,7 +543,7 @@ SmallVector llvm::calculateRegisterUsageForPlan(
         continue;
 
       if (VFs[J].isScalar() ||
-          isa(VPV) ||
          (isa(VPV) && vputils::onlyScalarValuesUsed(VPV)) ||
          (isa(VPV) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index c8212af9f8e00..9a41d269a5680 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -395,7 +395,7 @@ static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,
 /// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
 static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
   auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
-  auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
+  auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPB->getPredecessors()[1]);
 
   VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
   VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
@@ -406,13 +406,22 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
   // LatchExitVPB, taking care to preserve the original predecessor & successor
   // order of blocks. Set region entry and exiting after both HeaderVPB and
   // LatchVPBB have been disconnected from their predecessors/successors.
-  auto *R = Plan.createVPRegionBlock();
+  VPPhi *ScalarCanIV = nullptr;
+  if (PreheaderVPBB->getSinglePredecessor() == Plan.getEntry())
+    ScalarCanIV = cast<VPPhi>(&*cast<VPBasicBlock>(HeaderVPB)->begin());
+  auto *R =
+      Plan.createVPRegionBlock(ScalarCanIV ? ScalarCanIV->getDebugLoc()
+                                           : DebugLoc::getCompilerGenerated());
   VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
   VPBlockUtils::disconnectBlocks(LatchVPBB, R);
   VPBlockUtils::connectBlocks(PreheaderVPBB, R);
   R->setEntry(HeaderVPB);
   R->setExiting(LatchVPBB);
 
+  if (ScalarCanIV) {
+    ScalarCanIV->replaceAllUsesWith(R->getCanonicalIV());
+    ScalarCanIV->eraseFromParent();
+  }
+
   // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
   for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
     VPBB->setParent(R);
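Note: createLoopRegion() above promotes the temporary scalar phi built during VPlan construction to the region-defined canonical IV by rewiring all of its users and erasing it. A minimal standalone model of that replaceAllUsesWith hand-off (simplified, not the LLVM use-list machinery):

```cpp
// Sketch, assuming a toy def-use graph: every user of the old scalar
// canonical-IV phi is redirected to the region-defined value, mirroring
// ScalarCanIV->replaceAllUsesWith(R->getCanonicalIV()) in the patch.
#include <cassert>
#include <vector>

struct User;

struct Value {
  std::vector<User *> Users;
  void replaceAllUsesWith(Value *New);
};

struct User {
  Value *Op;
};

void Value::replaceAllUsesWith(Value *New) {
  for (User *U : Users) {
    U->Op = New;                 // rewire the operand
    New->Users.push_back(U);     // keep the new value's use-list consistent
  }
  Users.clear();
}

int main() {
  Value ScalarCanIV, RegionCanIV;
  User U{&ScalarCanIV};
  ScalarCanIV.Users.push_back(&U);
  ScalarCanIV.replaceAllUsesWith(&RegionCanIV);
  assert(U.Op == &RegionCanIV);
}
```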
@@ -425,9 +434,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
                                   DebugLoc DL) {
   Value *StartIdx = ConstantInt::get(IdxTy, 0);
   auto *StartV = Plan.getOrAddLiveIn(StartIdx);
-
-  // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
-  auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
+  auto *CanonicalIVPHI = new VPPhi(StartV, DL);
   HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
 
   // We are about to replace the branch to exit the region. Remove the original
   // BranchOnCond, if there is one.
   }
 
   VPBuilder Builder(LatchVPBB);
-  // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
-  // Initially the induction increment is guaranteed to not wrap, but that may
-  // change later, e.g. when tail-folding, when the flags need to be dropped.
-  auto *CanonicalIVIncrement = Builder.createOverflowingOp(
+  auto CanonicalIVIncrement = Builder.createOverflowingOp(
       Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
       "index.next");
-  CanonicalIVPHI->addOperand(CanonicalIVIncrement);
-
+
   // Add the BranchOnCount VPInstruction to the latch.
   Builder.createNaryOp(VPInstruction::BranchOnCount,
                        {CanonicalIVIncrement, &Plan.getVectorTripCount()},
@@ -663,7 +665,7 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
   VPIRMetadata VPBranchWeights;
   auto *Term = VPBuilder(CheckBlockVPBB)
                    .createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
-                                 Plan.getCanonicalIV()->getDebugLoc());
+                                 Plan.getCanonicalIVInfo().DL);
   if (AddBranchWeights) {
     MDBuilder MDB(Plan.getContext());
     MDNode *BranchWeights =
@@ -829,7 +831,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
   VPReductionPHIRecipe *RedPhiR = nullptr;
   bool HasUnsupportedPhi = false;
   for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
-    if (isa(&R))
+    if (isa(&R))
       continue;
    auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
    if (!Cur) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 555efea1ea840..802acbe1a3cd8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -296,7 +296,6 @@ struct Recipe_match {
     auto *DefR = dyn_cast(R);
     // Check for recipes that do not have opcodes.
     if constexpr (std::is_same_v ||
-                  std::is_same_v<RecipeTy, VPCanonicalIVPHIRecipe> ||
                   std::is_same_v)
       return DefR;
     else
@@ -431,12 +430,24 @@ m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) {
   return AllRecipe_commutative_match(Op0, Op1);
 }
 
+template <typename Op0_t, typename Op1_t>
+inline AllRecipe_match<Instruction::Add, Op0_t, Op1_t> m_Add(const Op0_t &Op0,
+                                                             const Op1_t &Op1) {
+  return m_Binary<Instruction::Add>(Op0, Op1);
+}
+
 template
 inline AllRecipe_commutative_match m_c_Add(const Op0_t &Op0,
                                            const Op1_t &Op1) {
   return m_c_Binary(Op0, Op1);
 }
 
+template <typename Op0_t, typename Op1_t>
+inline AllRecipe_match<Instruction::Or, Op0_t, Op1_t> m_Or(const Op0_t &Op0,
+                                                           const Op1_t &Op1) {
+  return m_Binary<Instruction::Or>(Op0, Op1);
+}
+
 template
 inline AllRecipe_match m_Sub(const Op0_t &Op0,
                              const Op1_t &Op1) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 46909a53a9547..c2754f787d617 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -67,7 +67,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
                ->onlyReadsMemory();
   case VPWidenIntrinsicSC:
     return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
-  case VPCanonicalIVPHISC:
   case VPBranchOnMaskSC:
   case VPFirstOrderRecurrencePHISC:
   case VPReductionPHISC:
@@ -2335,10 +2334,11 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
   if (getStepValue()->getDefiningRecipe())
     return false;
   auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
+  auto *CanIV = getParent()->getParent()->getCanonicalIV();
   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
-  auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
   return StartC && StartC->isZero() && StepC && StepC->isOne() &&
-         getScalarType() == CanIV->getScalarType();
+         getScalarType() ==
+             VPTypeAnalysis(*getParent()->getPlan()).inferScalarType(CanIV);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -4109,16 +4109,6 @@ InstructionCost VPInterleaveBase::computeCost(ElementCount VF,
                  0);
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
-                                   VPSlotTracker &SlotTracker) const {
-  O << Indent << "EMIT ";
-  printAsOperand(O, SlotTracker);
-  O << " = CANONICAL-INDUCTION ";
-  printOperands(O, SlotTracker);
-}
-#endif
-
 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
   return IsScalarAfterVectorization &&
          (!IsScalable || vputils::onlyFirstLaneUsed(this));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a73b083cff7fd..66e5c190c47a5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -372,8 +372,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
   PredRecipe->eraseFromParent();
   auto *Exiting =
       Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
-  VPRegionBlock *Region =
-      Plan.createVPRegionBlock(Entry, Exiting, RegionName, true);
+  VPRegionBlock *Region = Plan.createVPRegionBlock(Entry, Exiting, RegionName);
 
   // Note: first set Entry as region entry and then connect successors starting
   // from it in order, to propagate the "parent" of each VPBasicBlock.
@@ -501,7 +500,7 @@ static void removeRedundantInductionCasts(VPlan &Plan) {
 /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
 /// recipe, if it exists.
 static void removeRedundantCanonicalIVs(VPlan &Plan) {
-  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+  VPValue *CanonicalIV = Plan.getCanonicalIV();
   VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
   for (VPUser *U : CanonicalIV->users()) {
     WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
@@ -583,7 +582,7 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
                     VPValue *StartV, VPValue *Step, DebugLoc DL,
                     VPBuilder &Builder) {
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
-  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+  VPValue *CanonicalIV = Plan.getCanonicalIV();
   VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
       Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
 
@@ -801,7 +800,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
 
   // Calculate the final index.
   VPValue *EndValue = Plan.getCanonicalIV();
-  auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+  auto CanonicalIVType = TypeInfo.inferScalarType(EndValue);
   VPBuilder B(cast<VPBasicBlock>(PredVPBB));
 
   DebugLoc DL = cast(Op)->getDebugLoc();
@@ -1235,6 +1234,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   if (!Plan->isUnrolled())
     return;
 
+  if (match(Def, m_Add(m_VPValue(X), m_VPValue(Y))) && Y->isLiveIn() &&
+      isa<VPPhi>(X)) {
+    auto *Phi = cast<VPPhi>(X);
+    if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
+        Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
+      Phi->setOperand(0, Y);
+      Def->replaceAllUsesWith(Phi);
+      return;
+    }
+  }
+
   // VPVectorPointer for part 0 can be replaced by their start pointer.
   if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
     if (VecPtr->isFirstPart()) {
@@ -1522,9 +1532,11 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
   });
 
   auto *CanIV = Plan.getCanonicalIV();
-  if (!match(Cond, m_SpecificICmp(CmpInst::ICMP_EQ,
-                                  m_Specific(CanIV->getBackedgeValue()),
-                                  m_Specific(&Plan.getVectorTripCount()))))
+  if (!match(Cond,
+             m_Binary(
+                 m_c_Add(m_Specific(CanIV), m_Specific(&Plan.getVFxUF())),
+                 m_Specific(&Plan.getVectorTripCount()))) ||
+      cast<VPRecipeWithIRFlags>(Cond->getDefiningRecipe())->getPredicate() !=
+          CmpInst::ICMP_EQ)
     return false;
 
   // The compare checks CanIV + VFxUF == vector trip count. The vector trip
@@ -1683,8 +1695,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
     if (all_of(Header->phis(), [](VPRecipeBase &Phi) {
           if (auto *R = dyn_cast(&Phi))
             return R->isCanonical();
-          return isa(&Phi);
+          return isa(&Phi);
         })) {
       for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) {
         if (auto *R = dyn_cast(&HeaderR)) {
@@ -1699,6 +1711,9 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
         HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
         HeaderR.eraseFromParent();
       }
+      Plan.getCanonicalIV()->replaceAllUsesWith(
+          Plan.getOrAddLiveIn(ConstantInt::getNullValue(
+              VPTypeAnalysis(Plan).inferScalarType(Plan.getCanonicalIV()))));
 
       VPBlockBase *Preheader = VectorRegion->getSinglePredecessor();
       VPBlockBase *Exit = VectorRegion->getSingleSuccessor();
@@ -2305,15 +2320,18 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
     VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
   VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
-  auto *CanonicalIVPHI = Plan.getCanonicalIV();
-  VPValue *StartV = CanonicalIVPHI->getStartValue();
+  VPValue *CanonicalIV = Plan.getCanonicalIV();
+  VPValue *StartV = Plan.getOrAddLiveIn(Constant::getNullValue(
+      VPTypeAnalysis(Plan).inferScalarType(CanonicalIV)));
 
   auto *CanonicalIVIncrement =
-      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
+      cast<VPInstruction>(EB->getTerminator()->getOperand(0));
   // TODO: Check if dropping the flags is needed if
   // !DataAndControlFlowWithoutRuntimeCheck.
   CanonicalIVIncrement->dropPoisonGeneratingFlags();
-  DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
+  auto &CanIVInfo = Plan.getCanonicalIVInfo();
+  CanIVInfo.HasNUW = false;
+  DebugLoc DL = CanIVInfo.DL;
   // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
   // we have to take unrolling into account. Each part needs to start at
   //   Part * VF
@@ -2334,7 +2352,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
     // When avoiding a runtime check, the active.lane.mask inside the loop
     // uses a modified trip count and the induction variable increment is
     // done after the active.lane.mask intrinsic is called.
-    IncrementValue = CanonicalIVPHI;
+    IncrementValue = CanonicalIV;
     TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
                                      {TC}, DL);
   }
@@ -2344,7 +2362,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
 
   // Create the active lane mask instruction in the VPlan preheader.
   VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
-      ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+      ConstantInt::get(VPTypeAnalysis(Plan).inferScalarType(CanonicalIV), 1));
   auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
                                         {EntryIncrement, TC, ALMMultiplier}, DL,
                                         "active.lane.mask.entry");
@@ -2353,7 +2371,8 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
   // preheader ActiveLaneMask instruction.
   auto *LaneMaskPhi =
       new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc::getUnknown());
-  LaneMaskPhi->insertAfter(CanonicalIVPHI);
+  auto *HeaderVPBB = TopRegion->getEntryBasicBlock();
+  LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
 
   // Create the active lane mask for the next iteration of the loop before the
   // original terminator.
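Note: the lane-mask hunks above and below only re-derive the canonical IV's type and start value (via VPTypeAnalysis and a zero live-in) instead of reading them off a phi recipe; the mask semantics are unchanged. For reference, lane L of an active lane mask is simply IV + L < TripCount — a rough standalone model (illustrative names, not the intrinsic's implementation):

```cpp
// Sketch, assuming scalar emulation of llvm.get.active.lane.mask semantics:
// lanes whose index would run past the trip count are disabled.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

template <std::size_t VF>
std::array<bool, VF> activeLaneMask(uint64_t IV, uint64_t TripCount) {
  std::array<bool, VF> Mask{};
  for (std::size_t L = 0; L < VF; ++L)
    Mask[L] = IV + L < TripCount; // lane L is active iff IV + L < TC
  return Mask;
}

int main() {
  auto M = activeLaneMask<4>(/*IV=*/6, /*TripCount=*/8);
  assert(M[0] && M[1] && !M[2] && !M[3]);
}
```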
@@ -2440,8 +2459,8 @@ void VPlanTransforms::addActiveLaneMask(
         Plan, DataAndControlFlowWithoutRuntimeCheck);
   } else {
     VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
-    VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
-        ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+    VPValue *ALMMultiplier = Plan.getOrAddLiveIn(ConstantInt::get(
+        VPTypeAnalysis(Plan).inferScalarType(Plan.getCanonicalIV()), 1));
     LaneMask =
         B.createNaryOp(VPInstruction::ActiveLaneMask,
                        {WideCanonicalIV, Plan.getTripCount(), ALMMultiplier},
@@ -2663,7 +2682,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
 /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
 /// replaces all uses except the canonical IV increment of
-/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
+/// VPCanonicalIV with a VPEVLBasedIVPHIRecipe. VPCanonicalIV
 /// is used only for loop iterations counting after this transformation.
 ///
 /// The function uses the following definitions:
@@ -2710,13 +2729,18 @@ void VPlanTransforms::addExplicitVectorLength(
     return;
   VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
 
-  auto *CanonicalIVPHI = Plan.getCanonicalIV();
-  auto *CanIVTy = CanonicalIVPHI->getScalarType();
-  VPValue *StartV = CanonicalIVPHI->getStartValue();
+  auto *CanonicalIV = Plan.getCanonicalIV();
+  auto &CanIVInfo = Plan.getCanonicalIVInfo();
+  auto *CanIVTy = VPTypeAnalysis(Plan).inferScalarType(CanonicalIV);
+  VPValue *StartV = Plan.getOrAddLiveIn(ConstantInt::getNullValue(CanIVTy));
+  auto *CanonicalIVIncrement = cast<VPInstruction>(Plan.getVectorLoopRegion()
                                                        ->getExitingBasicBlock()
                                                        ->getTerminator()
                                                        ->getOperand(0));
 
   // Create the ExplicitVectorLengthPhi recipe in the main loop.
   auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc::getUnknown());
-  EVLPhi->insertAfter(CanonicalIVPHI);
+  EVLPhi->insertBefore(*Header, Header->begin());
   VPBuilder Builder(Header, Header->getFirstNonPhi());
   // Create the AVL (application vector length), starting from TC -> 0 in steps
   // of EVL.
@@ -2735,8 +2759,6 @@ void VPlanTransforms::addExplicitVectorLength(
   auto *VPEVL = Builder.createNaryOp(VPInstruction::ExplicitVectorLength, AVL,
                                      DebugLoc::getUnknown());
 
-  auto *CanonicalIVIncrement =
-      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
   Builder.setInsertPoint(CanonicalIVIncrement);
   VPValue *OpVPEVL = VPEVL;
 
@@ -2745,9 +2767,7 @@ void VPlanTransforms::addExplicitVectorLength(
       OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
 
   auto *NextEVLIV = Builder.createOverflowingOp(
-      Instruction::Add, {OpVPEVL, EVLPhi},
-      {CanonicalIVIncrement->hasNoUnsignedWrap(),
-       CanonicalIVIncrement->hasNoSignedWrap()},
+      Instruction::Add, {OpVPEVL, EVLPhi}, {CanIVInfo.HasNUW, false},
       CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
   EVLPhi->addOperand(NextEVLIV);
 
@@ -2758,10 +2778,10 @@ void VPlanTransforms::addExplicitVectorLength(
 
   transformRecipestoEVLRecipes(Plan, *VPEVL);
 
-  // Replace all uses of VPCanonicalIVPHIRecipe by
+  // Replace all uses of VPCanonicalIV by
   // VPEVLBasedIVPHIRecipe except for the canonical IV increment.
-  CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
-  CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
+  CanonicalIV->replaceAllUsesWith(EVLPhi);
+  CanonicalIVIncrement->setOperand(0, CanonicalIV);
   // TODO: support unroll factor > 1.
   Plan.setUF(1);
 }
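Note: addExplicitVectorLength() above replaces most canonical-IV uses with an EVL-based IV that advances by the explicit vector length, so the final iteration can cover a partial vector. A standalone schematic of that IV's evolution (assumed simplification, not LLVM code):

```cpp
// Sketch, assuming a scalar model of the EVL loop: the AVL (application
// vector length) is the remaining trip count, the EVL is the amount actually
// processed this iteration, and the EVL phi advances by it ("index.evl.next").
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t TC = 10, VF = 4;
  uint64_t EVLPhi = 0; // starts at 0, like StartV in the patch
  while (EVLPhi < TC) {
    uint64_t AVL = TC - EVLPhi;       // application vector length
    uint64_t EVL = std::min(AVL, VF); // explicit vector length for this step
    EVLPhi += EVL;                    // index.evl.next = add EVL, EVLPhi
  }
  assert(EVLPhi == TC); // the EVL IV lands exactly on the trip count
}
```

This is also why the patch can take the no-unsigned-wrap flag from VPCanonicalIVInfo rather than from a concrete increment instruction: the wrap behavior is a property of the canonical IV itself.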
@@ -2812,15 +2832,15 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
 
     // Replace CanonicalIVInc with EVL-PHI increment.
     auto *CanonicalIV = cast(&*HeaderVPBB->begin());
     VPValue *Backedge = CanonicalIV->getIncomingValue(1);
-    assert(match(Backedge, m_c_Add(m_Specific(CanonicalIV),
-                                   m_Specific(&Plan.getVFxUF()))) &&
-           "Unexpected canonical iv");
-    Backedge->replaceAllUsesWith(EVLIncrement);
-
-    // Remove unused phi and increment.
-    VPRecipeBase *CanonicalIVIncrement = Backedge->getDefiningRecipe();
-    CanonicalIVIncrement->eraseFromParent();
-    CanonicalIV->eraseFromParent();
+    if (match(Backedge,
+              m_c_Add(m_Specific(CanonicalIV), m_Specific(&Plan.getVFxUF())))) {
+      Backedge->replaceAllUsesWith(EVLIncrement);
+
+      // Remove unused phi and increment.
+      VPRecipeBase *CanonicalIVIncrement = Backedge->getDefiningRecipe();
+      CanonicalIVIncrement->eraseFromParent();
+      CanonicalIV->eraseFromParent();
+    }
 
     // Replace the use of VectorTripCount in the latch-exiting block.
     // Before: (branch-on-count EVLIVInc, VectorTripCount)
@@ -4007,8 +4027,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   unsigned VFMinVal = VF.getKnownMinValue();
   SmallVector StoreGroups;
   for (auto &R : *VectorLoop->getEntryBasicBlock()) {
-    if (isa<VPCanonicalIVPHIRecipe>(&R) ||
-        match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+    if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
       continue;
 
     if (isa(&R) &&
@@ -4162,21 +4181,23 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   // Adjust induction to reflect that the transformed plan only processes one
   // original iteration.
   auto *CanIV = Plan.getCanonicalIV();
-  auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
+  Type *CanIVTy = TypeInfo.inferScalarType(CanIV);
+  auto *Inc = cast<VPInstruction>(
+      VectorLoop->getExitingBasicBlock()->getTerminator()->getOperand(0));
   VPBuilder PHBuilder(Plan.getVectorPreheader());
-  VPValue *UF = Plan.getOrAddLiveIn(
-      ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
+  VPValue *UF =
+      Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 1 * Plan.getUF()));
   if (VF.isScalable()) {
-    VPValue *VScale = PHBuilder.createElementCount(
-        CanIV->getScalarType(), ElementCount::getScalable(1));
+    VPValue *VScale =
+        PHBuilder.createElementCount(CanIVTy, ElementCount::getScalable(1));
     VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
     Inc->setOperand(1, VScaleUF);
     Plan.getVF().replaceAllUsesWith(VScale);
   } else {
     Inc->setOperand(1, UF);
     Plan.getVF().replaceAllUsesWith(
-        Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+        Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 1)));
   }
   removeDeadRecipes(Plan);
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 5e7f19faebb56..8eb5954b66bd5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -69,7 +69,7 @@ class UnrollState {
                            VPBasicBlock::iterator InsertPtForPhi);
 
   VPValue *getConstantVPV(unsigned Part) {
-    Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+    Type *CanIVIntTy = TypeInfo.inferScalarType(Plan.getCanonicalIV());
     return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
   }
 
@@ -79,7 +79,7 @@ class UnrollState {
   void unrollBlock(VPBlockBase *VPB);
 
   VPValue *getValueForPart(VPValue *V, unsigned Part) {
-    if (Part == 0 || V->isLiveIn())
+    if (Part == 0 || V->isLiveIn() || isa<VPRegionValue>(V))
       return V;
     assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
            "accessed value does not exist");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 059993043dcda..5fe61218efc3d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -102,9 +102,7 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
     return all_of(R->operands(), isUniformAcrossVFsAndUFs);
   }
 
-  auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV();
-  // Canonical IV chain is uniform.
-  if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
+  if (isa<VPRegionValue>(V))
     return true;
 
   return TypeSwitch(R)
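Note: the VPlanValue.h changes below introduce VPRegionValue, a VPValue defined by a region rather than by a recipe or a live-in. A standalone sketch of the LLVM-style subclass-ID check it relies on, and of the isLiveIn() carve-out (simplified, not the real classes):

```cpp
// Sketch, assuming toy stand-ins for VPValue/VPRegionValue: a subclass ID in
// the base class enables isa/dyn_cast-style queries without C++ RTTI, and a
// value with no defining recipe is no longer automatically a live-in.
#include <cassert>

struct ValueBase {
  enum IDs { GenericSC, RecipeSC, RegionValueSC };
  const unsigned char SubclassID;
  explicit ValueBase(unsigned char ID) : SubclassID(ID) {}
  // Region-defined values have no defining recipe but are not live-ins.
  bool isLiveIn() const { return SubclassID != RegionValueSC; }
};

struct RegionValue : ValueBase {
  RegionValue() : ValueBase(RegionValueSC) {}
  static bool classof(const ValueBase *V) { // used by isa<>/dyn_cast<>
    return V->SubclassID == RegionValueSC;
  }
};

int main() {
  RegionValue RV;
  assert(RegionValue::classof(&RV) && !RV.isLiveIn());
}
```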
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 0678bc90ef4b5..2948b578af980 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -40,6 +40,7 @@ class VPUser;
 class VPRecipeBase;
 class VPInterleaveBase;
 class VPPhiAccessors;
+class VPRegionValue;
 
 // This is the base class of the VPlan Def/Use graph, used for modeling the data
 // flow into, within and out of the VPlan. VPValues can stand for live-ins
@@ -51,6 +52,7 @@ class LLVM_ABI_FOR_TEST VPValue {
   friend class VPInterleaveBase;
   friend class VPlan;
   friend class VPExpressionRecipe;
+  friend class VPRegionValue;
 
   const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
 
@@ -89,7 +91,9 @@ class LLVM_ABI_FOR_TEST VPValue {
   enum {
     VPValueSC,  /// A generic VPValue, like live-in values or defined by a recipe
                 /// that defines multiple values.
-    VPVRecipeSC /// A VPValue sub-class that is a VPRecipeBase.
+    VPVRecipeSC,     /// A VPValue sub-class that is a VPRecipeBase.
+    VPRegionValueSC, /// A VPValue sub-class that defines the canonical IV of a
+                     /// loop region.
   };
 
   VPValue(const VPValue &) = delete;
@@ -168,7 +172,9 @@ class LLVM_ABI_FOR_TEST VPValue {
   bool hasDefiningRecipe() const { return getDefiningRecipe(); }
 
   /// Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
-  bool isLiveIn() const { return !hasDefiningRecipe(); }
+  bool isLiveIn() const {
+    return !hasDefiningRecipe() && SubclassID != VPRegionValueSC;
+  }
 
   /// Returns the underlying IR value, if this VPValue is defined outside the
   /// scope of VPlan. Returns nullptr if the VPValue is defined by a VPDef
@@ -189,6 +195,18 @@ class LLVM_ABI_FOR_TEST VPValue {
   }
 };
 
+/// VPValues defined by a VPRegionBlock, like the canonical IV.
+class VPRegionValue : public VPValue {
+public:
+  VPRegionValue() : VPValue(VPValue::VPRegionValueSC) {}
+
+  ~VPRegionValue() override = default;
+
+  static inline bool classof(const VPValue *V) {
+    return V->getVPValueID() == VPValue::VPRegionValueSC;
+  }
+};
+
 typedef DenseMap Value2VPValueTy;
 typedef DenseMap VPValue2ValueTy;
 
@@ -364,7 +382,6 @@ class VPDef {
     VPPredInstPHISC,
     // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
     // VPHeaderPHIRecipe need to be kept together.
-    VPCanonicalIVPHISC,
     VPActiveLaneMaskPHISC,
     VPEVLBasedIVPHISC,
     VPFirstOrderRecurrencePHISC,
@@ -374,7 +391,7 @@ class VPDef {
     // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
     // END: Phi-like recipes
     VPFirstPHISC = VPWidenPHISC,
-    VPFirstHeaderPHISC = VPCanonicalIVPHISC,
+    VPFirstHeaderPHISC = VPActiveLaneMaskPHISC,
     VPLastHeaderPHISC = VPReductionPHISC,
     VPLastPHISC = VPReductionPHISC,
   };
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 013ea2e883534..dd9423fc02237 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -460,12 +460,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
     return false;
   }
 
-  if (!isa<VPCanonicalIVPHIRecipe>(&*Entry->begin())) {
-    errs() << "VPlan vector loop header does not start with a "
-              "VPCanonicalIVPHIRecipe\n";
-    return false;
-  }
-
   const VPBasicBlock *Exiting = dyn_cast<VPBasicBlock>(TopRegion->getExiting());
   if (!Exiting) {
     errs() << "VPlan exiting block is not a VPBasicBlock\n";
     return false;
   }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 6cf11be0e11f7..240cf782a0a2b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -580,11 +580,11 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
 ; PRED-NEXT: [[TMP13:%.*]] = or [[BROADCAST_SPLAT]], splat (i16 1)
 ; PRED-NEXT: [[TMP14:%.*]] = uitofp [[TMP13]] to
 ; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP14]], ptr [[NEXT_GEP]], i32 8, [[ACTIVE_LANE_MASK]])
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]])
 ; PRED-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; PRED-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true
-; PRED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
+; PRED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; PRED: [[MIDDLE_BLOCK]]:
 ; PRED-NEXT: br label %[[EXIT:.*]]
 ; PRED: [[EXIT]]:
@@ -1048,12 +1048,12 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP83]], align 4
 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE27]]
 ; PRED: [[PRED_STORE_CONTINUE27]]:
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]])
 ; PRED-NEXT: [[TMP84:%.*]] = extractelement <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; PRED-NEXT: [[TMP85:%.*]] = xor i1 [[TMP84]], true
 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
-; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; PRED: [[MIDDLE_BLOCK]]:
 ; PRED-NEXT: br [[EXIT:label %.*]]
 ; PRED: [[SCALAR_PH]]:
@@ -1344,11 +1344,11 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
 ; PRED-NEXT: [[TMP26:%.*]] = fptoui [[TMP25]] to
 ; PRED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[C]], i64 [[INDEX]]
 ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP26]], ptr [[TMP27]], i32 1, [[ACTIVE_LANE_MASK]])
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]])
 ; PRED-NEXT: [[TMP28:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; PRED-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true
-; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
+; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; PRED: [[MIDDLE_BLOCK]]:
 ; PRED-NEXT: br [[EXIT:label %.*]]
 ; PRED: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index d10a26d1a73df..664ebbffb5837 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -140,11 +140,11 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
 ; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP33]]
 ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( zeroinitializer, ptr [[TMP34]], i32 8, [[TMP23]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
 ; CHECK-NEXT: [[TMP35:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; CHECK-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
 ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -257,11 +257,11 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
 ; CHECK-NEXT: [[TMP37:%.*]] = ashr i64 [[TMP36]], 32
 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP37]]
 ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP23]], ptr [[TMP38]], i32 4, [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
 ; CHECK-NEXT: [[TMP39:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; CHECK-NEXT: [[TMP40:%.*]] = xor i1 [[TMP39]], true
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
 ; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
index 649e34e09edbf..c508cbe67e498 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
@@ -25,10 +25,10 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly
 ; CHECK-UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-UF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
 ; CHECK-UF1-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP4]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP2]])
 ; CHECK-UF1-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; CHECK-UF1-NEXT: [[TMP6:%.*]] = xor i1 [[TMP5]], true
+; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-UF1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-UF1: middle.block:
 ;
@@ -66,7 +66,6 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP17]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK4]])
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK5]])
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP19]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK6]])
-; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
 ; CHECK-UF4-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-UF4-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 8
 ; CHECK-UF4-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 12
@@ -80,6 +79,7 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly
 ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT9:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP15]], i64 [[TMP6]])
 ; CHECK-UF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0
 ; CHECK-UF4-NEXT: [[TMP20:%.*]] = xor i1 [[TMP21]], true
+; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
 ; CHECK-UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-UF4: middle.block:
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
index c3b0bc8c00a74..8aaada8b0722a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
@@ -10,10 +10,10 @@ define i64 @test(ptr %a, ptr %b) #0 {
 ; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
 ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
-; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: Cost of 0 for VF 8: WIDEN-REDUCTION-PHI ir<{{.+}}> = phi vp<{{.+}}>, ir<{{.+}}>
 ; CHECK: Cost for VF 8: 30
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: Cost of 0 for VF 16: WIDEN-REDUCTION-PHI ir<{{.+}}> = phi vp<{{.+}}>, ir<{{.+}}>
 ; CHECK: Cost for VF 16: 56
 ; CHECK: LV: Selecting VF: 16
 entry:
@@ -44,11 +44,9 @@ define i64 @test_external_iv_user(ptr %a, ptr %b) #0 {
 ; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
 ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
-; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost for VF 8: 30
 ; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost for VF 16: 57
 ; CHECK: LV: Selecting VF: vscale x 2
 entry:
@@ -81,11 +79,9 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
 ; CHECK-NEXT: Cost of 1 for VF 8: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1
 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
 ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
-; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost for VF 8: 27
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost for VF 16: 48
 ; CHECK: LV: Selecting VF: 16
 entry:
@@ -117,10 +113,8 @@ define i1 @test_extra_cmp_user(ptr nocapture noundef %dst, ptr nocapture noundef
 ; CHECK: Cost of 4 for VF 8: induction instruction %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
 ; CHECK-NEXT: Cost of 4 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %indvars.iv.next, 16
-; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost for VF 8: 12
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost for VF 16: 4
 ; CHECK: LV: Selecting VF: 16
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index fd6e275d098ca..3d2a985f768cc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -156,10 +156,10 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
 ; PRED-NEXT: [[TMP17:%.*]] = trunc [[TMP16]] to
 ; PRED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
 ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP17]], ptr [[TMP18]], i32 1, [[ACTIVE_LANE_MASK]])
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]]
 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP10]])
 ; PRED-NEXT: [[TMP19:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; PRED-NEXT: [[TMP20:%.*]] = xor i1 [[TMP19]], true
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]]
 ; PRED-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; PRED: [[MIDDLE_BLOCK]]:
 ; PRED-NEXT: br label %[[EXIT:.*]]
@@ -322,11 +322,11 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 {
 ; PRED-NEXT: store i32 1, ptr [[TMP23]], align 4
 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]]
 ; PRED: [[PRED_STORE_CONTINUE2]]:
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP15]])
 ; PRED-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; PRED-NEXT: [[TMP25:%.*]] = xor i1 [[TMP24]], true
 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; PRED: [[MIDDLE_BLOCK]]:
 ; PRED-NEXT: br label %[[EXIT:.*]]
@@ -510,11 +510,11 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
 ; PRED-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4
 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE7]]
 ; PRED: [[PRED_STORE_CONTINUE7]]:
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]])
 ; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; PRED-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true
 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; PRED-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; PRED: [[MIDDLE_BLOCK]]:
 ; PRED-NEXT: br label %[[EXIT:.*]]
@@ -699,11 +699,11 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
 ; PRED-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4
 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE6]]
 ; PRED: [[PRED_STORE_CONTINUE6]]:
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
 ; PRED-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; PRED-NEXT: [[TMP35:%.*]] = xor i1 [[TMP34]], true
 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; PRED-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; PRED: [[MIDDLE_BLOCK]]:
 ; PRED-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
index 157b78704234a..d87a7f22f012d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
@@ -83,10 +83,10 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8
 ; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]]
 ; TFCOMMON: pred.store.continue2:
-; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]])
 ; TFCOMMON-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; TFCOMMON-NEXT: [[TMP17:%.*]] = xor i1 [[TMP15]], true
+; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; TFCOMMON-NEXT: br i1 [[TMP17]], label [[END:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
 ; TFCOMMON: end:
 ; TFCOMMON-NEXT: ret void
@@ -144,6 +144,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT10]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[TMP27]], i64 [[TMP3]])
 ; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor i1 [[TMP26]], true
+; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP28]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; TFA_INTERLEAVE: end:
 ; TFA_INTERLEAVE-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index d8a81f9316e4b..0a964a75b81f8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -967,11 +967,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFA_INTERLEAVE-NEXT: store double [[PREDPHI3]], ptr [[P]], align 8
 ; TFA_INTERLEAVE-NEXT: br label %[[TMP9]]
 ; TFA_INTERLEAVE: [[TMP9]]:
-; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]]
 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT6]] = icmp ult i64 [[TMP20]], [[TMP3]]
 ; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = xor i1 [[ACTIVE_LANE_MASK_NEXT]], true
+; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label %[[END:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; TFA_INTERLEAVE: [[END]]:
 ; TFA_INTERLEAVE-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 56ace5497b996..24e56ffdbab23 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -351,10 +351,10 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
 ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1
 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
 ; DEFAULT: [[PRED_STORE_CONTINUE35]]:
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
 ; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16)
-; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; DEFAULT: [[MIDDLE_BLOCK]]:
 ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
 ; DEFAULT: [[FOR_COND_CLEANUP]]:
@@ -473,12 +473,12 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; DEFAULT-NEXT: [[TMP21:%.*]] = add [[TMP18]], [[TMP20]]
 ; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
 ; DEFAULT-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP21]], ptr [[TMP22]], i32 1, [[ACTIVE_LANE_MASK]])
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ;
DEFAULT-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; DEFAULT-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: @@ -523,12 +523,12 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[TMP21:%.*]] = add [[TMP18]], [[TMP20]] ; OPTSIZE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; OPTSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP21]], ptr [[TMP22]], i32 1, [[ACTIVE_LANE_MASK]]) -; OPTSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; OPTSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; OPTSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; OPTSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; OPTSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; OPTSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; OPTSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: @@ -573,12 +573,12 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[TMP21:%.*]] = add [[TMP18]], [[TMP20]] ; MINSIZE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; MINSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP21]], ptr [[TMP22]], i32 1, [[ACTIVE_LANE_MASK]]) -; MINSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; MINSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; MINSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; MINSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; MINSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; MINSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; MINSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; MINSIZE: [[FOR_COND_CLEANUP]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 6e11e559151da..277f0183c74f9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -1392,7 +1392,6 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = mul nsw [[TMP16]], [[TMP13]] ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = add 
[[TMP17]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[TMP19]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], [[VEC_PHI]] -; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true @@ -1430,7 +1429,6 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = mul nsw [[TMP16]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = add [[TMP17]], [[VEC_PHI]] ; CHECK-INTERLEAVED-NEXT: [[TMP19]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], [[VEC_PHI]] -; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true @@ -1468,7 +1466,6 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw [[TMP16]], [[TMP13]] ; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP17]], zeroinitializer ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP18]]) -; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = xor i1 [[TMP19]], true diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 0f82de629afa9..1656997deed9e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -233,10 +233,10 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP39:%.*]] = or [[WIDE_MASKED_GATHER]], [[VEC_PHI]] ; PRED-NEXT: [[TMP40:%.*]] = or [[TMP39]], [[WIDE_MASKED_GATHER7]] ; PRED-NEXT: [[TMP41]] = select [[ACTIVE_LANE_MASK]], [[TMP40]], [[VEC_PHI]] -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], [[TMP2]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[IV]], i64 [[TMP10]]) ; PRED-NEXT: [[TMP43:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP42:%.*]] = xor i1 [[TMP43]], true +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], [[TMP2]] ; PRED-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP41]]) @@ -455,11 +455,11 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP20:%.*]] = udiv [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] ; PRED-NEXT: [[TMP21:%.*]] = or [[TMP20]], [[VEC_PHI]] ; PRED-NEXT: [[TMP16]] = select [[ACTIVE_LANE_MASK]], [[TMP21]], [[VEC_PHI]] -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP12]]) ; PRED-NEXT: 
[[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP17:%.*]] = xor i1 [[TMP15]], true -; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] +; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: [[TMP19:%.*]] = call i16 @llvm.vector.reduce.or.nxv8i16( [[TMP16]]) ; PRED-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 5072058ed5b8f..763a802f0e294 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -137,10 +137,10 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP8]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = xor i1 [[TMP10]], true +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -362,7 +362,6 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP26]], [[TMP27]]) ; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = select [[ACTIVE_LANE_MASK8]], [[WIDE_MASKED_LOAD11]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP30]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], [[TMP29]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], [[TMP32]] @@ -602,7 +601,6 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) ; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP16]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP15]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true @@ -817,7 +815,6 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr 
noalias nocaptu ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = fadd [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP10]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP11]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP7]]) ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true @@ -1020,7 +1017,6 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select [[TMP8]], [[WIDE_MASKED_LOAD1]], splat (float 3.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP11]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true @@ -1449,7 +1445,6 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP40]], [[TMP41]]) ; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP36]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], [[TMP43]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = shl nuw i64 [[TMP45]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]] @@ -1745,7 +1740,6 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP40]], [[TMP41]]) ; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select nnan [[ACTIVE_LANE_MASK8]], [[TMP36]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], [[TMP43]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = shl nuw i64 [[TMP45]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index c775b44bd1ba6..59d47497b7b25 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -82,10 +82,10 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0( zeroinitializer, ptr [[TMP13]], i32 1, 
[[ACTIVE_LANE_MASK]]) -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]]) ; PRED-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP12:%.*]] = xor i1 [[TMP14]], true +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 3b0bd87587cc0..1ece9e1f7305f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -109,10 +109,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP15]], [[TMP18]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP9]], [[TMP9]]) ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP17]], i32 1, [[INTERLEAVED_MASK3]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP19]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] @@ -240,7 +240,6 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = zext nneg [[TMP13]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP14]] ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP15]], i32 1, [[TMP12]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] @@ -375,7 +374,6 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = zext nneg [[TMP15]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP16]] ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP17]], i32 1, [[TMP14]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = 
extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] @@ -536,7 +534,6 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave4.nxv64i8( [[TMP17]], [[TMP18]], [[TMP19]], [[TMP20]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave4.nxv64i1( [[TMP9]], [[TMP9]], [[TMP9]], [[TMP9]]) ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP22]], i32 1, [[INTERLEAVED_MASK3]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index b8b4fbd3140de..63b643bd2d600 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -62,10 +62,10 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP15]], true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll index 33ee0d6e2ae2f..6fc5232fd1d44 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll @@ -87,7 +87,6 @@ define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, ; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[WIDE_MASKED_LOAD]], splat (i32 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP4]], ptr [[TMP5]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP3:![0-9]+]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index b5544dc3310c9..ac48e8124a13f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -29,10 +29,10 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP13:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP14]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP14]]) @@ -63,10 +63,10 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], zeroinitializer ; CHECK-IN-LOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP13]]) ; CHECK-IN-LOOP-NEXT: [[TMP15]] = add i32 [[VEC_PHI]], [[TMP14]] -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = xor i1 [[TMP18]], true +; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] ; CHECK-IN-LOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br label [[WHILE_BODY:%.*]] @@ -113,7 +113,6 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], splat (float -0.000000e+00) ; CHECK-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true @@ -145,7 +144,6 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], splat (float -0.000000e+00) ; CHECK-IN-LOOP-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call 
@llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true @@ -199,7 +197,6 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = xor [[VEC_PHI]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[TMP17]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP20]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP16]], true @@ -236,7 +233,6 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = select [[TMP15]], [[WIDE_MASKED_LOAD1]], zeroinitializer ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP17]]) ; CHECK-IN-LOOP-NEXT: [[TMP19]] = xor i32 [[VEC_PHI]], [[TMP18]] -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP21]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-IN-LOOP-NEXT: [[TMP23:%.*]] = xor i1 [[TMP22]], true diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index 5531b3ca51140..a878cb5f1187c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -53,7 +53,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP54]], i32 4, [[ACTIVE_LANE_MASK7]]) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP57]], i32 4, [[ACTIVE_LANE_MASK8]]) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK9]]) -; CHECK-NEXT: [[INDEX_NEXT10]] = add i64 [[INDEX6]], [[TMP62]] ; CHECK-NEXT: [[TMP63:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP64:%.*]] = shl nuw i64 [[TMP63]], 2 ; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX6]], [[TMP64]] @@ -69,6 +68,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP71]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP35:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true +; CHECK-NEXT: [[INDEX_NEXT10]] = add i64 [[INDEX6]], [[TMP62]] ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -161,7 +161,6 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP76]], i32 4, [[TMP70]]) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP79]], i32 4, [[TMP71]]) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP82]], i32 4, 
[[TMP72]]) -; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX6]], [[TMP6]] ; CHECK-NEXT: [[TMP85:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP86:%.*]] = shl nuw i64 [[TMP85]], 2 ; CHECK-NEXT: [[TMP87:%.*]] = add i64 [[INDEX6]], [[TMP86]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 9ebe79096adc4..1b2f57e9b2697 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -26,10 +26,10 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP13]], true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -70,7 +70,6 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP4]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX1]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = xor i1 [[TMP6]], true @@ -118,11 +117,11 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP12]], true -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: @@ -178,12 +177,12 @@ define
void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP20]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP22:%.*]] = xor i1 [[TMP21]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: @@ -231,11 +230,11 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: @@ -285,11 +284,11 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = xor i1 [[TMP14]], true -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: @@ -344,11 +343,11 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], [[WIDE_MASKED_GATHER]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP16]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: @@ -406,11 +405,11 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP13]], true -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: @@ -458,11 +457,11 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP15:%.*]] = fdiv [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD2]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP15]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: @@ -514,11 +513,11 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD2]], splat (i32 1) ; CHECK-NEXT: [[TMP16:%.*]] = udiv [[WIDE_MASKED_LOAD]], [[TMP15]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP16]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = xor i1 [[TMP14]], true -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll index 5ee4e9efc0058..2ac34250ff6c8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll @@ -27,10 +27,10 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF1-NEXT: [[TMP6:%.*]] = mul [[WIDE_MASKED_LOAD]], splat (i8 3) ; CHECK-UF1-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-UF1-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP6]], ptr [[TMP13]], i32 1, [[ACTIVE_LANE_MASK]]) -; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-UF1-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-UF1-NEXT: [[TMP11:%.*]] = xor i1 [[TMP14]], true +; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] ; CHECK-UF1-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UF1: middle.block: ; @@ -102,7 +102,6 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP26]], ptr [[TMP39]], i32 1, [[ACTIVE_LANE_MASK6]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP27]], ptr [[TMP42]], i32 1, [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP28]], ptr [[TMP45]], i32 1, [[ACTIVE_LANE_MASK8]]) -; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]] ; CHECK-UF4-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-UF4-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 4 ; CHECK-UF4-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]] @@ -122,6 +121,7 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP54]], i64 [[TMP9]]) ; CHECK-UF4-NEXT: [[TMP59:%.*]] = extractelement [[TMP55]], i32 0 ; CHECK-UF4-NEXT: [[TMP60:%.*]] = xor
i1 [[TMP59]], true +; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]] ; CHECK-UF4-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UF4: middle.block: ; @@ -169,11 +169,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF1-NEXT: [[TMP3:%.*]] = fmul [[WIDE_MASKED_LOAD]], splat (double 3.000000e+00) ; CHECK-UF1-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; CHECK-UF1-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP3]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP13]]) ; CHECK-UF1-NEXT: [[TMP7:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-UF1-NEXT: [[TMP6:%.*]] = xor i1 [[TMP7]], true -; CHECK-UF1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-UF1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UF1: middle.block: ; ; CHECK-UF4-LABEL: define void @scalable_wide_active_lane_mask_double( @@ -247,7 +247,6 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP17]], ptr [[TMP34]], i32 8, [[ACTIVE_LANE_MASK6]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP18]], ptr [[TMP37]], i32 8, [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP19]], ptr [[TMP40]], i32 8, [[ACTIVE_LANE_MASK8]]) -; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] ; CHECK-UF4-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-UF4-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 1 ; CHECK-UF4-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] @@ -267,7 +266,8 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP49]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-UF4-NEXT: [[TMP54:%.*]] = extractelement [[TMP50]], i32 0 ; CHECK-UF4-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true -; CHECK-UF4-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] +; CHECK-UF4-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-UF4: middle.block: ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 742097bdae890..e89c1ff358e17 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -556,9 +556,9 @@ define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indic ; CHECK-NEXT: [[TMP9:%.*]] = zext [[WIDE_LOAD]] to ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], [[TMP9]] ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; 
CHECK-NEXT: [[TMP11:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_EXIT:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
index b8f4e8435e9cd..55ca53f99b442 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
@@ -27,10 +27,10 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) #
 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IDX]]
 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IDX]], 4
 ; CHECK-NEXT: [[NEXT_ACTIVE_LANE_MASK]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[IDX]], i64 [[N2]])
 ; CHECK-NEXT: [[EXTRACT_FIRST_LANE_MASK:%.*]] = extractelement <4 x i1> [[NEXT_ACTIVE_LANE_MASK]], i32 0
 ; CHECK-NEXT: [[FIRST_LANE_SET:%.*]] = xor i1 [[EXTRACT_FIRST_LANE_MASK]], true
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IDX]], 4
 ; CHECK-NEXT: br i1 [[FIRST_LANE_SET]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br [[FOR_END:label %.*]]
@@ -81,11 +81,11 @@ define void @cond_uniform_load(ptr noalias nocapture %dst, ptr nocapture readonl
 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX6]]
 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[PREDPHI]], ptr [[TMP7]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX6]], 4
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX6]], i64 [[TMP3]])
 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; CHECK-NEXT: [[TMP9:%.*]] = xor i1 [[TMP8]], true
-; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX6]], 4
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br [[FOR_END:label %.*]]
 ; CHECK: [[SCALAR_PH:.*:]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
index 1607755e624a3..c346170b04433 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
@@ -157,10 +157,10 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]]
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]])
-; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]]
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP15]], true
+; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]]
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; DATA_AND_CONTROL_NO_RT_CHECK: middle.block:
 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
index e9de5e21228fd..9c886a4128c62 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
@@ -25,9 +25,9 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6
 ; CHECK-NEXT: [[TMP6:%.*]] = call @foo_uniform( [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
 ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: for.cond.cleanup:
 ; CHECK-NEXT: ret void
@@ -63,13 +63,13 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6
 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]]
 ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP10]], ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]])
 ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP11]], ptr [[TMP14]], i32 8, [[ACTIVE_LANE_MASK2]])
-; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1
 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]]
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]])
 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0
+; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; INTERLEAVE: for.cond.cleanup:
 ; INTERLEAVE-NEXT: ret void
@@ -111,9 +111,9 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3
 ; CHECK-NEXT: [[TMP6:%.*]] = call @bar_uniform( [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
 ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK: for.cond.cleanup:
 ; CHECK-NEXT: ret void
@@ -149,13 +149,13 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3
 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]]
 ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP10]], ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]])
 ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP11]], ptr [[TMP14]], i32 8, [[ACTIVE_LANE_MASK2]])
-; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1
 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]]
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]])
 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0
+; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
 ; INTERLEAVE: for.cond.cleanup:
 ; INTERLEAVE-NEXT: ret void
@@ -227,10 +227,10 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
 ; INTERLEAVE-NEXT: store double [[TMP8]], ptr [[TMP9]], align 8
 ; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE4]]
 ; INTERLEAVE: pred.store.continue4:
-; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = or disjoint i64 [[INDEX]], 1
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]]
 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = icmp ult i64 [[TMP10]], [[TMP0]]
+; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK_NEXT]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
 ; INTERLEAVE: for.cond.cleanup:
 ; INTERLEAVE-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll
index bdf832f32964f..f92cbc029ffe4 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll
@@ -22,7 +22,6 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; CHECK: Cost of 1 for VF 2: induction instruction %inc = add nuw nsw i32 %i.016, 1
 ; CHECK: Cost of 0 for VF 2: induction instruction %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
 ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond.not = icmp eq i32 %inc, %n
-; CHECK: Cost of 0 for VF 2: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost of 0 for VF 2: vp<{{.+}}> = SCALAR-STEPS vp<{{.+}}>, ir<1>
 ; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<{{.+}}>
 ; CHECK: Cost of 0 for VF 2: vp<{{.+}}> = vector-pointer ir<%arrayidx>
@@ -39,7 +38,6 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; CHECK: Cost of 1 for VF 4: induction instruction %inc = add nuw nsw i32 %i.016, 1
 ; CHECK: Cost of 0 for VF 4: induction instruction %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
 ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond.not = icmp eq i32 %inc, %n
-; CHECK: Cost of 0 for VF 4: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost of 0 for VF 4: vp<{{.+}}> = SCALAR-STEPS vp<{{.+}}>, ir<1>
 ; CHECK: Cost of 0 for VF 4: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<{{.+}}>
 ; CHECK: Cost of 0 for VF 4: vp<{{.+}}> = vector-pointer ir<%arrayidx>
@@ -56,7 +54,6 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; CHECK: Cost of 1 for VF 8: induction instruction %inc = add nuw nsw i32 %i.016, 1
 ; CHECK: Cost of 0 for VF 8: induction instruction %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
 ; CHECK: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i32 %inc, %n
-; CHECK: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost of 0 for VF 8: vp<{{.+}}> = SCALAR-STEPS vp<{{.+}}>, ir<1>
 ; CHECK: Cost of 0 for VF 8: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<{{.+}}>
 ; CHECK: Cost of 0 for VF 8: vp<{{.+}}> = vector-pointer ir<%arrayidx>
@@ -135,8 +132,7 @@ for.inc: ; preds = %for.body, %if.then
 ; CHECK: Cost of 0 for VF 2: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
 ; CHECK: Cost of 0 for VF 2: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
 ; CHECK: Cost of 1 for VF 2: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
-; CHECK: Cost of 0 for VF 2: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost of 0 for VF 2: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
+; CHECK: Cost of 0 for VF 2: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1>
 ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]>
 ; CHECK: Cost of 0 for VF 2: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]>
@@ -167,8 +163,7 @@ for.inc: ; preds = %for.body, %if.then
 ; CHECK: Cost of 0 for VF 4: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
 ; CHECK: Cost of 0 for VF 4: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
 ; CHECK: Cost of 1 for VF 4: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
-; CHECK: Cost of 0 for VF 4: EMIT vp<[[CAN_IV:%.]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost of 0 for VF 4: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
+; CHECK: Cost of 0 for VF 4: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1>
 ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]>
 ; CHECK: Cost of 0 for VF 4: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]>
@@ -199,8 +194,7 @@ for.inc: ; preds = %for.body, %if.then
 ; CHECK: Cost of 0 for VF 8: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
 ; CHECK: Cost of 0 for VF 8: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
 ; CHECK: Cost of 1 for VF 8: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
-; CHECK: Cost of 0 for VF 8: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost of 0 for VF 8: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
+; CHECK: Cost of 0 for VF 8: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1>
 ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]>
 ; CHECK: Cost of 0 for VF 8: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]>
@@ -231,8 +225,7 @@ for.inc: ; preds = %for.body, %if.then
 ; CHECK: Cost of 0 for VF 16: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
 ; CHECK: Cost of 0 for VF 16: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
 ; CHECK: Cost of 1 for VF 16: exit condition instruction %cmp.not = icmp eq i32 %dec, 0
-; CHECK: Cost of 0 for VF 16: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost of 0 for VF 16: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
+; CHECK: Cost of 0 for VF 16: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1>
 ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]>
 ; CHECK: Cost of 0 for VF 16: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]>
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll
index 6ea075f76aed4..1b7b95f75ee4a 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll
@@ -347,10 +347,10 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n)
 ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1
 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
 ; DEFAULT: [[PRED_STORE_CONTINUE35]]:
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
 ; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16)
-; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; DEFAULT: [[MIDDLE_BLOCK]]:
 ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
 ; DEFAULT: [[FOR_COND_CLEANUP]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
index bb85b88f181f7..d72e33a0355a2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll
@@ -10,7 +10,6 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst)
 ; CHECK: Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
 ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
-; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
 ; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
 ; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
@@ -28,7 +27,6 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst)
 ; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
 ; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
 ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
-; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
 ; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
 ; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
index 2cda2533e80e0..5a18aed0c0556 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll
@@ -32,9 +32,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK: Cost of 1 for VF 2: induction instruction %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
 ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i32 %lftr.wideiv, %n
 ; CHECK: Cost of 0 for VF 2: exit condition instruction %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: Cost of 0 for VF 2: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost of 1 for VF 2: WIDEN-REDUCTION-PHI ir<%sum.013> = phi vp<{{.+}}>, vp<[[EXT:%.+]]>
-; CHECK: Cost of 0 for VF 2: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
+; CHECK: Cost of 0 for VF 2: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1>
 ; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
 ; CHECK: Cost of 0 for VF 2: vp<[[VECP1:%.+]]> = vector-pointer ir<%arrayidx>
 ; CHECK: Cost of 1 for VF 2: WIDEN ir<%0> = load vp<[[VECP1]]>
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index 70b1ea13677b8..69efea36a22b1 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -397,9 +397,9 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
 ; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
 ; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
 ; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
-; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
-; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; IC4VF4: [[MIDDLE_BLOCK]]:
 ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
 ; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
@@ -723,9 +723,9 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
 ; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
 ; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
 ; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
-; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
-; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; IC4VF4: [[MIDDLE_BLOCK]]:
 ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
 ; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
index 5e88072517b37..fcfffba9041de 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
@@ -63,8 +63,8 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1
 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
 ; CHECK: [[PRED_STORE_CONTINUE4]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br [[EXIT:label %.*]]
@@ -139,8 +139,8 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1
 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br [[EXIT:label %.*]]
@@ -209,8 +209,8 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of
 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1
 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br [[EXIT:label %.*]]
@@ -285,8 +285,8 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1
 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br [[EXIT:label %.*]]
@@ -358,8 +358,8 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) {
 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1
 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
 ; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br [[EXIT:label %.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 91e0037d12c61..06fcce3533ed3 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -21,8 +21,9 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
@@ -31,7 +32,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
 ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx2>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -89,8 +90,9 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr inbounds ir<%y>, ir<%iv>
@@ -102,7 +104,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
 ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx2>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -163,8 +165,9 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: ir<%i> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ult ir<%i>, ir<5>
@@ -191,7 +194,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
 ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%d>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -258,8 +261,9 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<4>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<4>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.AB.0> = getelementptr inbounds ir<@AB>, ir<0>, vp<[[STEPS]]>
@@ -274,7 +278,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
 ; CHECK-NEXT: store ir<1> to index 1
 ; CHECK-NEXT: store ir<2> to index 2
 ; CHECK-NEXT: store ir<%AB.3> to index 3
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -344,8 +348,9 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%isd> = getelementptr inbounds ir<%asd>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%isd>
@@ -377,7 +382,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
 ; CHECK-NEXT: BLEND ir<%ysd.0> = ir<%psd> vp<[[PHI]]>/vp<[[OR1]]>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%isd>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%ysd.0>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT:}
@@ -454,15 +459,16 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, vp<[[EXP_SCEV]]>, vp<[[VF]]> (truncated to i8)
 ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * vp<[[EXP_SCEV]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[EXP_SCEV]]>
 ; CHECK-NEXT: WIDEN ir<%v3> = add nuw ir<%iv>, ir<1>
 ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
 ; CHECK-NEXT: REPLICATE store ir<%v3>, ir<%gep>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -521,15 +527,16 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
 ; CHECK-NEXT: WIDEN ir<%add> = add ir<%iv>, ir<%off>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -587,8 +594,9 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.y> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.y>
@@ -599,7 +607,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
 ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%div>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -658,8 +666,9 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
@@ -669,7 +678,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%div.1>, ir<%div.2>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%gep.x>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -727,8 +736,9 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%ld.addr>
@@ -759,7 +769,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
 ; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%st.addr>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%st.value>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -827,8 +837,9 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
@@ -838,7 +849,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%or.1>, ir<%or.2>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%add>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -896,8 +907,9 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%p>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
@@ -905,7 +917,7 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) {
 ; CHECK-NEXT: WIDEN-CAST ir<%zext> = zext nneg ir<%l>
 ; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-last-element ir<%zext>
 ; CHECK-NEXT: CLONE store vp<[[EXT]]>, ir<%p1>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -942,8 +954,9 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.1> = phi ir<22>, ir<%for.1.next>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
@@ -1015,8 +1028,9 @@ define void @print_select_with_fastmath_flags(ptr noalias %a, ptr noalias %b, pt
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK: vector loop: {
+; CHECK-NEXT: vp<[[IV:%.+]]> = CANONICAL-IV ir<0>
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT_EXIT:%.+]]>
 ; CHECK-NEXT: vp<[[ST:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds nuw ir<%b>, vp<[[ST]]>
 ; CHECK-NEXT: vp<[[PTR1:%.+]]> = vector-pointer ir<[[GEP1]]>
@@ -1030,7 +1044,7 @@ define void @print_select_with_fastmath_flags(ptr noalias %a, ptr noalias %b, pt
 ; CHECK-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds nuw ir<%a>, vp<[[ST]]>
 ; CHECK-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
 ; CHECK-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[SELECT]]>
-; CHECK-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
+; CHECK-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index a943e7ac12b1b..91168d6de21fe 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -57,9 +57,7 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
   EXPECT_EQ(&*Plan, VecBB->getPlan());
   auto Iter = VecBB->begin();
-  auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&*Iter++);
-  EXPECT_NE(nullptr, CanIV);
-  auto *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
+  VPWidenPHIRecipe *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
   EXPECT_NE(nullptr, Phi);
   VPInstruction *Idx = dyn_cast<VPInstruction>(&*Iter++);
@@ -218,7 +216,6 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
   EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
   auto Iter = VecBB->begin();
-  EXPECT_NE(nullptr, dyn_cast(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast(&*Iter++));
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp
index 582094bed3ef7..817af82bdbf43 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp
@@ -23,32 +23,31 @@ using VPPatternMatchTest = VPlanTestBase;
 TEST_F(VPPatternMatchTest, ScalarIVSteps) {
   VPlan &Plan = getPlan();
+  IntegerType *I64Ty = IntegerType::get(C, 64);
+  VPRegionBlock *VPR =
+      Plan.createVPRegionBlock(DebugLoc::getCompilerGenerated(), "");
+  VPValue *CanIV = VPR->getCanonicalIV();
   VPBasicBlock *VPBB = Plan.createVPBasicBlock("");
   VPBuilder Builder(VPBB);
-  IntegerType *I64Ty = IntegerType::get(C, 64);
-  VPValue *StartV = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 0));
-  auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DebugLoc());
-  Builder.insert(CanonicalIVPHI);
-
   VPValue *Inc = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 1));
   VPValue *VF = &Plan.getVF();
-  VPValue *Steps = Builder.createScalarIVSteps(
-      Instruction::Add, nullptr, CanonicalIVPHI, Inc, VF, DebugLoc());
+  VPValue *Steps = Builder.createScalarIVSteps(Instruction::Add, nullptr, CanIV,
+                                               Inc, VF, DebugLoc());
   VPValue *Inc2 = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 2));
-  VPValue *Steps2 = Builder.createScalarIVSteps(
-      Instruction::Add, nullptr, CanonicalIVPHI, Inc2, VF, DebugLoc());
+  VPValue *Steps2 = Builder.createScalarIVSteps(Instruction::Add, nullptr,
+                                                CanIV, Inc2, VF, DebugLoc());
   using namespace VPlanPatternMatch;
-  ASSERT_TRUE(match(Steps, m_ScalarIVSteps(m_Specific(CanonicalIVPHI),
-                                           m_SpecificInt(1), m_Specific(VF))));
+  ASSERT_TRUE(match(Steps, m_ScalarIVSteps(m_Specific(CanIV), m_SpecificInt(1),
+                                           m_Specific(VF))));
   ASSERT_FALSE(
-      match(Steps2, m_ScalarIVSteps(m_Specific(CanonicalIVPHI),
-                                    m_SpecificInt(1), m_Specific(VF))));
+      match(Steps2, m_ScalarIVSteps(m_Specific(CanIV), m_SpecificInt(1),
+                                    m_Specific(VF))));
-  ASSERT_TRUE(match(Steps2, m_ScalarIVSteps(m_Specific(CanonicalIVPHI),
-                                            m_SpecificInt(2), m_Specific(VF))));
+  ASSERT_TRUE(match(Steps2, m_ScalarIVSteps(m_Specific(CanIV), m_SpecificInt(2),
+                                            m_Specific(VF))));
 }
 
 TEST_F(VPPatternMatchTest, GetElementPtr) {
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index c2f045bf524e9..b4de7b8780e3b 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -24,14 +24,12 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
   VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero});
   VPInstruction *UseI = new VPInstruction(Instruction::Sub, {DefI});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
   VPBasicBlock *VPBB1 = Plan.getEntry();
   VPBB1->appendRecipe(UseI);
   VPBB1->appendRecipe(DefI);
   VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("");
-  VPBB2->appendRecipe(CanIV);
   VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1");
   VPBlockUtils::connectBlocks(VPBB1, R1);
   VPBlockUtils::connectBlocks(R1, Plan.getScalarHeader());
@@ -59,15 +57,13 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
   VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero});
   VPInstruction *UseI = new VPInstruction(Instruction::Sub, {DefI});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
   VPInstruction *BranchOnCond =
-      new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+      new VPInstruction(VPInstruction::BranchOnCond, {UseI});
   VPBasicBlock *VPBB1 = Plan.getEntry();
   VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("");
   VPBB1->appendRecipe(UseI);
-  VPBB2->appendRecipe(CanIV);
   VPBB2->appendRecipe(DefI);
   VPBB2->appendRecipe(BranchOnCond);
@@ -100,9 +96,8 @@ TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 0));
   VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
   VPInstruction *BranchOnCond =
-      new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+      new VPInstruction(VPInstruction::BranchOnCond, {DefI});
   auto *Blend = new VPBlendRecipe(Phi, {DefI}, {});
   VPBasicBlock *VPBB1 = Plan.getEntry();
@@ -110,7 +105,6 @@ TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
   VPBasicBlock *VPBB3 = Plan.createVPBasicBlock("");
   VPBasicBlock *VPBB4 = Plan.createVPBasicBlock("");
-  VPBB2->appendRecipe(CanIV);
   VPBB3->appendRecipe(Blend);
   VPBB4->appendRecipe(DefI);
   VPBB4->appendRecipe(BranchOnCond);
@@ -157,8 +151,6 @@ TEST_F(VPVerifierTest, VPPhiIncomingValueDoesntDominateIncomingBlock) {
   VPPhi *Phi = new VPPhi({DefI}, {});
   VPBB2->appendRecipe(Phi);
   VPBB2->appendRecipe(DefI);
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
-  VPBB3->appendRecipe(CanIV);
   VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB3, VPBB3, "R1");
   VPBlockUtils::connectBlocks(VPBB1, VPBB2);
@@ -186,9 +178,8 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
   VPlan &Plan = getPlan();
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
   VPInstruction *I1 = new VPInstruction(Instruction::Add, {Zero});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
   VPInstruction *BranchOnCond =
-      new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+      new VPInstruction(VPInstruction::BranchOnCond, {I1});
   VPInstruction *BranchOnCond2 =
       new VPInstruction(VPInstruction::BranchOnCond, {I1});
@@ -197,7 +188,6 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
   VPBB1->appendRecipe(I1);
   VPBB1->appendRecipe(BranchOnCond2);
-  VPBB2->appendRecipe(CanIV);
   VPBB2->appendRecipe(BranchOnCond);
   VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB2, VPBB2, "R1");
@@ -220,9 +210,8 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
   VPlan &Plan = getPlan();
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
   VPInstruction *I1 = new VPInstruction(Instruction::Add, {Zero});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
   VPInstruction *BranchOnCond =
-      new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+      new VPInstruction(VPInstruction::BranchOnCond, {I1});
   VPInstruction *BranchOnCond2 =
       new VPInstruction(VPInstruction::BranchOnCond, {I1});
@@ -231,7 +220,6 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
   VPBasicBlock *VPBB3 = Plan.createVPBasicBlock("");
   VPBB1->appendRecipe(I1);
-  VPBB2->appendRecipe(CanIV);
   VPBB2->appendRecipe(BranchOnCond2);
   VPBB3->appendRecipe(BranchOnCond);
@@ -260,8 +248,6 @@ TEST_F(VPVerifierTest, BlockOutsideRegionWithParent) {
   VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("");
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
-  VPBB2->appendRecipe(CanIV);
   VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero});
   VPInstruction *BranchOnCond =
@@ -289,14 +275,11 @@ TEST_F(VPVerifierTest, BlockOutsideRegionWithParent) {
 TEST_F(VPVerifierTest, NonHeaderPHIInHeader) {
   VPlan &Plan = getPlan();
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
-  auto *BranchOnCond = new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+  auto *BranchOnCond = new VPInstruction(VPInstruction::BranchOnCond, {Zero});
   VPBasicBlock *VPBB1 = Plan.getEntry();
   VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("header");
-  VPBB2->appendRecipe(CanIV);
-
   PHINode *PHINode = PHINode::Create(Type::getInt32Ty(C), 2);
   auto *IRPhi = new VPIRPhi(*PHINode);
   VPBB2->appendRecipe(IRPhi);