From 76647c564632a244a9ca933adc9ec9d7f6555175 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Fri, 12 Dec 2025 01:48:26 -0800 Subject: [PATCH] Transform VPFirstOrderRecurrencePHIRecipe into concrete recipes --- .../Transforms/Vectorize/LoopVectorize.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlan.h | 6 +- .../Transforms/Vectorize/VPlanAnalysis.cpp | 1 + .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 40 +++-------- .../Transforms/Vectorize/VPlanTransforms.cpp | 30 +++++++- .../Transforms/Vectorize/VPlanTransforms.h | 2 +- .../AArch64/reduction-recurrence-costs-sve.ll | 25 +++---- .../AArch64/sve-interleaved-accesses.ll | 7 +- .../first-order-recurrence-scalable-vf1.ll | 4 +- .../tail-folding-fixed-order-recurrence.ll | 69 +++++-------------- .../first-order-recurrence-scalable-vf1.ll | 4 +- .../scalable-first-order-recurrence.ll | 36 +++++----- 12 files changed, 96 insertions(+), 130 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0108351f821f4..e2a7ddbe88966 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7383,7 +7383,7 @@ DenseMap LoopVectorizationPlanner::executePlan( : TargetTransformInfo::RGK_FixedWidthVector)); VPlanTransforms::removeDeadRecipes(BestVPlan); - VPlanTransforms::convertToConcreteRecipes(BestVPlan); + VPlanTransforms::convertToConcreteRecipes(BestVPlan, BestVF); // Regions are dissolved after optimizing for VF and UF, which completely // removes unneeded loop regions first. 
VPlanTransforms::dissolveLoopRegions(BestVPlan); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0f8f6abab7b0a..2afb7edeced16 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2376,7 +2376,11 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { cast(getUnderlyingInstr()), *getOperand(0)); } - void execute(VPTransformState &State) override; + void execute(VPTransformState &State) override { + llvm_unreachable( + "cannot execute this recipe since VPFirstOrderRecurrencePHIRecipe " + "should be transformed to VPWidenPHIRecipe"); + } /// Return the cost of this first-order recurrence phi recipe. InstructionCost computeCost(ElementCount VF, diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index a586aafa2855d..c695b5c22cc65 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -75,6 +75,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { switch (Opcode) { case Instruction::ExtractElement: case Instruction::Freeze: + case Instruction::InsertElement: case VPInstruction::ReductionStartVector: case VPInstruction::ResumeForEpilogue: return inferScalarType(R->getOperand(0)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index a56b165bc1a5e..696778a18d279 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -459,6 +459,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { case VPInstruction::WidePtrAdd: case VPInstruction::WideIVStep: return 2; + case Instruction::InsertElement: case Instruction::Select: case VPInstruction::ActiveLaneMask: case VPInstruction::ComputeAnyOfResult: @@ -572,6 +573,13 @@ Value *VPInstruction::generate(VPTransformState &State) { Value *B = 
State.get(getOperand(1), OnlyFirstLaneUsed); return Builder.CreateCmp(getPredicate(), A, B, Name); } + case Instruction::InsertElement: { + assert(State.VF.isVector() && "Only insert element into vector"); + Value *Vec = State.get(getOperand(0)); + Value *Elt = State.get(getOperand(1), /*IsScalar=*/true); + Value *Idx = State.get(getOperand(2), /*IsScalar=*/true); + return Builder.CreateInsertElement(Vec, Elt, Idx, Name); + } case Instruction::PHI: { llvm_unreachable("should be handled by VPPhi::execute"); } @@ -1179,6 +1187,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case Instruction::Freeze: case Instruction::FCmp: case Instruction::ICmp: + case Instruction::InsertElement: case Instruction::Select: case Instruction::PHI: case VPInstruction::AnyOf: @@ -1223,6 +1232,8 @@ bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const { return false; case Instruction::ExtractElement: return Op == getOperand(1); + case Instruction::InsertElement: + return Op == getOperand(1) || Op == getOperand(2); case Instruction::PHI: return true; case Instruction::FCmp: @@ -4299,35 +4310,6 @@ void VPWidenCanonicalIVRecipe::printRecipe(raw_ostream &O, const Twine &Indent, } #endif -void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { - auto &Builder = State.Builder; - // Create a vector from the initial value. - auto *VectorInit = getStartValue()->getLiveInIRValue(); - - Type *VecTy = State.VF.isScalar() - ? 
VectorInit->getType() - : VectorType::get(VectorInit->getType(), State.VF); - - BasicBlock *VectorPH = - State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0)); - if (State.VF.isVector()) { - auto *IdxTy = Builder.getInt32Ty(); - auto *One = ConstantInt::get(IdxTy, 1); - IRBuilder<>::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(VectorPH->getTerminator()); - auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); - auto *LastIdx = Builder.CreateSub(RuntimeVF, One); - VectorInit = Builder.CreateInsertElement( - PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); - } - - // Create a phi node for the new recurrence. - PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur"); - Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); - Phi->addIncoming(VectorInit, VectorPH); - State.set(this, Phi); -} - InstructionCost VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6627133878fdb..b4682cb27330f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3667,7 +3667,7 @@ void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) { R->dissolveToCFGLoop(); } -void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { +void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, ElementCount VF) { VPTypeAnalysis TypeInfo(Plan); SmallVector<VPRecipeBase *> ToRemove; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( @@ -3695,6 +3695,34 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { continue; } + if (auto *FORPhiR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) { + VPValue *InitVec = FORPhiR->getStartValue(); + DebugLoc DL = FORPhiR->getDebugLoc(); + if (VF.isVector()) { + VPBuilder PHBuilder(Plan.getVectorPreheader()); + VPValue *Poison = Plan.getOrAddLiveIn( + PoisonValue::get(TypeInfo.inferScalarType(InitVec))); + Type *IdxTy = 
Type::getInt32Ty(Plan.getContext()); + VPValue *RuntimeVF = PHBuilder.createScalarZExtOrTrunc( + &Plan.getVF(), IdxTy, TypeInfo.inferScalarType(&Plan.getVF()), + DL); + VPValue *LastIdx = PHBuilder.createOverflowingOp( + Instruction::Sub, {RuntimeVF, Plan.getConstantInt(IdxTy, 1)}, + {false, false}, DL); + InitVec = PHBuilder.createNaryOp(Instruction::InsertElement, + {Poison, InitVec, LastIdx}, DL, + "vector.recur.init"); + } + auto *WidenPhi = + new VPWidenPHIRecipe(cast<PHINode>(FORPhiR->getUnderlyingInstr()), + InitVec, DL, "vector.recur"); + WidenPhi->addOperand(FORPhiR->getBackedgeValue()); + WidenPhi->insertBefore(FORPhiR); + FORPhiR->replaceAllUsesWith(WidenPhi); + ToRemove.push_back(FORPhiR); + continue; + } + // Expand VPBlendRecipe into VPInstruction::Select. VPBuilder Builder(&R); if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index afdf1655b4622..067b71793cf90 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -284,7 +284,7 @@ struct VPlanTransforms { static void canonicalizeEVLLoops(VPlan &Plan); /// Lower abstract recipes to concrete ones, that can be codegen'd. 
- static void convertToConcreteRecipes(VPlan &Plan); + static void convertToConcreteRecipes(VPlan &Plan, ElementCount VF); /// This function converts initial recipes to the abstract recipes and clamps /// \p Range based on cost model for following optimizations and cost diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index f2c0ca30a6c18..6a1618a8caa0e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -56,7 +56,8 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VSCALEFORTUNING2: [[VECTOR_PH]]: ; VSCALEFORTUNING2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 +; VSCALEFORTUNING2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 4 +; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP5]], 2 ; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 @@ -73,18 +74,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2-NEXT: [[TMP14:%.*]] = xor [[TMP13]], splat (i32 1) ; VSCALEFORTUNING2-NEXT: [[TMP15:%.*]] = zext [[TMP14]] to ; VSCALEFORTUNING2-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], [[TMP15]] -; VSCALEFORTUNING2-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() -; VSCALEFORTUNING2-NEXT: [[TMP19:%.*]] = mul nuw i32 [[TMP18]], 4 -; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = sub i32 [[TMP19]], 1 -; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 0, i32 [[TMP20]] 
-; VSCALEFORTUNING2-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() -; VSCALEFORTUNING2-NEXT: [[TMP22:%.*]] = mul nuw i32 [[TMP21]], 4 +; VSCALEFORTUNING2-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP5]] to i32 ; VSCALEFORTUNING2-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1 ; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement poison, i32 0, i32 [[TMP23]] ; VSCALEFORTUNING2-NEXT: br label %[[VECTOR_BODY:.*]] ; VSCALEFORTUNING2: [[VECTOR_BODY]]: ; VSCALEFORTUNING2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7:%.*]], %[[VECTOR_BODY]] ] +; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7:%.*]], %[[VECTOR_BODY]] ] ; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ] ; VSCALEFORTUNING2-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP47:%.*]], %[[VECTOR_BODY]] ] ; VSCALEFORTUNING2-NEXT: [[VEC_PHI5:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[VECTOR_BODY]] ] @@ -133,13 +129,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VSCALEFORTUNING2: [[SCALAR_PH]]: ; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP24]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT11:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; VSCALEFORTUNING2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; VSCALEFORTUNING2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 
[[TMP50]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; VSCALEFORTUNING2-NEXT: br label %[[LOOP:.*]] ; VSCALEFORTUNING2: [[LOOP]]: ; VSCALEFORTUNING2-NEXT: [[TMP54:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP57:%.*]], %[[LOOP]] ] -; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT11]], %[[SCALAR_PH]] ], [ [[TMP54]], %[[LOOP]] ] +; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT10]], %[[SCALAR_PH]] ], [ [[TMP54]], %[[LOOP]] ] ; VSCALEFORTUNING2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; VSCALEFORTUNING2-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ] ; VSCALEFORTUNING2-NEXT: [[TMP56:%.*]] = add i64 [[Y]], 1 @@ -200,19 +196,14 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP18:%.*]] = xor [[TMP17]], splat (i32 1) ; PRED-NEXT: [[TMP19:%.*]] = zext [[TMP18]] to ; PRED-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], [[TMP19]] -; PRED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() -; PRED-NEXT: [[TMP23:%.*]] = mul nuw i32 [[TMP22]], 4 -; PRED-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1 -; PRED-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 0, i32 [[TMP24]] -; PRED-NEXT: [[TMP25:%.*]] = call i32 @llvm.vscale.i32() -; PRED-NEXT: [[TMP26:%.*]] = mul nuw i32 [[TMP25]], 4 +; PRED-NEXT: [[TMP26:%.*]] = trunc i64 [[TMP2]] to i32 ; PRED-NEXT: [[TMP27:%.*]] = sub i32 [[TMP26]], 1 ; PRED-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement poison, i32 0, i32 [[TMP27]] ; PRED-NEXT: br label %[[VECTOR_BODY:.*]] ; PRED: [[VECTOR_BODY]]: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] -; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], 
%[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT6:%.*]], %[[VECTOR_BODY]] ] +; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT6:%.*]], %[[VECTOR_BODY]] ] ; PRED-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ] ; PRED-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP41:%.*]], %[[VECTOR_BODY]] ] ; PRED-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP12]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 8935010e71676..8825362bb875f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -1272,15 +1272,14 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP10]] ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNOT]] ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw nsw i64 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], -1 +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw [[TMP14]], splat (i64 1) ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP33]], 2 -; CHECK-NEXT: [[TMP34:%.*]] = add nsw i32 [[TMP16]], -1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP34]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; 
CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll index e35db479dc963..6d29f693c18c1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll @@ -13,9 +13,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP2]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i64 0, i32 [[TMP5]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index b95691f6e7c04..17968251ff4c1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -18,9 +18,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP8]] to i32 -; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP25]], 1 ; IF-EVL-NEXT: 
[[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 33, i32 [[TMP11]] ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] ; IF-EVL: [[VECTOR_BODY]]: @@ -57,8 +55,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4 +; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP3]] to i32 ; NO-VP-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 ; NO-VP-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 33, i32 [[TMP8]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -127,14 +124,9 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP8]] to i32 -; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP32]], 1 ; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 33, i32 [[TMP11]] -; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 4 -; IF-EVL-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1 -; IF-EVL-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 22, i32 [[TMP14]] +; IF-EVL-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 22, i32 [[TMP11]] ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] ; IF-EVL: [[VECTOR_BODY]]: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -172,14 +164,10 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-NEXT: [[TMP3:%.*]] = 
mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4 +; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP3]] to i32 ; NO-VP-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 ; NO-VP-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 33, i32 [[TMP8]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 -; NO-VP-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 -; NO-VP-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 22, i32 [[TMP11]] +; NO-VP-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 22, i32 [[TMP8]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -255,24 +243,16 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP8]] to i32 -; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP39]], 1 ; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 33, i32 [[TMP11]] -; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 4 -; IF-EVL-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1 -; IF-EVL-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 22, i32 [[TMP14]] -; IF-EVL-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP15]], 4 -; IF-EVL-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1 -; IF-EVL-NEXT: 
[[VECTOR_RECUR_INIT3:%.*]] = insertelement poison, i32 11, i32 [[TMP17]] +; IF-EVL-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 22, i32 [[TMP11]] +; IF-EVL-NEXT: [[VECTOR_RECUR_INIT2:%.*]] = insertelement poison, i32 11, i32 [[TMP11]] ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] ; IF-EVL: [[VECTOR_BODY]]: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VECTOR_RECUR2:%.*]] = phi [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT2]], %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP39]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP18]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) @@ -307,24 +287,17 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4 +; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP3]] to i32 ; NO-VP-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 ; NO-VP-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 33, i32 [[TMP8]] -; NO-VP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 -; NO-VP-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 -; 
NO-VP-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 22, i32 [[TMP11]] -; NO-VP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 4 -; NO-VP-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1 -; NO-VP-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement poison, i32 11, i32 [[TMP14]] +; NO-VP-NEXT: [[VECTOR_RECUR_INIT1:%.*]] = insertelement poison, i32 22, i32 [[TMP8]] +; NO-VP-NEXT: [[VECTOR_RECUR_INIT2:%.*]] = insertelement poison, i32 11, i32 [[TMP8]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VECTOR_RECUR2:%.*]] = phi [ [[VECTOR_RECUR_INIT1]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] -; NO-VP-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; NO-VP-NEXT: [[VECTOR_RECUR4:%.*]] = phi [ [[VECTOR_RECUR_INIT2]], %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; NO-VP-NEXT: [[WIDE_LOAD]] = load , ptr [[TMP16]], align 4 ; NO-VP-NEXT: [[TMP18]] = call @llvm.vector.splice.nxv4i32( [[VECTOR_RECUR]], [[WIDE_LOAD]], i32 -1) @@ -406,9 +379,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; IF-EVL-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 -; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4 -; IF-EVL-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i32 [[TMP4]], 1 ; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 33, i32 [[TMP8]] ; IF-EVL-NEXT: 
br label %[[VECTOR_BODY:.*]] ; IF-EVL: [[VECTOR_BODY]]: @@ -457,8 +428,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) { ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]] ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4 +; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP3]] to i32 ; NO-VP-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 ; NO-VP-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 33, i32 [[TMP8]] ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] @@ -535,9 +505,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; IF-EVL-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] -; IF-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; IF-EVL-NEXT: [[TMP19:%.*]] = mul nuw i32 [[TMP13]], 2 -; IF-EVL-NEXT: [[TMP10:%.*]] = sub i32 [[TMP19]], 1 +; IF-EVL-NEXT: [[TMP10:%.*]] = sub i32 [[TMP5]], 1 ; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i64 33, i32 [[TMP10]] ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] ; IF-EVL: [[VECTOR_BODY]]: @@ -581,8 +549,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; NO-VP-NEXT: [[INDUCTION:%.*]] = add nuw nsw zeroinitializer, [[TMP5]] ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; NO-VP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; NO-VP-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP14]], 2 +; NO-VP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP3]] to i32 ; NO-VP-NEXT: [[TMP20:%.*]] = sub i32 [[TMP16]], 1 ; NO-VP-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i64 33, i32 [[TMP20]] ; NO-VP-NEXT: br 
label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll index 40587c0c8b68c..3d1589dd4f038 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll @@ -15,7 +15,7 @@ define i64 @pr97452_scalable_vf1_for_live_out(ptr %src) { ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i64 0, i32 [[TMP4]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -82,7 +82,7 @@ define void @pr97452_scalable_vf1_for_no_live_out(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i64 0, i32 [[TMP4]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 66fc0b830d6fc..ccf03aee5b2a3 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -36,8 +36,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 ; 
CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]] ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; CHECK-VF4UF1-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4 +; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP11]] to i32 ; CHECK-VF4UF1-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 ; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 [[PRE_LOAD]], i32 [[TMP16]] ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -96,11 +95,11 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8 +; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP10]], 4 +; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP12]], 2 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4 +; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP12]] to i32 ; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1 ; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 [[PRE_LOAD]], i32 [[TMP16]] ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -195,8 +194,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 4 +; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = 
trunc i64 [[TMP4]] to i32 ; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1 ; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 [[DOTPRE]], i32 [[TMP9]] ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -251,11 +249,11 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 +; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP3]], 4 +; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP7]], 2 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 4 +; CHECK-VF4UF2-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 ; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1 ; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i32 [[DOTPRE]], i32 [[TMP9]] ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -381,8 +379,7 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f ; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]] ; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[CONV1]], i64 0 ; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-VF4UF1-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP16]], 4 +; CHECK-VF4UF1-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP12]] to i32 ; CHECK-VF4UF1-NEXT: [[TMP18:%.*]] = sub i32 [[TMP17]], 1 ; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[TMP0]], i32 [[TMP18]] ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ 
-451,14 +448,14 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f ; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8 +; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP11]], 4 +; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP13]], 2 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP12]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]] ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[CONV1]], i64 0 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP16]], 4 +; CHECK-VF4UF2-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP13]] to i32 ; CHECK-VF4UF2-NEXT: [[TMP18:%.*]] = sub i32 [[TMP17]], 1 ; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[TMP0]], i32 [[TMP18]] ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -755,8 +752,7 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) { ; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; CHECK-VF4UF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 +; CHECK-VF4UF1-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP6]] to i32 ; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 ; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP11]] ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -809,11 +805,11 @@ define void @sink_after(ptr %a, 
ptr %b, i64 %n) { ; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8 +; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP5]], 4 +; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP7]], 2 ; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4 +; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP7]] to i32 ; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 ; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP11]] ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]]